diff --git a/.github/workflows/jan-docs-new-release.yaml b/.github/workflows/jan-docs-new-release.yaml deleted file mode 100644 index 0694658cf..000000000 --- a/.github/workflows/jan-docs-new-release.yaml +++ /dev/null @@ -1,63 +0,0 @@ -name: Deploy Docs on new release - -on: - release: - types: - - published - - edited - - released - -jobs: - deploy: - name: Deploy to CloudFlare Pages - env: - CLOUDFLARE_PROJECT_NAME: docs - runs-on: ubuntu-latest - permissions: - contents: write - deployments: write - pull-requests: write - steps: - - uses: actions/checkout@v4 - with: - ref: dev - - uses: actions/setup-node@v3 - with: - node-version: 20 - - - name: Install jq - uses: dcarbone/install-jq-action@v2.0.1 - - - name: Fill env vars - working-directory: docs - run: | - env_example_file=".env.example" - touch .env - while IFS= read -r line || [[ -n "$line" ]]; do - if [[ "$line" == *"="* ]]; then - var_name=$(echo $line | cut -d '=' -f 1) - echo $var_name - var_value="$(jq -r --arg key "$var_name" '.[$key]' <<< "$SECRETS")" - echo "$var_name=$var_value" >> .env - fi - done < "$env_example_file" - env: - SECRETS: '${{ toJson(secrets) }}' - - - name: Install dependencies - working-directory: docs - run: yarn install - - name: Build website - working-directory: docs - run: export NODE_ENV=production && yarn build && cp _redirects out/_redirects && cp _headers out/_headers - - - name: Publish to Cloudflare Pages Production - uses: cloudflare/pages-action@v1 - with: - apiToken: ${{ secrets.CLOUDFLARE_API_TOKEN }} - accountId: ${{ secrets.CLOUDFLARE_ACCOUNT_ID }} - projectName: ${{ env.CLOUDFLARE_PROJECT_NAME }} - directory: ./docs/out - branch: main - # Optional: Enable this if you want to have GitHub Deployments triggered - gitHubToken: ${{ secrets.GITHUB_TOKEN }} diff --git a/.github/workflows/jan-docs.yml b/.github/workflows/jan-docs.yml index 24673ef22..f64f93f66 100644 --- a/.github/workflows/jan-docs.yml +++ b/.github/workflows/jan-docs.yml @@ -26,7 +26,7 @@ jobs: deployments: write pull-requests: write steps: - - uses: actions/checkout@v3 + - uses: actions/checkout@v4 - uses: actions/setup-node@v3 with: node-version: 20 diff --git a/.github/workflows/jan-linter-and-test.yml b/.github/workflows/jan-linter-and-test.yml index 7ad7f67aa..e2f437837 100644 --- a/.github/workflows/jan-linter-and-test.yml +++ b/.github/workflows/jan-linter-and-test.yml @@ -1,4 +1,4 @@ -name: Test - Linter & Playwright +name: Linter & Test on: workflow_dispatch: push: diff --git a/README.md b/README.md index 5b3655ba8..41a41af46 100644 --- a/README.md +++ b/README.md @@ -1,6 +1,6 @@ # Jan - Local AI Assistant -![Jan banner](./JanBanner.png) +![Jan AI](docs/src/pages/docs/_assets/jan-app.png)

@@ -12,62 +12,50 @@

- Getting Started - - Docs - - Changelog - - Bug reports + Getting Started + - Docs + - Changelog + - Bug reports - Discord

-Jan is a ChatGPT-alternative that runs 100% offline on your device. Our goal is to make it easy for a layperson to download and run LLMs and use AI with **full control** and **privacy**. - -**⚠️ Jan is in active development.** +Jan is an AI assistant that can run 100% offline on your device. Download and run LLMs with +**full control** and **privacy**. ## Installation -Because clicking a button is still the easiest way to get started: +The easiest way to get started is to download one of the following versions for your operating system: - - - -
Platform StableBeta Nightly
Windows jan.exejan.exe jan.exe
macOS jan.dmgjan.dmg jan.dmg
Linux (deb) jan.debjan.deb jan.deb
Linux (AppImage) jan.AppImagejan.AppImage jan.AppImage
Download from [jan.ai](https://jan.ai/) or [GitHub Releases](https://github.com/menloresearch/jan/releases). -## Demo - - ## Features @@ -149,13 +137,12 @@ For detailed compatibility, check our [installation guides](https://jan.ai/docs/ ## Troubleshooting -When things go sideways (they will): +If things go sideways: 1. Check our [troubleshooting docs](https://jan.ai/docs/troubleshooting) 2. Copy your error logs and system specs 3. Ask for help in our [Discord](https://discord.gg/FTk2MvZwJH) `#🆘|jan-help` channel -We keep logs for 24 hours, so don't procrastinate on reporting issues. ## Contributing @@ -175,15 +162,6 @@ Contributions welcome. See [CONTRIBUTING.md](CONTRIBUTING.md) for the full spiel - **Jobs**: hr@jan.ai - **General Discussion**: [Discord](https://discord.gg/FTk2MvZwJH) -## Trust & Safety - -**Friendly reminder**: We're not trying to scam you. - -- We won't ask for personal information -- Jan is completely free (no premium version exists) -- We don't have a cryptocurrency or ICO -- We're bootstrapped and not seeking your investment (yet) - ## License Apache 2.0 - Because sharing is caring. diff --git a/docs/src/pages/docs/_assets/hf_hub.png b/docs/src/pages/docs/_assets/hf_hub.png new file mode 100644 index 000000000..ad059c49a Binary files /dev/null and b/docs/src/pages/docs/_assets/hf_hub.png differ diff --git a/docs/src/pages/docs/_assets/hf_jan_nano.png b/docs/src/pages/docs/_assets/hf_jan_nano.png new file mode 100644 index 000000000..147a5c70e Binary files /dev/null and b/docs/src/pages/docs/_assets/hf_jan_nano.png differ diff --git a/docs/src/pages/docs/_assets/hf_jan_nano_2.png b/docs/src/pages/docs/_assets/hf_jan_nano_2.png new file mode 100644 index 000000000..10c410240 Binary files /dev/null and b/docs/src/pages/docs/_assets/hf_jan_nano_2.png differ diff --git a/docs/src/pages/docs/_assets/hf_jan_nano_3.png b/docs/src/pages/docs/_assets/hf_jan_nano_3.png new file mode 100644 index 000000000..dac240d29 Binary files /dev/null and b/docs/src/pages/docs/_assets/hf_jan_nano_3.png differ diff --git a/docs/src/pages/docs/_assets/hf_jan_nano_4.png b/docs/src/pages/docs/_assets/hf_jan_nano_4.png new file mode 100644 index 000000000..552f07b06 Binary files /dev/null and b/docs/src/pages/docs/_assets/hf_jan_nano_4.png differ diff --git a/docs/src/pages/docs/_assets/hf_jan_nano_5.png b/docs/src/pages/docs/_assets/hf_jan_nano_5.png new file mode 100644 index 000000000..b322f0f93 Binary files /dev/null and b/docs/src/pages/docs/_assets/hf_jan_nano_5.png differ diff --git a/docs/src/pages/docs/_assets/hf_jan_nano_6.png b/docs/src/pages/docs/_assets/hf_jan_nano_6.png new file mode 100644 index 000000000..c8be2b707 Binary files /dev/null and b/docs/src/pages/docs/_assets/hf_jan_nano_6.png differ diff --git a/docs/src/pages/docs/_assets/hf_jan_nano_7.png b/docs/src/pages/docs/_assets/hf_jan_nano_7.png new file mode 100644 index 000000000..2a8ba8438 Binary files /dev/null and b/docs/src/pages/docs/_assets/hf_jan_nano_7.png differ diff --git a/docs/src/pages/docs/_assets/hf_jan_nano_8.png b/docs/src/pages/docs/_assets/hf_jan_nano_8.png new file mode 100644 index 000000000..4e1885a8e Binary files /dev/null and b/docs/src/pages/docs/_assets/hf_jan_nano_8.png differ diff --git a/docs/src/pages/docs/_assets/hf_jan_nano_9.png b/docs/src/pages/docs/_assets/hf_jan_nano_9.png new file mode 100644 index 000000000..09575c541 Binary files /dev/null and b/docs/src/pages/docs/_assets/hf_jan_nano_9.png differ diff --git a/docs/src/pages/docs/_assets/hf_jan_setup.png 
b/docs/src/pages/docs/_assets/hf_jan_setup.png new file mode 100644 index 000000000..2d917539b Binary files /dev/null and b/docs/src/pages/docs/_assets/hf_jan_setup.png differ diff --git a/docs/src/pages/docs/_assets/hf_providers.png b/docs/src/pages/docs/_assets/hf_providers.png new file mode 100644 index 000000000..1f8e4daf7 Binary files /dev/null and b/docs/src/pages/docs/_assets/hf_providers.png differ diff --git a/docs/src/pages/docs/remote-models/_meta.json b/docs/src/pages/docs/remote-models/_meta.json index 39660be88..9ef524352 100644 --- a/docs/src/pages/docs/remote-models/_meta.json +++ b/docs/src/pages/docs/remote-models/_meta.json @@ -26,5 +26,9 @@ "openrouter": { "title": "OpenRouter", "href": "/docs/remote-models/openrouter" + }, + "huggingface": { + "title": "Hugging Face", + "href": "/docs/remote-models/huggingface" } } diff --git a/docs/src/pages/docs/remote-models/huggingface.mdx b/docs/src/pages/docs/remote-models/huggingface.mdx new file mode 100644 index 000000000..07f2103d2 --- /dev/null +++ b/docs/src/pages/docs/remote-models/huggingface.mdx @@ -0,0 +1,152 @@ +--- +title: Hugging Face +description: Learn how to integrate Hugging Face models with Jan using the Router or Inference Endpoints. +keywords: + [ + Hugging Face, + Jan, + Jan AI, + Hugging Face Router, + Hugging Face Inference Endpoints, + Hugging Face API, + Hugging Face Integration, + Hugging Face API Integration + ] +--- + +import { Callout, Steps } from 'nextra/components' +import { Settings, Plus } from 'lucide-react' + +# Hugging Face + +Jan supports Hugging Face models through two methods: the new **HF Router** (recommended) and **Inference Endpoints**. Both methods require a Hugging Face token and **billing to be set up**. + +![HuggingFace Inference Providers](../_assets/hf_providers.png) + +## Option 1: HF Router (Recommended) + +The HF Router provides access to models from multiple providers (Replicate, Together AI, SambaNova, Fireworks, Cohere, and more) through a single endpoint. + + + +### Step 1: Get Your HF Token + +Visit [Hugging Face Settings > Access Tokens](https://huggingface.co/settings/tokens) and create a token. Make sure you have billing set up on your account. + +### Step 2: Configure Jan + +1. Go to **Settings** > **Model Providers** > **HuggingFace** +2. Enter your HF token +3. Use this URL: `https://router.huggingface.co/v1` + +![Jan HF Setup](../_assets/hf_jan_setup.png) + +You can find out more about the HF Router [here](https://huggingface.co/docs/inference-providers/index). + +### Step 3: Start Using Models + +Jan comes with three HF Router models pre-configured. Select one and start chatting immediately. + + + + +The HF Router automatically routes your requests to the best available provider for each model, giving you access to a wide variety of models without managing individual endpoints. + + +## Option 2: HF Inference Endpoints + +For more control over specific models and deployment configurations, you can use Hugging Face Inference Endpoints. + + + +### Step 1: Navigate to the HuggingFace Model Hub + +Visit the [Hugging Face Model Hub](https://huggingface.co/models) (make sure you are logged in) and pick the model you want to use. + +![HuggingFace Model Hub](../_assets/hf_hub.png) + +### Step 2: Configure HF Inference Endpoint and Deploy + +After you have selected the model you want to use, click on the **Deploy** button and select a deployment method. We will select HF Inference Endpoints for this one. + +![HuggingFace Deployment](../_assets/hf_jan_nano.png) +
+ +This will take you to the deployment setup page. For this example, we will leave the default settings under the **GPU** tab as they are and click on **Create Endpoint**. + +![HuggingFace Deployment](../_assets/hf_jan_nano_2.png) +
+ +Once your endpoint is ready, test that it works on the **Test your endpoint** tab. + +![HuggingFace Deployment](../_assets/hf_jan_nano_3.png) +
+ +If you get a response, you can click on **Copy** to copy the endpoint URL and API key. + + You will need to be logged in to Hugging Face Inference Endpoints and have a credit card on file to deploy a model. + +### Step 3: Configure Jan + +If you do not have an API key, you can create one under **Settings** > **Access Tokens** [here](https://huggingface.co/settings/tokens). Once you finish, copy the token and add it to Jan alongside your endpoint URL at **Settings** > **Model Providers** > **HuggingFace**. + +**3.1 HF Token** +![Get Token](../_assets/hf_jan_nano_5.png) +
+ +**3.2 HF Endpoint URL** +![Endpoint URL](../_assets/hf_jan_nano_4.png) +
+ +**3.3 Jan Settings** +![Jan Settings](../_assets/hf_jan_nano_6.png) + + +Make sure to add `/v1/` to the end of your endpoint URL. This is required for OpenAI-compatible API requests. + + +**3.4 Add Model Details** +![Add Model Details](../_assets/hf_jan_nano_7.png) + +### Step 4: Start Using the Model + +Now you can start using the model in any chat. + +![Start Using the Model](../_assets/hf_jan_nano_8.png) + +If you want to learn how to use Jan Nano with MCP, check out [the guide here](../jan-models/jan-nano-32).
+ +
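+Once your token and endpoint URL are configured, it can help to sanity-check the connection outside Jan with a plain OpenAI-style request. The snippet below is only a sketch: the token, endpoint URL, and model ID are placeholders you must replace with your own values, and the same request shape should also work against the HF Router base URL (`https://router.huggingface.co/v1`) with a model ID from the router catalog.
+
+```bash
+# Placeholder values - substitute your own token, endpoint URL, and model ID.
+export HF_TOKEN="hf_xxx"
+export ENDPOINT_URL="https://your-endpoint.endpoints.huggingface.cloud/v1"
+
+curl "$ENDPOINT_URL/chat/completions" \
+  -H "Authorization: Bearer $HF_TOKEN" \
+  -H "Content-Type: application/json" \
+  -d '{
+        "model": "your-model-id",
+        "messages": [{"role": "user", "content": "Hello!"}],
+        "max_tokens": 64
+      }'
+```
+
+A JSON response containing a `choices` array confirms the token and URL are valid before you add them to Jan.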
+ +
## Available Hugging Face Models + +**Option 1 (HF Router):** Access to models from multiple providers as shown in the providers image above. + +**Option 2 (Inference Endpoints):** You can follow the steps above with a large number of models on Hugging Face and bring them into Jan. Check out other models in the [Hugging Face Model Hub](https://huggingface.co/models). + +## Troubleshooting + +Common issues and solutions: + +**1. Started a chat but the model is not responding** +- Verify your API_KEY/HF_TOKEN is correct and not expired +- Ensure you have billing set up on your HF account +- For Inference Endpoints: Ensure the endpoint you're trying to use is still running; endpoints go idle after a period of inactivity so that you are not charged while not using them + +![Model Running](../_assets/hf_jan_nano_9.png) + +**2. Connection Problems** +- Check your internet connection +- Verify Hugging Face's system status +- Look for error messages in [Jan's logs](/docs/troubleshooting#how-to-get-error-logs) + +**3. Model Unavailable** +- Confirm your API key has access to the model +- Check if you're using the correct model ID +- Verify your Hugging Face account has the necessary permissions + +Need more help? Join our [Discord community](https://discord.gg/FTk2MvZwJH) or check +[Hugging Face's documentation](https://docs.huggingface.co/en/inference-endpoints/index). diff --git a/docs/src/pages/post/_assets/gpt-oss locally.jpeg b/docs/src/pages/post/_assets/gpt-oss locally.jpeg new file mode 100644 index 000000000..7d0e59717 Binary files /dev/null and b/docs/src/pages/post/_assets/gpt-oss locally.jpeg differ diff --git a/docs/src/pages/post/_assets/jan gpt-oss.jpeg b/docs/src/pages/post/_assets/jan gpt-oss.jpeg new file mode 100644 index 000000000..2e9e6b9d6 Binary files /dev/null and b/docs/src/pages/post/_assets/jan gpt-oss.jpeg differ diff --git a/docs/src/pages/post/_assets/jan hub gpt-oss locally.jpeg b/docs/src/pages/post/_assets/jan hub gpt-oss locally.jpeg new file mode 100644 index 000000000..04b0f5ca6 Binary files /dev/null and b/docs/src/pages/post/_assets/jan hub gpt-oss locally.jpeg differ diff --git a/docs/src/pages/post/_assets/run gpt-oss locally in jan.jpeg b/docs/src/pages/post/_assets/run gpt-oss locally in jan.jpeg new file mode 100644 index 000000000..68b6d725c Binary files /dev/null and b/docs/src/pages/post/_assets/run gpt-oss locally in jan.jpeg differ diff --git a/docs/src/pages/post/run-gpt-oss-locally.mdx b/docs/src/pages/post/run-gpt-oss-locally.mdx new file mode 100644 index 000000000..5f71e8b45 --- /dev/null +++ b/docs/src/pages/post/run-gpt-oss-locally.mdx @@ -0,0 +1,211 @@ +--- +title: "Run OpenAI's gpt-oss locally in 5 mins (Beginner Guide)" +description: "Complete 5-minute beginner guide to running OpenAI's gpt-oss locally. Step-by-step setup with Jan AI for private, offline AI conversations." +tags: OpenAI, gpt-oss, local AI, Jan, privacy, Apache-2.0, llama.cpp, Ollama, LM Studio +categories: guides +date: 2025-08-06 +ogImage: assets/gpt-oss%20locally.jpeg +twitter: + card: summary_large_image + site: "@jandotai" + title: "Run OpenAI's gpt-oss Locally in 5 Minutes (Beginner Guide)" + description: "Complete 5-minute beginner guide to running OpenAI's gpt-oss locally with Jan AI for private, offline conversations."
+ image: assets/gpt-oss%20locally.jpeg +--- +import { Callout } from 'nextra/components' +import CTABlog from '@/components/Blog/CTA' + +# Run OpenAI's gpt-oss Locally in 5 mins + +![gpt-oss running locally in Jan interface](./_assets/gpt-oss%20locally.jpeg) + +OpenAI launched [gpt-oss](https://openai.com/index/introducing-gpt-oss/), marking their return to open-source AI after GPT-2. This model is designed to run locally on consumer hardware. This guide shows you how to install and run gpt-oss on your computer for private, offline AI conversations. + +## What is gpt-oss? + +gpt-oss is OpenAI's open-source large language model, released under the Apache-2.0 license. Unlike ChatGPT, gpt-oss: + +- Runs completely offline - No internet required after setup +- 100% private - Your conversations never leave your device +- Unlimited usage - No token limits or rate limiting +- Free forever - No subscription fees +- Commercial use allowed - Apache-2.0 license permits business use + +Running AI models locally means everything happens on your own hardware, giving you complete control over your data and conversations. + +## gpt-oss System Requirements + +| Component | Minimum | Recommended | +|-----------|---------|-------------| +| **RAM** | 16 GB | 32 GB+ | +| **Storage** | 11+ GB free | 25 GB+ free | +| **CPU** | 4 cores | 8+ cores | +| **GPU** | Optional | Modern GPU with 6GB+ VRAM recommended | +| **OS** | Windows 10+, macOS 11+, Linux | Latest versions | + +**Installation apps available:** +- **Jan** (Recommended - easiest setup) +- **llama.cpp** (Command line) +- **Ollama** (Docker-based) +- **LM Studio** (GUI alternative) + +## How to install gpt-oss locally with Jan (5 mins) + +### Step 1: Download Jan + +First download Jan to run gpt-oss locally: [Download Jan AI](https://jan.ai/) + + +Jan is the simplest way to run AI models locally. It automatically handles CPU/GPU optimization, provides a clean chat interface, and requires zero technical knowledge. + + +### Step 2: Install gpt-oss Model (2-3 minutes) + +![Jan Hub showing gpt-oss model in the hub](./_assets/jan%20hub%20gpt-oss%20locally.jpeg) + +1. Open Jan Hub -> search "gpt-oss" (it appears at the top) +2. Click Download and wait for completion (~11GB download) +3. Installation is automatic - Jan handles everything + +### Step 3: Start using gpt-oss offline (30 seconds) + +![Jan interface with gpt-oss model selected and ready to chat](./_assets/jan%20gpt-oss.jpeg) + +1. Go to New Chat → select gpt-oss-20b from model picker +2. Start chatting - Jan automatically optimizes for your hardware +3. You're done! Your AI conversations now stay completely private + +Success: Your gpt-oss setup is complete. No internet required for chatting, unlimited usage, zero subscription fees. 
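+Beyond the chat UI, Jan can also expose the model to other tools on your machine through its local, OpenAI-compatible API server (enabled in Jan's settings). The sketch below is an assumption-heavy example: the port, any API-key requirement, and the exact model ID all depend on your Jan configuration, so adjust them to match what your local server reports.
+
+```bash
+# Assumes Jan's local API server is enabled; the port and model ID below are
+# placeholders - check Jan's local API server settings for the real values.
+curl http://127.0.0.1:1337/v1/chat/completions \
+  -H "Content-Type: application/json" \
+  -d '{
+        "model": "gpt-oss-20b",
+        "messages": [{"role": "user", "content": "Explain local AI in one sentence."}]
+      }'
+```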
+ +## Jan with gpt-oss vs ChatGPT vs other Local AI Models + +| Feature | gpt-oss (Local) | ChatGPT Plus | Claude Pro | Other Local Models | +|---------|----------------|--------------|------------|-------------------| +| Cost | Free forever | $20/month | $20/month | Free | +| Privacy | 100% private | Data sent to OpenAI | Data sent to Anthropic | 100% private | +| Internet | Offline after setup | Requires internet | Requires internet | Offline | +| Usage limits | Unlimited | Rate limited | Rate limited | Unlimited | +| Performance | Good (hardware dependent) | Excellent | Excellent | Varies | +| Setup difficulty | Easy with Jan | None | None | Varies | + +## Alternative Installation Methods + +### Option 1: Jan (Recommended) + +- Best for: Complete beginners, users wanting GUI interface +- Setup time: 5 minutes +- Difficulty: Very Easy + +Already covered above - [Download Jan](https://jan.ai/) + +### Option 2: llama.cpp (Command Line) + +- Best for: Developers, terminal users, custom integrations +- Setup time: 10-15 minutes +- Difficulty: Intermediate + +```bash +# macOS +brew install llama-cpp + +# Windows: grab Windows exe from releases +curl -L -o gpt-oss-20b.gguf https://huggingface.co/openai/gpt-oss-20b-gguf/resolve/main/gpt-oss-20b.gguf +./main -m gpt-oss-20b.gguf --chat-simple + +# Add GPU acceleration (adjust -ngl value based on your GPU VRAM) +./main -m gpt-oss-20b.gguf --chat-simple -ngl 20 +``` + +### Option 3: Ollama (Docker-Based) + +Best for: Docker users, server deployments +Setup time: 5-10 minutes +Difficulty: Intermediate + +```bash +# Install from https://ollama.com +ollama run gpt-oss:20b +``` + +### Option 4: LM Studio (GUI Alternative) + +Best for: Users wanting GUI but not Jan +Setup time: 10 minutes +Difficulty: Easy + +1. Download LM Studio from official website +2. Go to Models → search "gpt-oss-20b (GGUF)" +3. Download the model (wait for completion) +4. Go to Chat tab → select the model and start chatting + +## gpt-oss Performance & Troubleshooting + +### Expected Performance Benchmarks + +| Hardware Setup | First Response | Subsequent Responses | Tokens/Second | +|---------------|---------------|---------------------|---------------| +| **16GB RAM + CPU only** | 30-45 seconds | 3-6 seconds | 3-8 tokens/sec | +| **32GB RAM + RTX 3060** | 15-25 seconds | 1-3 seconds | 15-25 tokens/sec | +| **32GB RAM + RTX 4080+** | 8-15 seconds | 1-2 seconds | 25-45 tokens/sec | + +### Common Issues & Solutions + +Performance optimization tips: +- First response is slow: Normal - kernels compile once, then speed up dramatically +- Out of VRAM error: Reduce context length in settings or switch to CPU mode +- Out of memory: Close memory-heavy apps (Chrome, games, video editors) +- Slow responses: Check if other apps are using GPU/CPU heavily + +Quick fixes: +1. Restart Jan if responses become slow +2. Lower context window from 4096 to 2048 tokens +3. Enable CPU mode if GPU issues persist +4. Free up RAM by closing unused applications + +## Frequently Asked Questions (FAQ) + +### Is gpt-oss completely free? +Yes! gpt-oss is 100% free under Apache-2.0 license. No subscription fees, no token limits, no hidden costs. + +### How much internet data does gpt-oss use? +Only for the initial 11GB download. After installation, gpt-oss works completely offline with zero internet usage. + +### Can I use gpt-oss for commercial projects? +Absolutely! The Apache-2.0 license permits commercial use, modification, and distribution. + +### Is gpt-oss better than ChatGPT? 
+gpt-oss offers different advantages: complete privacy, unlimited usage, offline capability, and no costs. ChatGPT may have better performance but requires internet and subscriptions. + +### What happens to my conversations with gpt-oss? +Your conversations stay 100% on your device. Nothing is sent to OpenAI, Jan, or any external servers. + +### Can I run gpt-oss on a Mac with 8GB RAM? +No, gpt-oss requires minimum 16GB RAM. Consider upgrading your RAM or using cloud-based alternatives. + +### How do I update gpt-oss to newer versions? +Jan automatically notifies you of updates. Simply click update in Jan Hub when new versions are available. + +## Why Choose gpt-oss Over ChatGPT Plus? + +gpt-oss advantages: +- $0/month vs $20/month for ChatGPT Plus +- 100% private - no data leaves your device +- Unlimited usage - no rate limits or restrictions +- Works offline - no internet required after setup +- Commercial use allowed - build businesses with it + +When to choose ChatGPT Plus instead: +- You need the absolute best performance +- You don't want to manage local installation +- You have less than 16GB RAM + +## Get started with gpt-oss today + +![gpt-oss running locally with complete privacy](./_assets/run%20gpt-oss%20locally%20in%20jan.jpeg) + +Ready to try gpt-oss? +- Download Jan: [https://jan.ai/](https://jan.ai/) +- View source code: [https://github.com/menloresearch/jan](https://github.com/menloresearch/jan) +- Need help? Check our [local AI guide](/post/run-ai-models-locally) for beginners + + \ No newline at end of file diff --git a/extensions/llamacpp-extension/settings.json b/extensions/llamacpp-extension/settings.json index 363822f9a..46c4995ff 100644 --- a/extensions/llamacpp-extension/settings.json +++ b/extensions/llamacpp-extension/settings.json @@ -25,18 +25,6 @@ "controllerType": "checkbox", "controllerProps": { "value": true } }, - { - "key": "chat_template", - "title": "Custom Jinja Chat template", - "description": "Custom Jinja chat_template to be used for the model", - "controllerType": "input", - "controllerProps": { - "value": "", - "placeholder": "e.g., {% for message in messages %}...{% endfor %} (default is read from GGUF)", - "type": "text", - "textAlign": "right" - } - }, { "key": "threads", "title": "Threads", diff --git a/extensions/llamacpp-extension/src/backend.ts b/extensions/llamacpp-extension/src/backend.ts index e8068f63b..3bf6a2675 100644 --- a/extensions/llamacpp-extension/src/backend.ts +++ b/extensions/llamacpp-extension/src/backend.ts @@ -50,14 +50,18 @@ export async function listSupportedBackends(): Promise< if (features.avx2) supportedBackends.push('linux-avx2-x64') if (features.avx512) supportedBackends.push('linux-avx512-x64') if (features.cuda11) { - if (features.avx512) supportedBackends.push('linux-avx512-cuda-cu11.7-x64') - else if (features.avx2) supportedBackends.push('linux-avx2-cuda-cu11.7-x64') + if (features.avx512) + supportedBackends.push('linux-avx512-cuda-cu11.7-x64') + else if (features.avx2) + supportedBackends.push('linux-avx2-cuda-cu11.7-x64') else if (features.avx) supportedBackends.push('linux-avx-cuda-cu11.7-x64') else supportedBackends.push('linux-noavx-cuda-cu11.7-x64') } if (features.cuda12) { - if (features.avx512) supportedBackends.push('linux-avx512-cuda-cu12.0-x64') - else if (features.avx2) supportedBackends.push('linux-avx2-cuda-cu12.0-x64') + if (features.avx512) + supportedBackends.push('linux-avx512-cuda-cu12.0-x64') + else if (features.avx2) + supportedBackends.push('linux-avx2-cuda-cu12.0-x64') else if 
(features.avx) supportedBackends.push('linux-avx-cuda-cu12.0-x64') else supportedBackends.push('linux-noavx-cuda-cu12.0-x64') } @@ -256,10 +260,16 @@ async function _getSupportedFeatures() { if (compareVersions(driverVersion, minCuda12DriverVersion) >= 0) features.cuda12 = true } - - if (gpuInfo.vulkan_info?.api_version) features.vulkan = true + // Vulkan support check - only discrete GPUs with 6GB+ VRAM + if ( + gpuInfo.vulkan_info?.api_version && + gpuInfo.vulkan_info?.device_type === 'DISCRETE_GPU' && + gpuInfo.total_memory >= 6 * 1024 + ) { + // 6GB (total_memory is in MB) + features.vulkan = true + } } - return features } diff --git a/extensions/llamacpp-extension/src/index.ts b/extensions/llamacpp-extension/src/index.ts index 140b08418..92ceaad60 100644 --- a/extensions/llamacpp-extension/src/index.ts +++ b/extensions/llamacpp-extension/src/index.ts @@ -39,6 +39,7 @@ type LlamacppConfig = { auto_unload: boolean chat_template: string n_gpu_layers: number + override_tensor_buffer_t: string ctx_size: number threads: number threads_batch: number @@ -144,7 +145,6 @@ export default class llamacpp_extension extends AIEngine { readonly providerId: string = 'llamacpp' private config: LlamacppConfig - private activeSessions: Map = new Map() private providerPath!: string private apiSecret: string = 'JustAskNow' private pendingDownloads: Map> = new Map() @@ -770,16 +770,6 @@ export default class llamacpp_extension extends AIEngine { override async onUnload(): Promise { // Terminate all active sessions - for (const [_, sInfo] of this.activeSessions) { - try { - await this.unload(sInfo.model_id) - } catch (error) { - logger.error(`Failed to unload model ${sInfo.model_id}:`, error) - } - } - - // Clear the sessions map - this.activeSessions.clear() } onSettingUpdate(key: string, value: T): void { @@ -1103,67 +1093,13 @@ export default class llamacpp_extension extends AIEngine { * Function to find a random port */ private async getRandomPort(): Promise { - const MAX_ATTEMPTS = 20000 - let attempts = 0 - - while (attempts < MAX_ATTEMPTS) { - const port = Math.floor(Math.random() * 1000) + 3000 - - const isAlreadyUsed = Array.from(this.activeSessions.values()).some( - (info) => info.port === port - ) - - if (!isAlreadyUsed) { - const isAvailable = await invoke('is_port_available', { port }) - if (isAvailable) return port - } - - attempts++ + try { + const port = await invoke('get_random_port') + return port + } catch { + logger.error('Unable to find a suitable port') + throw new Error('Unable to find a suitable port for model') } - - throw new Error('Failed to find an available port for the model to load') - } - - private async sleep(ms: number): Promise { - return new Promise((resolve) => setTimeout(resolve, ms)) - } - - private async waitForModelLoad( - sInfo: SessionInfo, - timeoutMs = 240_000 - ): Promise { - await this.sleep(500) // Wait before first check - const start = Date.now() - while (Date.now() - start < timeoutMs) { - try { - const res = await fetch(`http://localhost:${sInfo.port}/health`) - - if (res.status === 503) { - const body = await res.json() - const msg = body?.error?.message ?? 'Model loading' - logger.info(`waiting for model load... 
(${msg})`) - } else if (res.ok) { - const body = await res.json() - if (body.status === 'ok') { - return - } else { - logger.warn('Unexpected OK response from /health:', body) - } - } else { - logger.warn(`Unexpected status ${res.status} from /health`) - } - } catch (e) { - await this.unload(sInfo.model_id) - throw new Error(`Model appears to have crashed: ${e}`) - } - - await this.sleep(800) // Retry interval - } - - await this.unload(sInfo.model_id) - throw new Error( - `Timed out loading model after ${timeoutMs}... killing llamacpp` - ) } override async load( @@ -1171,7 +1107,7 @@ export default class llamacpp_extension extends AIEngine { overrideSettings?: Partial, isEmbedding: boolean = false ): Promise { - const sInfo = this.findSessionByModel(modelId) + const sInfo = await this.findSessionByModel(modelId) if (sInfo) { throw new Error('Model already loaded!!') } @@ -1262,6 +1198,14 @@ export default class llamacpp_extension extends AIEngine { args.push('--jinja') args.push('--reasoning-format', 'none') args.push('-m', modelPath) + // For overriding tensor buffer type, useful where + // massive MOE models can be made faster by keeping attention on the GPU + // and offloading the expert FFNs to the CPU. + // This is an expert level settings and should only be used by people + // who knows what they are doing. + // Takes a regex with matching tensor name as input + if (cfg.override_tensor_buffer_t) + args.push('--override-tensor', cfg.override_tensor_buffer_t) args.push('-a', modelId) args.push('--port', String(port)) if (modelConfig.mmproj_path) { @@ -1333,26 +1277,20 @@ export default class llamacpp_extension extends AIEngine { libraryPath, args, }) - - // Store the session info for later use - this.activeSessions.set(sInfo.pid, sInfo) - await this.waitForModelLoad(sInfo) - return sInfo } catch (error) { - logger.error('Error loading llama-server:\n', error) - throw new Error(`Failed to load llama-server: ${error}`) + logger.error('Error in load command:\n', error) + throw new Error(`Failed to load model:\n${error}`) } } override async unload(modelId: string): Promise { - const sInfo: SessionInfo = this.findSessionByModel(modelId) + const sInfo: SessionInfo = await this.findSessionByModel(modelId) if (!sInfo) { throw new Error(`No active session found for model: ${modelId}`) } const pid = sInfo.pid try { - this.activeSessions.delete(pid) // Pass the PID as the session_id const result = await invoke('unload_llama_model', { @@ -1364,13 +1302,11 @@ export default class llamacpp_extension extends AIEngine { logger.info(`Successfully unloaded model with PID ${pid}`) } else { logger.warn(`Failed to unload model: ${result.error}`) - this.activeSessions.set(sInfo.pid, sInfo) } return result } catch (error) { logger.error('Error in unload command:', error) - this.activeSessions.set(sInfo.pid, sInfo) return { success: false, error: `Failed to unload model: ${error}`, @@ -1493,17 +1429,21 @@ export default class llamacpp_extension extends AIEngine { } } - private findSessionByModel(modelId: string): SessionInfo | undefined { - return Array.from(this.activeSessions.values()).find( - (session) => session.model_id === modelId - ) + private async findSessionByModel(modelId: string): Promise { + try { + let sInfo = await invoke('find_session_by_model', {modelId}) + return sInfo + } catch (e) { + logger.error(e) + throw new Error(String(e)) + } } override async chat( opts: chatCompletionRequest, abortController?: AbortController ): Promise> { - const sessionInfo = this.findSessionByModel(opts.model) + 
const sessionInfo = await this.findSessionByModel(opts.model) if (!sessionInfo) { throw new Error(`No active session found for model: ${opts.model}`) } @@ -1519,7 +1459,6 @@ export default class llamacpp_extension extends AIEngine { throw new Error('Model appears to have crashed! Please reload!') } } else { - this.activeSessions.delete(sessionInfo.pid) throw new Error('Model have crashed! Please reload!') } const baseUrl = `http://localhost:${sessionInfo.port}/v1` @@ -1568,11 +1507,13 @@ export default class llamacpp_extension extends AIEngine { } override async getLoadedModels(): Promise { - let lmodels: string[] = [] - for (const [_, sInfo] of this.activeSessions) { - lmodels.push(sInfo.model_id) - } - return lmodels + try { + let models: string[] = await invoke('get_loaded_models') + return models + } catch (e) { + logger.error(e) + throw new Error(e) + } } async getDevices(): Promise { @@ -1602,7 +1543,7 @@ export default class llamacpp_extension extends AIEngine { } async embed(text: string[]): Promise { - let sInfo = this.findSessionByModel('sentence-transformer-mini') + let sInfo = await this.findSessionByModel('sentence-transformer-mini') if (!sInfo) { const downloadedModelList = await this.list() if ( diff --git a/src-tauri/Cargo.toml b/src-tauri/Cargo.toml index 0f334d178..ca1a54bba 100644 --- a/src-tauri/Cargo.toml +++ b/src-tauri/Cargo.toml @@ -63,8 +63,12 @@ nix = "=0.30.1" [target.'cfg(windows)'.dependencies] libc = "0.2.172" +windows-sys = { version = "0.60.2", features = ["Win32_Storage_FileSystem"] } [target.'cfg(not(any(target_os = "android", target_os = "ios")))'.dependencies] tauri-plugin-updater = "2" once_cell = "1.18" tauri-plugin-single-instance = { version = "2.0.0", features = ["deep-link"] } + +[target.'cfg(windows)'.dev-dependencies] +tempfile = "3.20.0" diff --git a/src-tauri/src/core/utils/extensions/inference_llamacpp_extension/server.rs b/src-tauri/src/core/utils/extensions/inference_llamacpp_extension/server.rs index ffa6cfe92..b95e17010 100644 --- a/src-tauri/src/core/utils/extensions/inference_llamacpp_extension/server.rs +++ b/src-tauri/src/core/utils/extensions/inference_llamacpp_extension/server.rs @@ -1,7 +1,9 @@ use base64::{engine::general_purpose, Engine as _}; use hmac::{Hmac, Mac}; +use rand::{rngs::StdRng, Rng, SeedableRng}; use serde::{Deserialize, Serialize}; use sha2::Sha256; +use std::collections::HashSet; use std::path::PathBuf; use std::process::Stdio; use std::time::Duration; @@ -67,13 +69,39 @@ pub struct DeviceInfo { pub free: i32, } +#[cfg(windows)] +use std::os::windows::ffi::OsStrExt; + +#[cfg(windows)] +use std::ffi::OsStr; + +#[cfg(windows)] +use windows_sys::Win32::Storage::FileSystem::GetShortPathNameW; + +#[cfg(windows)] +pub fn get_short_path>(path: P) -> Option { + let wide: Vec = OsStr::new(path.as_ref()) + .encode_wide() + .chain(Some(0)) + .collect(); + + let mut buffer = vec![0u16; 260]; + let len = unsafe { GetShortPathNameW(wide.as_ptr(), buffer.as_mut_ptr(), buffer.len() as u32) }; + + if len > 0 { + Some(String::from_utf16_lossy(&buffer[..len as usize])) + } else { + None + } +} + // --- Load Command --- #[tauri::command] pub async fn load_llama_model( state: State<'_, AppState>, backend_path: &str, library_path: Option<&str>, - args: Vec, + mut args: Vec, ) -> ServerResult { let mut process_map = state.llama_server_process.lock().await; @@ -105,13 +133,38 @@ pub async fn load_llama_model( 8080 } }; - - let model_path = args + // FOR MODEL PATH; TODO: DO SIMILARLY FOR MMPROJ PATH + let model_path_index = args .iter() 
.position(|arg| arg == "-m") - .and_then(|i| args.get(i + 1)) - .cloned() - .unwrap_or_default(); + .ok_or(ServerError::LlamacppError("Missing `-m` flag".into()))?; + + let model_path = args + .get(model_path_index + 1) + .ok_or(ServerError::LlamacppError("Missing path after `-m`".into()))? + .clone(); + + let model_path_pb = PathBuf::from(model_path); + if !model_path_pb.exists() { + return Err(ServerError::LlamacppError(format!( + "Invalid or inaccessible model path: {}", + model_path_pb.display().to_string(), + ))); + } + #[cfg(windows)] + { + // use short path on Windows + if let Some(short) = get_short_path(&model_path_pb) { + args[model_path_index + 1] = short; + } else { + args[model_path_index + 1] = model_path_pb.display().to_string(); + } + } + #[cfg(not(windows))] + { + args[model_path_index + 1] = model_path_pb.display().to_string(); + } + // ----------------------------------------------------------------- let api_key = args .iter() @@ -181,7 +234,6 @@ pub async fn load_llama_model( // Create channels for communication between tasks let (ready_tx, mut ready_rx) = mpsc::channel::(1); - let (error_tx, mut error_rx) = mpsc::channel::(1); // Spawn task to monitor stdout for readiness let _stdout_task = tokio::spawn(async move { @@ -228,20 +280,10 @@ pub async fn load_llama_model( // Check for critical error indicators that should stop the process let line_lower = line.to_string().to_lowercase(); - if line_lower.contains("error loading model") - || line_lower.contains("unknown model architecture") - || line_lower.contains("fatal") - || line_lower.contains("cuda error") - || line_lower.contains("out of memory") - || line_lower.contains("error") - || line_lower.contains("failed") - { - let _ = error_tx.send(line.to_string()).await; - } // Check for readiness indicator - llama-server outputs this when ready - else if line.contains("server is listening on") - || line.contains("starting the main loop") - || line.contains("server listening on") + if line_lower.contains("server is listening on") + || line_lower.contains("starting the main loop") + || line_lower.contains("server listening on") { log::info!("Server appears to be ready based on stderr: '{}'", line); let _ = ready_tx.send(true).await; @@ -279,26 +321,6 @@ pub async fn load_llama_model( log::info!("Server is ready to accept requests!"); break; } - // Error occurred - Some(error_msg) = error_rx.recv() => { - log::error!("Server encountered an error: {}", error_msg); - - // Give process a moment to exit naturally - tokio::time::sleep(Duration::from_millis(100)).await; - - // Check if process already exited - if let Some(status) = child.try_wait()? 
{ - log::info!("Process exited with code {:?}", status); - return Err(ServerError::LlamacppError(error_msg)); - } else { - log::info!("Process still running, killing it..."); - let _ = child.kill().await; - } - - // Get full stderr output - let stderr_output = stderr_task.await.unwrap_or_default(); - return Err(ServerError::LlamacppError(format!("Error: {}\n\nFull stderr:\n{}", error_msg, stderr_output))); - } // Check for process exit more frequently _ = tokio::time::sleep(Duration::from_millis(50)) => { // Check if process exited @@ -332,7 +354,7 @@ pub async fn load_llama_model( pid: pid.clone(), port: port, model_id: model_id, - model_path: model_path, + model_path: model_path_pb.display().to_string(), api_key: api_key, }; @@ -704,16 +726,88 @@ pub async fn is_process_running(pid: i32, state: State<'_, AppState>) -> Result< } // check port availability -#[tauri::command] -pub fn is_port_available(port: u16) -> bool { +fn is_port_available(port: u16) -> bool { std::net::TcpListener::bind(("127.0.0.1", port)).is_ok() } +#[tauri::command] +pub async fn get_random_port(state: State<'_, AppState>) -> Result { + const MAX_ATTEMPTS: u32 = 20000; + let mut attempts = 0; + let mut rng = StdRng::from_entropy(); + + // Get all active ports from sessions + let map = state.llama_server_process.lock().await; + + let used_ports: HashSet = map + .values() + .filter_map(|session| { + // Convert valid ports to u16 (filter out placeholder ports like -1) + if session.info.port > 0 && session.info.port <= u16::MAX as i32 { + Some(session.info.port as u16) + } else { + None + } + }) + .collect(); + + drop(map); // unlock early + + while attempts < MAX_ATTEMPTS { + let port = rng.gen_range(3000..4000); + + if used_ports.contains(&port) { + attempts += 1; + continue; + } + + if is_port_available(port) { + return Ok(port); + } + + attempts += 1; + } + + Err("Failed to find an available port for the model to load".into()) +} + +// find session +#[tauri::command] +pub async fn find_session_by_model( + model_id: String, + state: State<'_, AppState>, +) -> Result, String> { + let map = state.llama_server_process.lock().await; + + let session_info = map + .values() + .find(|backend_session| backend_session.info.model_id == model_id) + .map(|backend_session| backend_session.info.clone()); + + Ok(session_info) +} + +// get running models +#[tauri::command] +pub async fn get_loaded_models(state: State<'_, AppState>) -> Result, String> { + let map = state.llama_server_process.lock().await; + + let model_ids = map + .values() + .map(|backend_session| backend_session.info.model_id.clone()) + .collect(); + + Ok(model_ids) +} + // tests // #[cfg(test)] mod tests { use super::*; + use std::path::PathBuf; + #[cfg(windows)] + use tempfile; #[test] fn test_parse_multiple_devices() { @@ -899,4 +993,41 @@ Vulkan1: AMD Radeon Graphics (RADV GFX1151) (87722 MiB, 87722 MiB free)"#; let (_start, content) = result.unwrap(); assert_eq!(content, "8128 MiB, 8128 MiB free"); } + #[test] + fn test_path_with_uncommon_dir_names() { + const UNCOMMON_DIR_NAME: &str = "тест-你好-éàç-🚀"; + #[cfg(windows)] + { + let dir = tempfile::tempdir().expect("Failed to create temp dir"); + let long_path = dir.path().join(UNCOMMON_DIR_NAME); + std::fs::create_dir(&long_path) + .expect("Failed to create test directory with non-ASCII name"); + let short_path = get_short_path(&long_path); + assert!( + short_path.is_ascii(), + "The resulting short path must be composed of only ASCII characters. 
Got: {}", + short_path + ); + assert!( + PathBuf::from(&short_path).exists(), + "The returned short path must exist on the filesystem" + ); + assert_ne!( + short_path, + long_path.to_str().unwrap(), + "Short path should not be the same as the long path" + ); + } + #[cfg(not(windows))] + { + // On Unix, paths are typically UTF-8 and there's no "short path" concept. + let long_path_str = format!("/tmp/{}", UNCOMMON_DIR_NAME); + let path_buf = PathBuf::from(&long_path_str); + let displayed_path = path_buf.display().to_string(); + assert_eq!( + displayed_path, long_path_str, + "Path with non-ASCII characters should be preserved exactly on non-Windows platforms" + ); + } + } } diff --git a/src-tauri/src/core/utils/mod.rs b/src-tauri/src/core/utils/mod.rs index d6e658633..4e0149e20 100644 --- a/src-tauri/src/core/utils/mod.rs +++ b/src-tauri/src/core/utils/mod.rs @@ -210,3 +210,4 @@ pub fn is_library_available(library: &str) -> bool { } } } + diff --git a/src-tauri/src/lib.rs b/src-tauri/src/lib.rs index 7083cdebe..e449fc739 100644 --- a/src-tauri/src/lib.rs +++ b/src-tauri/src/lib.rs @@ -95,7 +95,9 @@ pub fn run() { core::utils::extensions::inference_llamacpp_extension::server::load_llama_model, core::utils::extensions::inference_llamacpp_extension::server::unload_llama_model, core::utils::extensions::inference_llamacpp_extension::server::get_devices, - core::utils::extensions::inference_llamacpp_extension::server::is_port_available, + core::utils::extensions::inference_llamacpp_extension::server::get_random_port, + core::utils::extensions::inference_llamacpp_extension::server::find_session_by_model, + core::utils::extensions::inference_llamacpp_extension::server::get_loaded_models, core::utils::extensions::inference_llamacpp_extension::server::generate_api_key, core::utils::extensions::inference_llamacpp_extension::server::is_process_running, ]) @@ -129,6 +131,12 @@ pub fn run() { if let Err(e) = setup::install_extensions(app.handle().clone(), false) { log::error!("Failed to install extensions: {}", e); } + + #[cfg(any(windows, target_os = "linux"))] + { + use tauri_plugin_deep_link::DeepLinkExt; + app.deep_link().register_all()?; + } setup_mcp(app); Ok(()) }) diff --git a/web-app/src/constants/localStorage.ts b/web-app/src/constants/localStorage.ts index b7e33b006..ae744837b 100644 --- a/web-app/src/constants/localStorage.ts +++ b/web-app/src/constants/localStorage.ts @@ -20,4 +20,5 @@ export const localStorageKey = { lastUsedModel: 'last-used-model', lastUsedAssistant: 'last-used-assistant', favoriteModels: 'favorite-models', + setupCompleted: 'setup-completed', } diff --git a/web-app/src/containers/ModelSetting.tsx b/web-app/src/containers/ModelSetting.tsx index d54e406b2..29d996382 100644 --- a/web-app/src/containers/ModelSetting.tsx +++ b/web-app/src/containers/ModelSetting.tsx @@ -71,7 +71,7 @@ export function ModelSetting({ }) // Call debounced stopModel only when updating ctx_len or ngl - if (key === 'ctx_len' || key === 'ngl') { + if (key === 'ctx_len' || key === 'ngl' || key === 'chat_template') { debouncedStopModel(model.id) } } @@ -101,9 +101,17 @@ export function ModelSetting({
{Object.entries(model.settings || {}).map(([key, value]) => { const config = value as ProviderSetting + return (
-
+

{config.title}

diff --git a/web-app/src/containers/SetupScreen.tsx b/web-app/src/containers/SetupScreen.tsx index 4144e4e0a..e9867b38a 100644 --- a/web-app/src/containers/SetupScreen.tsx +++ b/web-app/src/containers/SetupScreen.tsx @@ -5,6 +5,7 @@ import { route } from '@/constants/routes' import HeaderPage from './HeaderPage' import { isProd } from '@/lib/version' import { useTranslation } from '@/i18n/react-i18next-compat' +import { localStorageKey } from '@/constants/localStorage' function SetupScreen() { const { t } = useTranslation() @@ -12,6 +13,10 @@ function SetupScreen() { const firstItemRemoteProvider = providers.length > 0 ? providers[1].provider : 'openai' + // Check if setup tour has been completed + const isSetupCompleted = + localStorage.getItem(localStorageKey.setupCompleted) === 'true' + return (

@@ -50,7 +55,9 @@ function SetupScreen() { providerName: firstItemRemoteProvider, }} search={{ - step: 'setup_remote_provider', + ...(!isSetupCompleted + ? { step: 'setup_remote_provider' } + : {}), }} >

diff --git a/web-app/src/containers/ThinkingBlock.tsx b/web-app/src/containers/ThinkingBlock.tsx index c4e6742c5..7a1e7b540 100644 --- a/web-app/src/containers/ThinkingBlock.tsx +++ b/web-app/src/containers/ThinkingBlock.tsx @@ -30,14 +30,31 @@ const ThinkingBlock = ({ id, text }: Props) => { const { thinkingState, setThinkingState } = useThinkingStore() const { streamingContent } = useAppState() const { t } = useTranslation() - const loading = !text.includes('') && streamingContent + // Check for thinking formats + const hasThinkTag = text.includes('') && !text.includes('') + const hasAnalysisChannel = text.includes('<|channel|>analysis<|message|>') && !text.includes('<|start|>assistant<|channel|>final<|message|>') + const loading = (hasThinkTag || hasAnalysisChannel) && streamingContent const isExpanded = thinkingState[id] ?? (loading ? true : false) const handleClick = () => { const newExpandedState = !isExpanded setThinkingState(id, newExpandedState) } - if (!text.replace(/<\/?think>/g, '').trim()) return null + // Extract thinking content from either format + const extractThinkingContent = (text: string) => { + return text + .replace(/<\/?think>/g, '') + .replace(/<\|channel\|>analysis<\|message\|>/g, '') + .replace(/<\|start\|>assistant<\|channel\|>final<\|message\|>/g, '') + .replace(/assistant<\|channel\|>final<\|message\|>/g, '') + .replace(/<\|channel\|>/g, '') // remove any remaining channel markers + .replace(/<\|message\|>/g, '') // remove any remaining message markers + .replace(/<\|start\|>/g, '') // remove any remaining start markers + .trim() + } + + const thinkingContent = extractThinkingContent(text) + if (!thinkingContent) return null return (
{ {isExpanded && (
- /g, '').trim()} /> +
)}
diff --git a/web-app/src/containers/ThreadContent.tsx b/web-app/src/containers/ThreadContent.tsx index 716c6bfc4..54ba342cb 100644 --- a/web-app/src/containers/ThreadContent.tsx +++ b/web-app/src/containers/ThreadContent.tsx @@ -170,18 +170,33 @@ export const ThreadContent = memo( ) const { reasoningSegment, textSegment } = useMemo(() => { - const isThinking = text.includes('') && !text.includes('') - if (isThinking) return { reasoningSegment: text, textSegment: '' } + // Check for thinking formats + const hasThinkTag = text.includes('') && !text.includes('') + const hasAnalysisChannel = text.includes('<|channel|>analysis<|message|>') && !text.includes('<|start|>assistant<|channel|>final<|message|>') + + if (hasThinkTag || hasAnalysisChannel) return { reasoningSegment: text, textSegment: '' } - const match = text.match(/([\s\S]*?)<\/think>/) - if (match?.index === undefined) - return { reasoningSegment: undefined, textSegment: text } - - const splitIndex = match.index + match[0].length - return { - reasoningSegment: text.slice(0, splitIndex), - textSegment: text.slice(splitIndex), + // Check for completed think tag format + const thinkMatch = text.match(/([\s\S]*?)<\/think>/) + if (thinkMatch?.index !== undefined) { + const splitIndex = thinkMatch.index + thinkMatch[0].length + return { + reasoningSegment: text.slice(0, splitIndex), + textSegment: text.slice(splitIndex), + } } + + // Check for completed analysis channel format + const analysisMatch = text.match(/<\|channel\|>analysis<\|message\|>([\s\S]*?)<\|start\|>assistant<\|channel\|>final<\|message\|>/) + if (analysisMatch?.index !== undefined) { + const splitIndex = analysisMatch.index + analysisMatch[0].length + return { + reasoningSegment: text.slice(0, splitIndex), + textSegment: text.slice(splitIndex), + } + } + + return { reasoningSegment: undefined, textSegment: text } }, [text]) const { getMessages, deleteMessage } = useMessages() diff --git a/web-app/src/hooks/useModelProvider.ts b/web-app/src/hooks/useModelProvider.ts index bfcb4e3a5..b1a988183 100644 --- a/web-app/src/hooks/useModelProvider.ts +++ b/web-app/src/hooks/useModelProvider.ts @@ -2,6 +2,7 @@ import { create } from 'zustand' import { persist, createJSONStorage } from 'zustand/middleware' import { localStorageKey } from '@/constants/localStorage' import { sep } from '@tauri-apps/api/path' +import { modelSettings } from '@/lib/predefined' type ModelProviderState = { providers: ModelProvider[] @@ -211,8 +212,21 @@ export const useModelProvider = create()( name: localStorageKey.modelProvider, storage: createJSONStorage(() => localStorage), migrate: (persistedState: unknown, version: number) => { - const state = persistedState as ModelProviderState - + const state = persistedState as ModelProviderState & { + providers: Array< + ModelProvider & { + models: Array< + Model & { + settings?: Record & { + chatTemplate?: string + chat_template?: string + } + } + > + } + > + } + // Migration for cont_batching description update (version 0 -> 1) if (version === 0 && state?.providers) { state.providers = state.providers.map((provider) => { @@ -221,7 +235,8 @@ export const useModelProvider = create()( if (setting.key === 'cont_batching') { return { ...setting, - description: 'Enable continuous batching (a.k.a dynamic batching) for concurrent requests.' 
+ description: + 'Enable continuous batching (a.k.a dynamic batching) for concurrent requests.', } } return setting @@ -230,9 +245,65 @@ export const useModelProvider = create()( return provider }) } + + // Migration for chatTemplate key to chat_template (version 1 -> 2) + if (version === 1 && state?.providers) { + state.providers.forEach((provider) => { + if (provider.models) { + provider.models.forEach((model) => { + // Initialize settings if it doesn't exist + if (!model.settings) { + model.settings = {} + } + + // Migrate chatTemplate key to chat_template + if (model.settings.chatTemplate) { + model.settings.chat_template = model.settings.chatTemplate + delete model.settings.chatTemplate + } + + // Add missing chat_template setting if it doesn't exist + if (!model.settings.chat_template) { + model.settings.chat_template = { + ...modelSettings.chatTemplate, + controller_props: { + ...modelSettings.chatTemplate.controller_props, + }, + } + } + }) + } + }) + } + + // Migration for override_tensor_buffer_type key (version 2 -> 3) + if (version === 2 && state?.providers) { + state.providers.forEach((provider) => { + if (provider.models) { + provider.models.forEach((model) => { + // Initialize settings if it doesn't exist + if (!model.settings) { + model.settings = {} + } + + // Add missing override_tensor_buffer_type setting if it doesn't exist + if (!model.settings.override_tensor_buffer_t) { + model.settings.override_tensor_buffer_t = { + ...modelSettings.override_tensor_buffer_t, + controller_props: { + ...modelSettings.override_tensor_buffer_t + .controller_props, + }, + } + } + }) + } + }) + } + return state }, - version: 1, + version: 3, } ) ) diff --git a/web-app/src/lib/__tests__/messages.test.ts b/web-app/src/lib/__tests__/messages.test.ts index d3051f1a7..d097133bc 100644 --- a/web-app/src/lib/__tests__/messages.test.ts +++ b/web-app/src/lib/__tests__/messages.test.ts @@ -407,6 +407,50 @@ describe('CompletionMessagesBuilder', () => { const result = builder.getMessages() expect(result[0].content).toBe('Clean answer') }) + + it('should remove analysis channel reasoning content', () => { + const builder = new CompletionMessagesBuilder([]) + + builder.addAssistantMessage( + '<|channel|>analysis<|message|>Let me analyze this step by step...<|start|>assistant<|channel|>final<|message|>The final answer is 42.' + ) + + const result = builder.getMessages() + expect(result[0].content).toBe('The final answer is 42.') + }) + + it('should handle analysis channel without final message', () => { + const builder = new CompletionMessagesBuilder([]) + + builder.addAssistantMessage( + '<|channel|>analysis<|message|>Only analysis content here...' + ) + + const result = builder.getMessages() + expect(result[0].content).toBe('<|channel|>analysis<|message|>Only analysis content here...') + }) + + it('should handle analysis channel with multiline content', () => { + const builder = new CompletionMessagesBuilder([]) + + builder.addAssistantMessage( + '<|channel|>analysis<|message|>Step 1: First analysis\nStep 2: Second analysis\nStep 3: Final analysis<|start|>assistant<|channel|>final<|message|>Based on my analysis, here is the result.' 
+ ) + + const result = builder.getMessages() + expect(result[0].content).toBe('Based on my analysis, here is the result.') + }) + + it('should handle both think and analysis channel tags', () => { + const builder = new CompletionMessagesBuilder([]) + + builder.addAssistantMessage( + 'Initial thought<|channel|>analysis<|message|>Detailed analysis<|start|>assistant<|channel|>final<|message|>Final response' + ) + + const result = builder.getMessages() + expect(result[0].content).toBe('Final response') + }) }) describe('integration tests', () => { diff --git a/web-app/src/lib/messages.ts b/web-app/src/lib/messages.ts index 05dc0164d..b187fb514 100644 --- a/web-app/src/lib/messages.ts +++ b/web-app/src/lib/messages.ts @@ -102,6 +102,15 @@ export class CompletionMessagesBuilder { content = content.slice(splitIndex).trim() } } + if (content.includes('<|channel|>analysis<|message|>')) { + const match = content.match( + /<\|channel\|>analysis<\|message\|>([\s\S]*?)<\|start\|>assistant<\|channel\|>final<\|message\|>/ + ) + if (match?.index !== undefined) { + const splitIndex = match.index + match[0].length + content = content.slice(splitIndex).trim() + } + } return content } } diff --git a/web-app/src/lib/predefined.ts b/web-app/src/lib/predefined.ts index a87493722..b4d5164e7 100644 --- a/web-app/src/lib/predefined.ts +++ b/web-app/src/lib/predefined.ts @@ -22,7 +22,6 @@ export const modelSettings = { type: 'number', }, }, - temperature: { key: 'temperature', title: 'Temperature', @@ -121,4 +120,28 @@ export const modelSettings = { type: 'number', }, }, + chatTemplate: { + key: 'chat_template', + title: 'Custom Jinja Chat template', + description: 'Custom Jinja chat_template to be used for the model', + controller_type: 'textarea', + controller_props: { + value: '', + placeholder: + 'e.g., {% for message in messages %}...{% endfor %} (default is read from GGUF)', + type: 'text', + textAlign: 'right', + }, + }, + override_tensor_buffer_t: { + key: 'override_tensor_buffer_t', + title: 'Override Tensor Buffer Type', + description: 'Override the tensor buffer type for the model', + controller_type: 'input', + controller_props: { + value: '', + placeholder: 'e.g., layers\\.\\d+\\.ffn_.*=CPU', + type: 'text', + }, + }, } diff --git a/web-app/src/providers/DataProvider.tsx b/web-app/src/providers/DataProvider.tsx index 9fdb802ea..6110f9dd5 100644 --- a/web-app/src/providers/DataProvider.tsx +++ b/web-app/src/providers/DataProvider.tsx @@ -62,7 +62,12 @@ export function DataProvider() { // Check for app updates useEffect(() => { - checkForUpdate() + // Only check for updates if the auto updater is not disabled + // App might be distributed via other package managers + // or methods that handle updates differently + if (!AUTO_UPDATER_DISABLED) { + checkForUpdate() + } }, [checkForUpdate]) const handleDeepLink = (urls: string[] | null) => { @@ -79,7 +84,7 @@ export function DataProvider() { const resource = params.slice(1).join('/') // return { action, provider, resource } navigate({ - to: route.hub.index, + to: route.hub.model, search: { repo: resource, }, diff --git a/web-app/src/routes/hub/$modelId.tsx b/web-app/src/routes/hub/$modelId.tsx index 245909174..d46c20ca2 100644 --- a/web-app/src/routes/hub/$modelId.tsx +++ b/web-app/src/routes/hub/$modelId.tsx @@ -1,5 +1,10 @@ import HeaderPage from '@/containers/HeaderPage' -import { createFileRoute, useParams, useNavigate } from '@tanstack/react-router' +import { + createFileRoute, + useParams, + useNavigate, + useSearch, +} from '@tanstack/react-router' 
import { IconArrowLeft, IconDownload, @@ -13,23 +18,38 @@ import { RenderMarkdown } from '@/containers/RenderMarkdown' import { useEffect, useMemo, useCallback, useState } from 'react' import { useModelProvider } from '@/hooks/useModelProvider' import { useDownloadStore } from '@/hooks/useDownloadStore' -import { pullModel } from '@/services/models' +import { + CatalogModel, + convertHfRepoToCatalogModel, + fetchHuggingFaceRepo, + pullModel, +} from '@/services/models' import { Progress } from '@/components/ui/progress' import { Button } from '@/components/ui/button' import { cn } from '@/lib/utils' +type SearchParams = { + repo: string +} + export const Route = createFileRoute('/hub/$modelId')({ component: HubModelDetail, + validateSearch: (search: Record): SearchParams => ({ + repo: search.repo as SearchParams['repo'], + }), }) function HubModelDetail() { const { modelId } = useParams({ from: Route.id }) const navigate = useNavigate() const { sources, fetchSources } = useModelSources() + // eslint-disable-next-line @typescript-eslint/no-explicit-any + const search = useSearch({ from: Route.id as any }) const { getProviderByName } = useModelProvider() const llamaProvider = getProviderByName('llamacpp') const { downloads, localDownloadingModels, addLocalDownloadingModel } = useDownloadStore() + const [repoData, setRepoData] = useState() // State for README content const [readmeContent, setReadmeContent] = useState('') @@ -39,10 +59,21 @@ function HubModelDetail() { fetchSources() }, [fetchSources]) + const fetchRepo = useCallback(async () => { + const repoInfo = await fetchHuggingFaceRepo(search.repo || modelId) + if (repoInfo) { + const repoDetail = convertHfRepoToCatalogModel(repoInfo) + setRepoData(repoDetail) + } + }, [modelId, search]) + + useEffect(() => { + fetchRepo() + }, [modelId, fetchRepo]) // Find the model data from sources const modelData = useMemo(() => { - return sources.find((model) => model.model_name === modelId) - }, [sources, modelId]) + return sources.find((model) => model.model_name === modelId) ?? 
repoData + }, [sources, modelId, repoData]) // Download processes const downloadProcesses = useMemo( @@ -116,7 +147,6 @@ function HubModelDetail() { }) }, [modelData]) - // Fetch README content when modelData.readme is available useEffect(() => { if (modelData?.readme) { diff --git a/web-app/src/routes/hub/index.tsx b/web-app/src/routes/hub/index.tsx index 7c904d1ee..66e079412 100644 --- a/web-app/src/routes/hub/index.tsx +++ b/web-app/src/routes/hub/index.tsx @@ -31,7 +31,7 @@ import { CatalogModel, pullModel, fetchHuggingFaceRepo, - HuggingFaceRepo, + convertHfRepoToCatalogModel, } from '@/services/models' import { useDownloadStore } from '@/hooks/useDownloadStore' import { Progress } from '@/components/ui/progress' @@ -63,14 +63,16 @@ function Hub() { { value: 'newest', name: t('hub:sortNewest') }, { value: 'most-downloaded', name: t('hub:sortMostDownloaded') }, ] - const searchOptions = { - includeScore: true, - // Search in `author` and in `tags` array - keys: ['model_name', 'quants.model_id'], - } + const searchOptions = useMemo(() => { + return { + includeScore: true, + // Search in `author` and in `tags` array + keys: ['model_name', 'quants.model_id'], + } + }, []) const { sources, addSource, fetchSources, loading } = useModelSources() - const search = useSearch({ from: route.hub.index as any }) + const [searchValue, setSearchValue] = useState('') const [sortSelected, setSortSelected] = useState('newest') const [expandedModels, setExpandedModels] = useState>( @@ -92,48 +94,6 @@ function Hub() { const { getProviderByName } = useModelProvider() const llamaProvider = getProviderByName('llamacpp') - // Convert HuggingFace repository to CatalogModel format - const convertHfRepoToCatalogModel = useCallback( - (repo: HuggingFaceRepo): CatalogModel => { - // Extract GGUF files from the repository siblings - const ggufFiles = - repo.siblings?.filter((file) => - file.rfilename.toLowerCase().endsWith('.gguf') - ) || [] - - // Convert GGUF files to quants format - const quants = ggufFiles.map((file) => { - // Format file size - const formatFileSize = (size?: number) => { - if (!size) return 'Unknown size' - if (size < 1024 ** 3) return `${(size / 1024 ** 2).toFixed(1)} MB` - return `${(size / 1024 ** 3).toFixed(1)} GB` - } - - // Generate model_id from filename (remove .gguf extension, case-insensitive) - const modelId = file.rfilename.replace(/\.gguf$/i, '') - - return { - model_id: modelId, - path: `https://huggingface.co/${repo.modelId}/resolve/main/${file.rfilename}`, - file_size: formatFileSize(file.size), - } - }) - - return { - model_name: repo.modelId, - description: `**Metadata:** ${repo.pipeline_tag}\n\n **Tags**: ${repo.tags?.join(', ')}`, - developer: repo.author, - downloads: repo.downloads || 0, - num_quants: quants.length, - quants: quants, - created_at: repo.created_at, - readme: `https://huggingface.co/${repo.modelId}/resolve/main/README.md`, - } - }, - [] - ) - const toggleModelExpansion = (modelId: string) => { setExpandedModels((prev) => ({ ...prev, @@ -141,35 +101,6 @@ function Hub() { })) } - useEffect(() => { - if (search.repo) { - setSearchValue(search.repo || '') - setIsSearching(true) - - addModelSourceTimeoutRef.current = setTimeout(async () => { - try { - // Fetch HuggingFace repository information - const repoInfo = await fetchHuggingFaceRepo(search.repo) - if (repoInfo) { - const catalogModel = convertHfRepoToCatalogModel(repoInfo) - if ( - !sources.some((s) => s.model_name === catalogModel.model_name) - ) { - setHuggingFaceRepo(catalogModel) - 
addSource(catalogModel) - } - } - - await fetchSources() - } catch (error) { - console.error('Error fetching repository info:', error) - } finally { - setIsSearching(false) - } - }, 500) - } - }, [convertHfRepoToCatalogModel, fetchSources, addSource, search, sources]) - // Sorting functionality const sortedModels = useMemo(() => { return [...sources].sort((a, b) => { @@ -264,9 +195,6 @@ function Hub() { addSource(catalogModel) } } - - // Original addSource logic (if needed) - await fetchSources() } catch (error) { console.error('Error fetching repository info:', error) } finally { diff --git a/web-app/src/routes/settings/providers/$providerName.tsx b/web-app/src/routes/settings/providers/$providerName.tsx index c44654a9d..f4f452b29 100644 --- a/web-app/src/routes/settings/providers/$providerName.tsx +++ b/web-app/src/routes/settings/providers/$providerName.tsx @@ -15,7 +15,6 @@ import { import { createFileRoute, Link, - useNavigate, useParams, useSearch, } from '@tanstack/react-router' @@ -33,6 +32,7 @@ import { CustomTooltipJoyRide } from '@/containers/CustomeTooltipJoyRide' import { route } from '@/constants/routes' import DeleteProvider from '@/containers/dialogs/DeleteProvider' import { updateSettings, fetchModelsFromProvider } from '@/services/providers' +import { localStorageKey } from '@/constants/localStorage' import { Button } from '@/components/ui/button' import { IconFolderPlus, IconLoader, IconRefresh } from '@tabler/icons-react' import { getProviders } from '@/services/providers' @@ -84,7 +84,6 @@ function ProviderDetail() { const { getProviderByName, setProviders, updateProvider } = useModelProvider() const provider = getProviderByName(providerName) const isSetup = step === 'setup_remote_provider' - const navigate = useNavigate() // Check if llamacpp provider needs backend configuration const needsBackendConfig = @@ -138,9 +137,7 @@ function ProviderDetail() { const { status } = data if (status === STATUS.FINISHED) { - navigate({ - to: route.home, - }) + localStorage.setItem(localStorageKey.setupCompleted, 'true') } } diff --git a/web-app/src/services/__tests__/models.test.ts b/web-app/src/services/__tests__/models.test.ts index c6f626911..b648b2677 100644 --- a/web-app/src/services/__tests__/models.test.ts +++ b/web-app/src/services/__tests__/models.test.ts @@ -4,6 +4,7 @@ import { fetchModels, fetchModelCatalog, fetchHuggingFaceRepo, + convertHfRepoToCatalogModel, updateModel, pullModel, abortDownload, @@ -12,6 +13,8 @@ import { stopModel, stopAllModels, startModel, + HuggingFaceRepo, + CatalogModel, } from '../models' import { EngineManager, Model } from '@janhq/core' @@ -334,7 +337,9 @@ describe('models service', () => { }) // Test with full URL - await fetchHuggingFaceRepo('https://huggingface.co/microsoft/DialoGPT-medium') + await fetchHuggingFaceRepo( + 'https://huggingface.co/microsoft/DialoGPT-medium' + ) expect(fetch).toHaveBeenCalledWith( 'https://huggingface.co/api/models/microsoft/DialoGPT-medium?blobs=true' ) @@ -380,7 +385,7 @@ describe('models service', () => { it('should handle other HTTP errors', async () => { const consoleSpy = vi.spyOn(console, 'error').mockImplementation(() => {}) - + ;(fetch as any).mockResolvedValue({ ok: false, status: 500, @@ -394,13 +399,13 @@ describe('models service', () => { 'Error fetching HuggingFace repository:', expect.any(Error) ) - + consoleSpy.mockRestore() }) it('should handle network errors', async () => { const consoleSpy = vi.spyOn(console, 'error').mockImplementation(() => {}) - + ;(fetch as any).mockRejectedValue(new 
Error('Network error')) const result = await fetchHuggingFaceRepo('microsoft/DialoGPT-medium') @@ -410,7 +415,7 @@ describe('models service', () => { 'Error fetching HuggingFace repository:', expect.any(Error) ) - + consoleSpy.mockRestore() }) @@ -524,7 +529,303 @@ describe('models service', () => { expect(result).toEqual(mockRepoData) // Verify the GGUF file is present in siblings - expect(result?.siblings?.some(s => s.rfilename.endsWith('.gguf'))).toBe(true) + expect(result?.siblings?.some((s) => s.rfilename.endsWith('.gguf'))).toBe( + true + ) + }) + }) + + describe('convertHfRepoToCatalogModel', () => { + const mockHuggingFaceRepo: HuggingFaceRepo = { + id: 'microsoft/DialoGPT-medium', + modelId: 'microsoft/DialoGPT-medium', + sha: 'abc123', + downloads: 1500, + likes: 75, + tags: ['pytorch', 'transformers', 'text-generation'], + pipeline_tag: 'text-generation', + created_at: '2021-01-01T00:00:00Z', + last_modified: '2021-12-01T00:00:00Z', + private: false, + disabled: false, + gated: false, + author: 'microsoft', + siblings: [ + { + rfilename: 'model-q4_0.gguf', + size: 2 * 1024 * 1024 * 1024, // 2GB + blobId: 'blob123', + }, + { + rfilename: 'model-q8_0.GGUF', // Test case-insensitive matching + size: 4 * 1024 * 1024 * 1024, // 4GB + blobId: 'blob456', + }, + { + rfilename: 'tokenizer.json', // Non-GGUF file (should be filtered out) + size: 1024 * 1024, // 1MB + blobId: 'blob789', + }, + ], + } + + it('should convert HuggingFace repo to catalog model format', () => { + const result = convertHfRepoToCatalogModel(mockHuggingFaceRepo) + + const expected: CatalogModel = { + model_name: 'microsoft/DialoGPT-medium', + description: '**Tags**: pytorch, transformers, text-generation', + developer: 'microsoft', + downloads: 1500, + num_quants: 2, + quants: [ + { + model_id: 'model-q4_0', + path: 'https://huggingface.co/microsoft/DialoGPT-medium/resolve/main/model-q4_0.gguf', + file_size: '2.0 GB', + }, + { + model_id: 'model-q8_0', + path: 'https://huggingface.co/microsoft/DialoGPT-medium/resolve/main/model-q8_0.GGUF', + file_size: '4.0 GB', + }, + ], + created_at: '2021-01-01T00:00:00Z', + readme: + 'https://huggingface.co/microsoft/DialoGPT-medium/resolve/main/README.md', + } + + expect(result).toEqual(expected) + }) + + it('should handle repository with no GGUF files', () => { + const repoWithoutGGUF: HuggingFaceRepo = { + ...mockHuggingFaceRepo, + siblings: [ + { + rfilename: 'tokenizer.json', + size: 1024 * 1024, + blobId: 'blob789', + }, + { + rfilename: 'config.json', + size: 2048, + blobId: 'blob101', + }, + ], + } + + const result = convertHfRepoToCatalogModel(repoWithoutGGUF) + + expect(result.num_quants).toBe(0) + expect(result.quants).toEqual([]) + }) + + it('should handle repository with no siblings', () => { + const repoWithoutSiblings: HuggingFaceRepo = { + ...mockHuggingFaceRepo, + siblings: undefined, + } + + const result = convertHfRepoToCatalogModel(repoWithoutSiblings) + + expect(result.num_quants).toBe(0) + expect(result.quants).toEqual([]) + }) + + it('should format file sizes correctly', () => { + const repoWithVariousFileSizes: HuggingFaceRepo = { + ...mockHuggingFaceRepo, + siblings: [ + { + rfilename: 'small-model.gguf', + size: 500 * 1024 * 1024, // 500MB + blobId: 'blob1', + }, + { + rfilename: 'large-model.gguf', + size: 3.5 * 1024 * 1024 * 1024, // 3.5GB + blobId: 'blob2', + }, + { + rfilename: 'unknown-size.gguf', + // No size property + blobId: 'blob3', + }, + ], + } + + const result = convertHfRepoToCatalogModel(repoWithVariousFileSizes) + + 
expect(result.quants[0].file_size).toBe('500.0 MB') + expect(result.quants[1].file_size).toBe('3.5 GB') + expect(result.quants[2].file_size).toBe('Unknown size') + }) + + it('should handle empty or undefined tags', () => { + const repoWithEmptyTags: HuggingFaceRepo = { + ...mockHuggingFaceRepo, + tags: [], + } + + const result = convertHfRepoToCatalogModel(repoWithEmptyTags) + + expect(result.description).toBe('**Tags**: ') + }) + + it('should handle missing downloads count', () => { + const repoWithoutDownloads: HuggingFaceRepo = { + ...mockHuggingFaceRepo, + downloads: undefined as any, + } + + const result = convertHfRepoToCatalogModel(repoWithoutDownloads) + + expect(result.downloads).toBe(0) + }) + + it('should correctly remove .gguf extension from model IDs', () => { + const repoWithVariousGGUF: HuggingFaceRepo = { + ...mockHuggingFaceRepo, + siblings: [ + { + rfilename: 'model.gguf', + size: 1024, + blobId: 'blob1', + }, + { + rfilename: 'MODEL.GGUF', + size: 1024, + blobId: 'blob2', + }, + { + rfilename: 'complex-model-name.gguf', + size: 1024, + blobId: 'blob3', + }, + ], + } + + const result = convertHfRepoToCatalogModel(repoWithVariousGGUF) + + expect(result.quants[0].model_id).toBe('model') + expect(result.quants[1].model_id).toBe('MODEL') + expect(result.quants[2].model_id).toBe('complex-model-name') + }) + + it('should generate correct download paths', () => { + const result = convertHfRepoToCatalogModel(mockHuggingFaceRepo) + + expect(result.quants[0].path).toBe( + 'https://huggingface.co/microsoft/DialoGPT-medium/resolve/main/model-q4_0.gguf' + ) + expect(result.quants[1].path).toBe( + 'https://huggingface.co/microsoft/DialoGPT-medium/resolve/main/model-q8_0.GGUF' + ) + }) + + it('should generate correct readme URL', () => { + const result = convertHfRepoToCatalogModel(mockHuggingFaceRepo) + + expect(result.readme).toBe( + 'https://huggingface.co/microsoft/DialoGPT-medium/resolve/main/README.md' + ) + }) + + it('should handle GGUF files with case-insensitive extension matching', () => { + const repoWithMixedCase: HuggingFaceRepo = { + ...mockHuggingFaceRepo, + siblings: [ + { + rfilename: 'model-1.gguf', + size: 1024, + blobId: 'blob1', + }, + { + rfilename: 'model-2.GGUF', + size: 1024, + blobId: 'blob2', + }, + { + rfilename: 'model-3.GgUf', + size: 1024, + blobId: 'blob3', + }, + { + rfilename: 'not-a-model.txt', + size: 1024, + blobId: 'blob4', + }, + ], + } + + const result = convertHfRepoToCatalogModel(repoWithMixedCase) + + expect(result.num_quants).toBe(3) + expect(result.quants).toHaveLength(3) + expect(result.quants[0].model_id).toBe('model-1') + expect(result.quants[1].model_id).toBe('model-2') + expect(result.quants[2].model_id).toBe('model-3') + }) + + it('should handle edge cases with file size formatting', () => { + const repoWithEdgeCases: HuggingFaceRepo = { + ...mockHuggingFaceRepo, + siblings: [ + { + rfilename: 'tiny.gguf', + size: 512, // < 1MB + blobId: 'blob1', + }, + { + rfilename: 'exactly-1gb.gguf', + size: 1024 * 1024 * 1024, // Exactly 1GB + blobId: 'blob2', + }, + { + rfilename: 'zero-size.gguf', + size: 0, + blobId: 'blob3', + }, + ], + } + + const result = convertHfRepoToCatalogModel(repoWithEdgeCases) + + expect(result.quants[0].file_size).toBe('0.0 MB') + expect(result.quants[1].file_size).toBe('1.0 GB') + expect(result.quants[2].file_size).toBe('Unknown size') // 0 is falsy, so it returns 'Unknown size' + }) + + it('should handle missing optional fields gracefully', () => { + const minimalRepo: HuggingFaceRepo = { + id: 'minimal/repo', + 
modelId: 'minimal/repo', + sha: 'abc123', + downloads: 0, + likes: 0, + tags: [], + created_at: '2021-01-01T00:00:00Z', + last_modified: '2021-12-01T00:00:00Z', + private: false, + disabled: false, + gated: false, + author: 'minimal', + siblings: [ + { + rfilename: 'model.gguf', + blobId: 'blob1', + }, + ], + } + + const result = convertHfRepoToCatalogModel(minimalRepo) + + expect(result.model_name).toBe('minimal/repo') + expect(result.developer).toBe('minimal') + expect(result.downloads).toBe(0) + expect(result.description).toBe('**Tags**: ') + expect(result.quants[0].file_size).toBe('Unknown size') }) }) }) diff --git a/web-app/src/services/models.ts b/web-app/src/services/models.ts index 71911244f..12bf1997d 100644 --- a/web-app/src/services/models.ts +++ b/web-app/src/services/models.ts @@ -134,6 +134,47 @@ export const fetchHuggingFaceRepo = async ( } } +// Convert HuggingFace repository to CatalogModel format +export const convertHfRepoToCatalogModel = ( + repo: HuggingFaceRepo +): CatalogModel => { + // Extract GGUF files from the repository siblings + const ggufFiles = + repo.siblings?.filter((file) => + file.rfilename.toLowerCase().endsWith('.gguf') + ) || [] + + // Convert GGUF files to quants format + const quants = ggufFiles.map((file) => { + // Format file size + const formatFileSize = (size?: number) => { + if (!size) return 'Unknown size' + if (size < 1024 ** 3) return `${(size / 1024 ** 2).toFixed(1)} MB` + return `${(size / 1024 ** 3).toFixed(1)} GB` + } + + // Generate model_id from filename (remove .gguf extension, case-insensitive) + const modelId = file.rfilename.replace(/\.gguf$/i, '') + + return { + model_id: modelId, + path: `https://huggingface.co/${repo.modelId}/resolve/main/${file.rfilename}`, + file_size: formatFileSize(file.size), + } + }) + + return { + model_name: repo.modelId, + description: `**Tags**: ${repo.tags?.join(', ')}`, + developer: repo.author, + downloads: repo.downloads || 0, + num_quants: quants.length, + quants: quants, + created_at: repo.created_at, + readme: `https://huggingface.co/${repo.modelId}/resolve/main/README.md`, + } +} + /** * Updates a model. * @param model The model to update. diff --git a/web-app/src/types/global.d.ts b/web-app/src/types/global.d.ts index fe33f3d46..b104314b0 100644 --- a/web-app/src/types/global.d.ts +++ b/web-app/src/types/global.d.ts @@ -19,6 +19,7 @@ declare global { declare const POSTHOG_KEY: string declare const POSTHOG_HOST: string declare const MODEL_CATALOG_URL: string + declare const AUTO_UPDATER_DISABLED: boolean interface Window { core: AppCore | undefined } diff --git a/web-app/vite.config.ts b/web-app/vite.config.ts index 697c1a64f..4c1b2ab40 100644 --- a/web-app/vite.config.ts +++ b/web-app/vite.config.ts @@ -33,19 +33,19 @@ export default defineConfig(({ mode }) => { define: { IS_TAURI: JSON.stringify(process.env.IS_TAURI), IS_MACOS: JSON.stringify( - process.env.TAURI_ENV_PLATFORM?.includes('darwin') ?? 'false' + process.env.TAURI_ENV_PLATFORM?.includes('darwin') ?? false ), IS_WINDOWS: JSON.stringify( - process.env.TAURI_ENV_PLATFORM?.includes('windows') ?? 'false' + process.env.TAURI_ENV_PLATFORM?.includes('windows') ?? false ), IS_LINUX: JSON.stringify( - process.env.TAURI_ENV_PLATFORM?.includes('linux') ?? 'false' + process.env.TAURI_ENV_PLATFORM?.includes('linux') ?? false ), IS_IOS: JSON.stringify( - process.env.TAURI_ENV_PLATFORM?.includes('ios') ?? 'false' + process.env.TAURI_ENV_PLATFORM?.includes('ios') ?? 
false ), IS_ANDROID: JSON.stringify( - process.env.TAURI_ENV_PLATFORM?.includes('android') ?? 'false' + process.env.TAURI_ENV_PLATFORM?.includes('android') ?? false ), PLATFORM: JSON.stringify(process.env.TAURI_ENV_PLATFORM), @@ -56,6 +56,9 @@ export default defineConfig(({ mode }) => { MODEL_CATALOG_URL: JSON.stringify( 'https://raw.githubusercontent.com/menloresearch/model-catalog/main/model_catalog.json' ), + AUTO_UPDATER_DISABLED: JSON.stringify( + env.AUTO_UPDATER_DISABLED === 'true' + ), }, // Vite options tailored for Tauri development and only applied in `tauri dev` or `tauri build` diff --git a/web-app/vitest.config.ts b/web-app/vitest.config.ts index e30c4d545..c2289f337 100644 --- a/web-app/vitest.config.ts +++ b/web-app/vitest.config.ts @@ -38,5 +38,6 @@ export default defineConfig({ VERSION: JSON.stringify('test'), POSTHOG_KEY: JSON.stringify(''), POSTHOG_HOST: JSON.stringify(''), + AUTO_UPDATER_DISABLED: JSON.stringify('false'), }, }) diff --git a/website/astro.config.mjs b/website/astro.config.mjs index 63d93bea6..ba894459c 100644 --- a/website/astro.config.mjs +++ b/website/astro.config.mjs @@ -81,10 +81,22 @@ export default defineConfig({ label: 'MCP Examples', collapsed: true, items: [ + { + label: 'Browser Control (Browserbase)', + slug: 'jan/mcp-examples/browser/browserbase', + }, { label: 'Code Sandbox (E2B)', slug: 'jan/mcp-examples/data-analysis/e2b', }, + { + label: 'Design Creation (Canva)', + slug: 'jan/mcp-examples/design/canva', + }, + { + label: 'Deep Research (Octagon)', + slug: 'jan/mcp-examples/deepresearch/octagon', + }, { label: 'Web Search with Exa', slug: 'jan/mcp-examples/search/exa', @@ -107,6 +119,10 @@ export default defineConfig({ label: 'Llama.cpp Server', slug: 'local-server/llama-cpp', }, + { + label: 'Server Troubleshooting', + slug: 'local-server/troubleshooting', + }, { label: 'Integrations', collapsed: true, diff --git a/website/src/assets/browserbase.png b/website/src/assets/browserbase.png new file mode 100644 index 000000000..7624e187b Binary files /dev/null and b/website/src/assets/browserbase.png differ diff --git a/website/src/assets/browserbase2.png b/website/src/assets/browserbase2.png new file mode 100644 index 000000000..b4b1793be Binary files /dev/null and b/website/src/assets/browserbase2.png differ diff --git a/website/src/assets/browserbase3.png b/website/src/assets/browserbase3.png new file mode 100644 index 000000000..5d7836e53 Binary files /dev/null and b/website/src/assets/browserbase3.png differ diff --git a/website/src/assets/browserbase4.png b/website/src/assets/browserbase4.png new file mode 100644 index 000000000..79d460021 Binary files /dev/null and b/website/src/assets/browserbase4.png differ diff --git a/website/src/assets/browserbase5.png b/website/src/assets/browserbase5.png new file mode 100644 index 000000000..bc6d97a16 Binary files /dev/null and b/website/src/assets/browserbase5.png differ diff --git a/website/src/assets/browserbase6.png b/website/src/assets/browserbase6.png new file mode 100644 index 000000000..f33f863d7 Binary files /dev/null and b/website/src/assets/browserbase6.png differ diff --git a/website/src/assets/browserbase7.png b/website/src/assets/browserbase7.png new file mode 100644 index 000000000..005d9e15f Binary files /dev/null and b/website/src/assets/browserbase7.png differ diff --git a/website/src/assets/canva.png b/website/src/assets/canva.png new file mode 100644 index 000000000..5fe27c779 Binary files /dev/null and b/website/src/assets/canva.png differ diff --git 
a/website/src/assets/canva2.png b/website/src/assets/canva2.png new file mode 100644 index 000000000..63f934132 Binary files /dev/null and b/website/src/assets/canva2.png differ diff --git a/website/src/assets/canva3.png b/website/src/assets/canva3.png new file mode 100644 index 000000000..db07b6ddb Binary files /dev/null and b/website/src/assets/canva3.png differ diff --git a/website/src/assets/canva4.png b/website/src/assets/canva4.png new file mode 100644 index 000000000..6dac7b7e1 Binary files /dev/null and b/website/src/assets/canva4.png differ diff --git a/website/src/assets/canva5.png b/website/src/assets/canva5.png new file mode 100644 index 000000000..7c8ee337f Binary files /dev/null and b/website/src/assets/canva5.png differ diff --git a/website/src/assets/canva6.png b/website/src/assets/canva6.png new file mode 100644 index 000000000..d98f4cec3 Binary files /dev/null and b/website/src/assets/canva6.png differ diff --git a/website/src/assets/canva7.png b/website/src/assets/canva7.png new file mode 100644 index 000000000..2d6ca7275 Binary files /dev/null and b/website/src/assets/canva7.png differ diff --git a/website/src/assets/canva8.png b/website/src/assets/canva8.png new file mode 100644 index 000000000..54d397b57 Binary files /dev/null and b/website/src/assets/canva8.png differ diff --git a/website/src/assets/canva9.png b/website/src/assets/canva9.png new file mode 100644 index 000000000..d242c7da2 Binary files /dev/null and b/website/src/assets/canva9.png differ diff --git a/website/src/assets/octagon.png b/website/src/assets/octagon.png new file mode 100644 index 000000000..416d4672d Binary files /dev/null and b/website/src/assets/octagon.png differ diff --git a/website/src/assets/octagon2.png b/website/src/assets/octagon2.png new file mode 100644 index 000000000..6afb8cb57 Binary files /dev/null and b/website/src/assets/octagon2.png differ diff --git a/website/src/assets/octagon3.png b/website/src/assets/octagon3.png new file mode 100644 index 000000000..2b7af9dfe Binary files /dev/null and b/website/src/assets/octagon3.png differ diff --git a/website/src/assets/octagon4.png b/website/src/assets/octagon4.png new file mode 100644 index 000000000..4995a9576 Binary files /dev/null and b/website/src/assets/octagon4.png differ diff --git a/website/src/assets/octagon5.png b/website/src/assets/octagon5.png new file mode 100644 index 000000000..9264ed5a2 Binary files /dev/null and b/website/src/assets/octagon5.png differ diff --git a/website/src/assets/octagon6.png b/website/src/assets/octagon6.png new file mode 100644 index 000000000..5645f99af Binary files /dev/null and b/website/src/assets/octagon6.png differ diff --git a/website/src/assets/octagon7.png b/website/src/assets/octagon7.png new file mode 100644 index 000000000..3f37d0784 Binary files /dev/null and b/website/src/assets/octagon7.png differ diff --git a/website/src/assets/octagon8.png b/website/src/assets/octagon8.png new file mode 100644 index 000000000..bb5d99b53 Binary files /dev/null and b/website/src/assets/octagon8.png differ diff --git a/website/src/assets/octagon9.png b/website/src/assets/octagon9.png new file mode 100644 index 000000000..a6e39caf4 Binary files /dev/null and b/website/src/assets/octagon9.png differ diff --git a/website/src/content/docs/jan/explanation/model-parameters.mdx b/website/src/content/docs/jan/explanation/model-parameters.mdx index 4b60afd68..d86fa5f44 100644 --- a/website/src/content/docs/jan/explanation/model-parameters.mdx +++ 
b/website/src/content/docs/jan/explanation/model-parameters.mdx @@ -16,8 +16,9 @@ keywords: parameters, ] --- +import { Aside, Steps } from '@astrojs/starlight/components' -import { Aside } from '@astrojs/starlight/components'; +# Model Parameters Model parameters control how your AI thinks and responds. Think of them as the AI's personality settings and performance controls. @@ -32,7 +33,7 @@ Model parameters control how your AI thinks and responds. Think of them as the A **For model capabilities:** - Click the **edit button** next to a model to enable features like vision or tools -## Performance Settings +## Performance Settings (Gear Icon) These settings control how the model thinks and performs: @@ -51,7 +52,7 @@ These settings control how the model thinks and performs: ![Model Parameters](../../../../assets/model-parameters.png) -## Model Capabilities +## Model Capabilities (Edit Button) These toggle switches enable special features: diff --git a/website/src/content/docs/jan/mcp-examples/browser/browserbase.mdx b/website/src/content/docs/jan/mcp-examples/browser/browserbase.mdx new file mode 100644 index 000000000..a8963d029 --- /dev/null +++ b/website/src/content/docs/jan/mcp-examples/browser/browserbase.mdx @@ -0,0 +1,273 @@ +--- +title: Browserbase MCP +description: Control browsers with natural language through Browserbase's cloud infrastructure. +keywords: + [ + Jan, + MCP, + Model Context Protocol, + Browserbase, + browser automation, + web scraping, + Stagehand, + headless browser, + tool calling, + ] +--- + +import { Aside, Steps } from '@astrojs/starlight/components' + +[Browserbase MCP](https://docs.browserbase.com/integrations/mcp/introduction) gives AI models actual browser control through cloud infrastructure. Built on Stagehand, it lets you navigate websites, extract data, and interact with web pages using natural language commands. + +The integration provides real browser sessions that AI can control, enabling tasks that go beyond simple web search APIs. + +## Available Tools + + + +### Multi-Session Tools +- `multi_browserbase_stagehand_session_create`: Create parallel browser sessions +- `multi_browserbase_stagehand_session_list`: Track active sessions +- `multi_browserbase_stagehand_session_close`: Clean up sessions +- `multi_browserbase_stagehand_navigate_session`: Navigate in specific session + +### Core Browser Actions +- `browserbase_stagehand_navigate`: Navigate to URLs +- `browserbase_stagehand_act`: Perform actions ("click the login button") +- `browserbase_stagehand_extract`: Extract text content +- `browserbase_stagehand_observe`: Find page elements +- `browserbase_screenshot`: Capture screenshots + +### Session Management +- `browserbase_session_create`: Create or reuse sessions +- `browserbase_session_close`: Close active sessions + +## Prerequisites + +- Jan with MCP enabled +- Browserbase account (includes 60 minutes free usage) +- Model with strong tool calling support +- Node.js installed + + + +## Setup + +### Enable MCP + +1. Go to **Settings** > **MCP Servers** +2. Toggle **Allow All MCP Tool Permission** ON + +![MCP settings page with toggle enabled](../../../../../assets/mcp-on.png) + +### Get Browserbase Credentials + +1. Sign up at [browserbase.com](https://browserbase.com) + - Email verification required + - Phone number authentication + - Thorough security process + +2. 
Access your dashboard and copy: + - **API Key** + - **Project ID** + +![Browserbase dashboard showing API key and project ID](../../../../../assets/browserbase.png) + +### Configure MCP Server + +Click `+` in MCP Servers section: + +**NPM Package Configuration:** +- **Server Name**: `browserbase` +- **Command**: `npx` +- **Arguments**: `@browserbasehq/mcp-server-browserbase` +- **Environment Variables**: + - Key: `BROWSERBASE_API_KEY`, Value: `your-api-key` + - Key: `BROWSERBASE_PROJECT_ID`, Value: `your-project-id` + +![Jan MCP server configuration with Browserbase settings](../../../../../assets/browserbase3.png) + +### Verify Setup + +Check the tools bubble in chat to confirm Browserbase tools are available: + +![Chat interface showing available Browserbase tools](../../../../../assets/browserbase2.png) + +## Real Usage Example + +### Live Information Query + +``` +Which sports matches are happening right now in Australia (irrespective of the sport)? +``` + +This simple query demonstrates browser automation in action: + +1. **Tool Activation** + - Model creates browser session + - Navigates to sports websites + - Extracts current match data + +![Model using browser tools to search for information](../../../../../assets/browserbase5.png) + +2. **Results Delivery** + - Real-time match information + - Multiple sports covered + - Current scores and timings + +![Final response with Australian sports matches](../../../../../assets/browserbase6.png) + +The AI successfully found: +- AFL matches with live scores +- NRL games in progress +- Upcoming Rugby Union fixtures + +## Common Issues + +### Tool Call Failures + +Sometimes tool calls fail due to parsing issues: + +![Tool call error showing parsing problem](../../../../../assets/browserbase7.png) + +**Solutions:** +- Try rephrasing your prompt +- Disable unnecessary tools +- Use simpler, more direct requests +- Switch to Claude 3.5+ Sonnet if using another model + +### Model Limitations + +Most models struggle with multiple tools. If experiencing issues: +- Start with single-purpose requests +- Build complexity gradually +- Consider which tools are actually needed +- Expect some trial and error initially + +## Usage Limits + +**Free Tier:** +- 60 minutes of browser time included +- Sessions auto-terminate after 5 minutes inactivity +- Can adjust timeout in Browserbase dashboard +- Usage visible in dashboard analytics + +**Session Management:** +- Each browser session counts against time +- Close sessions when done to conserve minutes +- Multi-session operations consume time faster + +## Practical Use Cases + +### Real-Time Data Collection +``` +Check current prices for MacBook Pro M4 at major Australian retailers and create a comparison table. +``` + +### Form Testing +``` +Navigate to myservice.gov.au and walk through the Medicare claim process, documenting each required field. +``` + +### Content Monitoring +``` +Visit ABC News Australia and extract the top 5 breaking news headlines with their timestamps. +``` + +### Multi-Site Analysis +``` +Compare flight prices from Sydney to Tokyo next week across Qantas, Jetstar, and Virgin Australia. +``` + +### Automated Verification +``` +Check if our company is listed correctly on Google Maps, Yelp, and Yellow Pages, noting any discrepancies. +``` + +## Advanced Techniques + +### Session Reuse +``` +Create a browser session, log into LinkedIn, then search for "AI engineers in Melbourne" and extract the first 10 profiles. 
+``` + +### Parallel Operations +``` +Create three browser sessions: monitor stock prices on ASX, check crypto on CoinSpot, and track forex on XE simultaneously. +``` + +### Sequential Workflows +``` +Go to seek.com.au, search for "data scientist" jobs in Sydney, apply filters for $150k+, then extract job titles and companies. +``` + +## Optimization Tips + +**Prompt Engineering:** +- Be specific about what to extract +- Name exact websites when possible +- Break complex tasks into steps +- Specify output format clearly + +**Tool Selection:** +- Use multi-session only when needed +- Close sessions promptly +- Choose observe before act when possible +- Screenshot sparingly to save time + +**Error Recovery:** +- Have fallback prompts ready +- Start simple, add complexity +- Watch for timeout warnings +- Monitor usage in dashboard + +## Troubleshooting + +**Connection Issues:** +- Verify API key and Project ID +- Check Browserbase service status +- Ensure NPX can download packages +- Restart Jan after configuration + +**Browser Failures:** +- Some sites block automation +- Try different navigation paths +- Check if site requires login +- Verify target site is accessible + +**Performance Problems:** +- Reduce concurrent sessions +- Simplify extraction requests +- Check remaining time quota +- Consider upgrading plan + +**Model Struggles:** +- Too many tools overwhelm most models +- Claude 3.5+ Sonnet most reliable +- Reduce available tools if needed +- Use focused, clear instructions + + + +## Browserbase vs Browser Use + +| Feature | Browserbase | Browser Use | +|---------|-------------|-------------| +| **Infrastructure** | Cloud browsers | Local browser | +| **Setup Complexity** | API key only | Python environment | +| **Performance** | Consistent | System dependent | +| **Cost** | Usage-based | Free (local resources) | +| **Reliability** | High | Variable | +| **Privacy** | Cloud-based | Fully local | + +## Next Steps + +Browserbase MCP provides genuine browser automation capabilities, not just web search. This enables complex workflows like form filling, multi-site monitoring, and data extraction that would be impossible with traditional APIs. + +The cloud infrastructure handles browser complexity while Jan maintains conversational privacy. Just remember: with great browser power comes occasional parsing errors. diff --git a/website/src/content/docs/jan/mcp-examples/deepresearch/octagon.mdx b/website/src/content/docs/jan/mcp-examples/deepresearch/octagon.mdx new file mode 100644 index 000000000..aba5cc9d9 --- /dev/null +++ b/website/src/content/docs/jan/mcp-examples/deepresearch/octagon.mdx @@ -0,0 +1,259 @@ +--- +title: Octagon Deep Research MCP +description: Finance-focused deep research with AI-powered analysis through Octagon's MCP integration. +keywords: + [ + Jan, + MCP, + Model Context Protocol, + Octagon, + deep research, + financial research, + private equity, + market analysis, + technical research, + tool calling, + ] +--- + +import { Aside, Steps } from '@astrojs/starlight/components' + + +[Octagon Deep Research MCP](https://docs.octagonagents.com/guide/deep-research-mcp.html) provides specialized AI research capabilities with a strong focus on financial markets and business intelligence. Unlike general research tools, Octagon excels at complex financial analysis, market dynamics, and investment research. 
+ +The integration delivers comprehensive reports that combine multiple data sources, cross-verification, and actionable insights - particularly useful for understanding market structures, investment strategies, and business models. + +## Available Tools + +### octagon-agent +Orchestrates comprehensive market intelligence research, particularly strong in: +- Financial market analysis +- Private equity and M&A research +- Corporate structure investigations +- Investment strategy evaluation + +### octagon-scraper-agent +Specialized web scraping for public and private market data: +- SEC filings and regulatory documents +- Company financials and metrics +- Market transaction data +- Industry reports and analysis + +### octagon-deep-research-agent +Comprehensive research synthesis combining: +- Multi-source data aggregation +- Cross-verification of claims +- Historical trend analysis +- Actionable insights generation + +## Prerequisites + +- Jan with MCP enabled +- Octagon account (includes 2-week Pro trial) +- Model with tool calling support +- Node.js installed + + + +## Setup + +### Enable MCP + +1. Go to **Settings** > **MCP Servers** +2. Toggle **Allow All MCP Tool Permission** ON + +![MCP settings page with toggle enabled](../../../../../assets/mcp-on.png) + +### Get Octagon API Key + +1. Sign up at [Octagon signup page](https://app.octagonai.co/signup/?redirectToAfterSignup=https://app.octagonai.co/api-keys) +2. Navigate to the API playground +3. Copy your API key from the dashboard + +![Octagon API playground showing API key location](../../../../../assets/octagon2.png) + +### Configure MCP Server + +Click `+` in MCP Servers section: + +**NPM Package Configuration:** +- **Server Name**: `octagon-mcp-server` +- **Command**: `npx` +- **Arguments**: `-y octagon-mcp@latest` +- **Environment Variables**: + - Key: `OCTAGON_API_KEY`, Value: `your-api-key` + +![Jan MCP server configuration with Octagon settings](../../../../../assets/octagon3.png) + +### Verify Setup + +Check the tools bubble in chat to confirm Octagon tools are available: + +![Chat interface showing available Octagon tools with moonshotai/kimi-k2 model](../../../../../assets/octagon4.png) + +## Real-World Example: Private Equity Analysis + +Here's an actual deep research query demonstrating Octagon's financial analysis capabilities: + +### The Prompt + +``` +Break apart the private equity paradox: How did an industry that promises to "unlock value" become synonymous with gutting companies, yet still attracts the world's smartest money? + +Start with the mechanics—how PE firms use other people's money to buy companies with borrowed cash, then charge fees for the privilege. Trace the evolution from corporate raiders of the 1980s to today's trillion-dollar titans like Blackstone, KKR, and Apollo. Use SEC filings, M&A databases, and bankruptcy records to map their empires. + +Dig into specific deals that illustrate the dual nature: companies genuinely transformed versus those stripped and flipped. Compare Toys "R" Us's death to Hilton's resurrection. Examine how PE-owned companies fare during economic downturns—do they really have "patient capital" or do they bleed portfolio companies dry through dividend recaps? + +Investigate the fee structure that makes partners billionaires regardless of performance. Calculate the real returns after the 2-and-20 (or worse) fee structures. Why do pension funds and endowments keep pouring money in despite academic studies showing they'd do better in index funds? 
+ +Explore the revolving door between PE, government, and central banks. How many Fed officials and Treasury secretaries came from or went to PE? Map the political donations and lobbying expenditures that keep carried interest taxed as capital gains. + +Address the human cost through labor statistics and case studies—what happens to employees when PE takes over? But also examine when PE genuinely saves failing companies and preserves jobs. + +Write this as if explaining to a skeptical but curious friend over drinks—clear language, no jargon without explanation, and enough dry humor to make the absurdities apparent. Think Michael Lewis meets Matt Levine. Keep it under 3,000 words but pack it with hard data and real examples. The goal: help readers understand why PE is simultaneously capitalism's most sophisticated expression and its most primitive. +``` + +![Prompt entered in Jan UI](../../../../../assets/octagon5.png) + +### Research Process + +The AI engages multiple Octagon tools to gather comprehensive data: + +![Kimi model using Octagon tools for research](../../../../../assets/octagon6.png) + +### The Results + +Octagon delivers a detailed analysis covering: + +**Part 1: The Mechanics Explained** +![First part of the research report](../../../../../assets/octagon7.png) + +**Part 2: Historical Analysis and Case Studies** +![Second part showing PE evolution and specific deals](../../../../../assets/octagon8.png) + +**Part 3: Financial Engineering and Human Impact** +![Final section on fee structures and consequences](../../../../../assets/octagon9.png) + +The report demonstrates Octagon's ability to: +- Access and analyze SEC filings +- Compare multiple deal outcomes +- Calculate real returns after fees +- Track political connections +- Assess human impact with data + +## Finance-Focused Use Cases + +### Investment Research +``` +Analyze Tesla's vertical integration strategy vs traditional automakers. Include supply chain dependencies, margin analysis, and capital efficiency metrics from the last 5 years. +``` + +### Market Structure Analysis +``` +Map the concentration of market makers in US equities. Who controls order flow, what are their profit margins, and how has this changed since zero-commission trading? +``` + +### Corporate Governance +``` +Investigate executive compensation at the 10 largest US banks post-2008. Compare pay ratios, stock buybacks vs R&D spending, and correlation with shareholder returns. +``` + +### Private Market Intelligence +``` +Track Series B+ funding rounds in AI/ML companies in 2024. Identify valuation trends, investor concentration, and compare to public market multiples. +``` + +### Regulatory Analysis +``` +Examine how Basel III implementation differs across major markets. Which banks gained competitive advantages and why? +``` + +### M&A Strategy +``` +Analyze Microsoft's acquisition strategy under Nadella. Calculate actual vs projected synergies, integration success rates, and impact on market position. +``` + +## Technical Research Capabilities + +While finance-focused, Octagon also handles technical research: + +### Framework Evaluation +``` +Compare Kubernetes alternatives for edge computing. Consider resource usage, latency, reliability, and operational complexity with real deployment data. +``` + +### API Economics +``` +Analyze the unit economics of major AI API providers. Include pricing history, usage patterns, and margin estimates based on reported compute costs. 
+``` + +### Open Source Sustainability +``` +Research funding models for critical open source infrastructure. Which projects are at risk and what are the economic incentives misalignments? +``` + +## Research Quality + +Octagon's reports typically include: +- **Primary Sources**: SEC filings, earnings calls, regulatory documents +- **Quantitative Analysis**: Financial metrics, ratios, trend analysis +- **Comparative Studies**: Peer benchmarking, historical context +- **Narrative Clarity**: Complex topics explained accessibly +- **Actionable Insights**: Not just data, but implications + +## Troubleshooting + +**Authentication Issues:** +- Verify API key from Octagon dashboard +- Check trial status hasn't expired +- Ensure correct API key format +- Contact Octagon support if needed + +**Research Failures:** +- Some queries may exceed scope (try narrowing) +- Financial data may have access restrictions +- Break complex queries into parts +- Allow time for comprehensive research + +**Tool Calling Problems:** +- Not all models handle multiple tools well +- Kimi-k2 via OpenRouter works reliably +- Claude 3.5+ Sonnet also recommended +- Enable tool calling in model settings + +**Performance Considerations:** +- Deep research takes time (be patient) +- Complex financial analysis may take minutes +- Monitor API usage in dashboard +- Consider query complexity vs urgency + + + +## Pricing After Trial + +After the 2-week Pro trial: +- Check current pricing at octagonagents.com +- Usage-based pricing for API access +- Different tiers for research depth +- Educational discounts may be available + +## Octagon vs Other Research Tools + +| Feature | Octagon | ChatGPT Deep Research | Perplexity | +|---------|---------|----------------------|------------| +| **Finance Focus** | Specialized | General | General | +| **Data Sources** | Financial databases | Web-wide | Web-wide | +| **SEC Integration** | Native | Limited | Limited | +| **Market Data** | Comprehensive | Basic | Basic | +| **Research Depth** | Very Deep | Deep | Moderate | +| **Speed** | Moderate | Slow | Fast | + +## Next Steps + +Octagon Deep Research MCP excels at complex financial analysis that would typically require a team of analysts. The integration provides institutional-quality research capabilities within Jan's conversational interface. + +Whether analyzing market structures, evaluating investments, or understanding business models, Octagon delivers the depth and accuracy that financial professionals expect, while maintaining readability for broader audiences. diff --git a/website/src/content/docs/jan/mcp-examples/design/canva.mdx b/website/src/content/docs/jan/mcp-examples/design/canva.mdx new file mode 100644 index 000000000..008bff70f --- /dev/null +++ b/website/src/content/docs/jan/mcp-examples/design/canva.mdx @@ -0,0 +1,279 @@ +--- +title: Canva MCP +description: Create and manage designs through natural language commands with Canva's official MCP server. +keywords: + [ + Jan, + MCP, + Model Context Protocol, + Canva, + design automation, + graphic design, + presentations, + templates, + tool calling, + ] +--- + +import { Aside, Steps } from '@astrojs/starlight/components' + +[Canva MCP](https://www.canva.com/newsroom/news/deep-research-integration-mcp-server/) gives AI models the ability to create, search, and manage designs directly within Canva. 
As the first design platform with native MCP integration, it lets you generate presentations, logos, and marketing materials through conversation rather than clicking through design interfaces. + +The integration provides comprehensive design capabilities without leaving your chat, though actual editing still happens in Canva's interface. + +## Available Tools + + + +### Design Operations +- **generate-design**: Create new designs using AI prompts +- **search-designs**: Search docs, presentations, videos, whiteboards +- **get-design**: Get detailed information about a Canva design +- **get-design-pages**: List pages in multi-page designs +- **get-design-content**: Extract content from designs +- **resize-design**: Adapt designs to different dimensions +- **get-design-resize-status**: Check resize operation status +- **get-design-generation-job**: Track AI generation progress + +### Import/Export +- **import-design-from-url**: Import files from URLs as new designs +- **get-design-import-from-url**: Check import status +- **export-design**: Export designs in various formats +- **get-export-formats**: List available export options +- **get-design-export-status**: Track export progress + +### Organization +- **create-folder**: Create folders in Canva +- **move-item-to-folder**: Organize designs and assets +- **list-folder-items**: Browse folder contents + +### Collaboration +- **comment-on-design**: Add comments to designs +- **list-comments**: View design comments +- **list-replies**: See comment threads +- **reply-to-comment**: Respond to feedback + +### Legacy Tools +- **search**: ChatGPT connector (use search-designs instead) +- **fetch**: Content retrieval for ChatGPT + +## Prerequisites + +- Jan with MCP enabled +- Canva account (free or paid) +- Model with tool calling support +- Node.js installed +- Internet connection for Canva API access + +## Setup + +### Enable MCP + +1. Go to **Settings** > **MCP Servers** +2. Toggle **Allow All MCP Tool Permission** ON + +![MCP settings page with toggle enabled](../../../../../assets/mcp-on.png) + +### Configure Canva MCP Server + +Click `+` in MCP Servers section: + +**Configuration:** +- **Server Name**: `Canva` +- **Command**: `npx` +- **Arguments**: `-y mcp-remote@latest https://mcp.canva.com/mcp` +- **Environment Variables**: Leave empty (authentication handled via OAuth) + +![Canva MCP server configuration in Jan](../../../../../assets/canva.png) + +### Authentication Process + +When you first use Canva tools: + +1. **Browser Opens Automatically** + - Canva authentication page appears in your default browser + - Log in with your Canva account + +![Canva authentication page](../../../../../assets/canva2.png) + +2. **Team Selection & Permissions** + - Select your team (if you have multiple) + - Review permissions the AI will have + - Click **Allow** to grant access + +![Canva team selection and permissions](../../../../../assets/canva3.png) + +The permissions include: +- Reading your profile and designs +- Creating new designs +- Managing folders and content +- Accessing team brand templates +- Commenting on designs + +### Model Configuration + +Use a tool-enabled model: + +- **Anthropic Claude 3.5+ Sonnet** +- **OpenAI GPT-4o** +- **Google Gemini Pro** + +## Real-World Usage Example + +Here's an actual workflow creating a company logo: + +### Initial Setup Confirmation + +``` +Are you able to access my projects? 
+```
+
+The AI explains available capabilities:
+
+![AI response about available actions](../../../../../assets/canva4.png)
+
+### Design Creation Request
+
+```
+Create new designs with AI. Call it "VibeBusiness" and have it be a company focused on superintelligence for the benefit of humanity.
+```
+
+The AI initiates design generation:
+
+![AI generating design with tool call visible](../../../../../assets/canva5.png)
+
+### Design Options
+
+The AI creates multiple logo variations:
+
+**First Option:**
+![First logo design option](../../../../../assets/canva6.png)
+
+**Selected Design:**
+![Selected logo design](../../../../../assets/canva7.png)
+
+### Final Result
+
+After selection, the AI confirms:
+
+![Final response with design ready](../../../../../assets/canva8.png)
+
+Clicking the design link opens it directly in Canva:
+
+![Design opened in Canva browser tab](../../../../../assets/canva9.png)
+
+## Practical Use Cases
+
+### Marketing Campaign Development
+```
+Create a social media campaign for our new product launch. Generate Instagram posts, Facebook covers, and LinkedIn banners with consistent branding.
+```
+
+### Presentation Automation
+```
+Search for our Q4 sales presentation and create a simplified 5-slide version for the board meeting.
+```
+
+### Brand Asset Management
+```
+List all designs in our "2025 Marketing" folder and export the approved ones as PDFs.
+```
+
+### Design Iteration
+```
+Find our company logo designs from last month and resize them for business cards, letterheads, and email signatures.
+```
+
+### Content Extraction
+```
+Extract all text from our employee handbook presentation so I can update it in our documentation.
+```
+
+### Collaborative Review
+```
+Add a comment to the new website mockup asking the design team about the color scheme choices.
+```
+
+## Workflow Tips
+
+### Effective Design Generation
+- **Be specific**: "Create a minimalist tech company logo with blue and silver colors"
+- **Specify format**: "Generate an Instagram story template for product announcements"
+- **Include context**: "Design a professional LinkedIn banner for an AI research company"
+- **Request variations**: Ask for multiple options to choose from
+
+### Organization Best Practices
+- Create folders before generating multiple designs
+- Use descriptive names for easy searching later
+- Move designs to appropriate folders immediately
+- Export important designs for backup
+
+### Integration Patterns
+- Generate designs → Review options → Select preferred → Open in Canva for fine-tuning
+- Search existing designs → Extract content → Generate new versions
+- Create templates → Resize for multiple platforms → Export all variants
+
+## Limitations and Considerations
+
+**Design Editing**: While the MCP can create and manage designs, actual editing requires opening Canva's interface.
+
+**Project Access**: The integration may not access all historical projects immediately, focusing on designs created or modified after connection.
+
+**Generation Time**: AI design generation takes a few moments. The tool provides job IDs to track progress.
+
+**Team Permissions**: Access depends on your Canva team settings and subscription level.
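The **Generation Time** note above boils down to a poll-until-done loop on the job ID. The sketch below is illustrative only: `callTool` is a hypothetical stand-in for whatever MCP client issues the request, and the `jobId`/`status` fields are assumed shapes rather than Canva's documented schema; only the tool names (`generate-design`, `get-design-generation-job`) come from the tool list earlier on this page.

```typescript
// Illustrative sketch only. `callTool`, `jobId`, and the status values are
// assumptions; the tool names come from the Canva MCP tool list above.
interface GenerationJob {
  jobId: string
  status: 'in_progress' | 'completed' | 'failed'
  designUrl?: string
}

type CallTool = (name: string, args: Record<string, unknown>) => Promise<GenerationJob>

async function waitForDesign(callTool: CallTool, prompt: string): Promise<GenerationJob> {
  // Kick off AI generation and keep the job ID it returns.
  let job = await callTool('generate-design', { prompt })

  // Poll the generation job until it is no longer in progress.
  while (job.status === 'in_progress') {
    await new Promise((resolve) => setTimeout(resolve, 2000)) // brief pause between polls
    job = await callTool('get-design-generation-job', { jobId: job.jobId })
  }
  return job
}
```

In normal use Jan drives this loop for you during the conversation; the sketch just makes the waiting step explicit.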
+ +## Troubleshooting + +**Authentication Issues:** +- Clear browser cookies for Canva +- Try logging out and back into Canva +- Ensure pop-ups aren't blocked for OAuth flow +- Check team admin permissions if applicable + +**Design Generation Failures:** +- Verify you have creation rights in selected team +- Check Canva subscription limits +- Try simpler design prompts first +- Ensure stable internet connection + +**Tool Availability:** +- Some tools require specific Canva plans +- Team features need appropriate permissions +- Verify MCP server is showing as active +- Restart Jan after authentication + +**Search Problems:** +- Use search-designs (not the legacy search tool) +- Be specific with design types and names +- Check folder permissions for team content +- Allow time for new designs to index + + + +## Advanced Workflows + +### Batch Operations +``` +Create 5 variations of our product announcement banner, then resize all of them for Twitter, LinkedIn, and Facebook. +``` + +### Content Migration +``` +Import all designs from [URLs], organize them into a "2025 Campaign" folder, and add review comments for the team. +``` + +### Automated Reporting +``` +Search for all presentation designs created this month, extract their content, and summarize the key themes. +``` + +## Next Steps + +Canva MCP bridges the gap between conversational AI and visual design. Instead of describing what you want and then manually creating it, you can generate professional designs directly through natural language commands. + +The real power emerges when combining multiple tools - searching existing assets, generating new variations, organizing content, and collaborating with teams, all within a single conversation flow. diff --git a/website/src/content/docs/local-server/api-server.mdx b/website/src/content/docs/local-server/api-server.mdx index c4ac4c32d..9ab97865e 100644 --- a/website/src/content/docs/local-server/api-server.mdx +++ b/website/src/content/docs/local-server/api-server.mdx @@ -17,33 +17,22 @@ keywords: API key ] --- -import { Aside, Steps } from '@astrojs/starlight/components'; +import { Aside, Steps } from '@astrojs/starlight/components' -Jan provides a built-in, OpenAI-compatible API server that runs entirely on your computer, -powered by `llama.cpp`. Use it as a drop-in replacement for cloud APIs to build private, -offline-capable AI applications. +Jan provides a built-in, OpenAI-compatible API server that runs entirely on your computer, powered by `llama.cpp`. Use it as a drop-in replacement for cloud APIs to build private, offline-capable AI applications. ![Jan's Local API Server Settings UI](../../../assets/api-server-ui.png) ## Quick Start -### 1. Start the Server - +### Start the Server 1. Navigate to **Settings** > **Local API Server**. 2. Enter a custom **API Key** (e.g., `secret-key-123`). This is required for all requests. 3. Click **Start Server**. -The server is ready when the logs show `JAN API listening at http://12.0.0.1:1337`. +The server is ready when the logs show `JAN API listening at http://127.0.0.1:1337`. -### 2. Load a model with cURL - -```sh -curl http://127.0.0.1:1337/v1/models/start -H "Content-Type: application/json" \ - -H "Authorization: Bearer secret-key-123" \ - -d '{"model": "gemma3:12b"}' -``` - -### 3. Test with cURL +### Test with cURL Open a terminal and make a request. Replace `YOUR_MODEL_ID` with the ID of an available model in Jan. ```bash @@ -95,7 +84,7 @@ A comma-separated list of hostnames allowed to access the server. 
This provides ## Troubleshooting - + ## Performance Settings diff --git a/website/src/content/docs/local-server/settings.mdx b/website/src/content/docs/local-server/settings.mdx index 8819442ba..8f366ef5f 100644 --- a/website/src/content/docs/local-server/settings.mdx +++ b/website/src/content/docs/local-server/settings.mdx @@ -14,11 +14,11 @@ keywords: ] --- -import { Tabs, TabItem } from '@astrojs/starlight/components'; -import { Steps } from '@astrojs/starlight/components'; -import { Aside } from '@astrojs/starlight/components'; +import { Aside, Steps } from '@astrojs/starlight/components' -Access Jan's settings by clicking the ⚙️ icon in the bottom left corner. +# Settings + +Access Jan's settings by clicking the Settings icon in the bottom left corner. ## Managing AI Models @@ -163,41 +163,17 @@ Jan stores everything locally on your computer in standard file formats. This duplicates your data to the new location - your original files stay safe. -## Network Settings +## Local API Server -### HTTPS Proxy Setup +All settings for running Jan as a local, OpenAI-compatible server have been moved to their own dedicated page for clarity. -If you need to connect through a corporate network or want enhanced privacy: +This includes configuration for: +- Server Host and Port +- API Keys +- CORS (Cross-Origin Resource Sharing) +- Verbose Logging -1. **Enable** the proxy toggle -2. Enter your proxy details: -``` -http://:@: -``` - -**Example:** -``` -http://user:pass@proxy.company.com:8080 -``` - -![HTTPS Proxy](../../../assets/settings-13.png) - - - -### SSL Certificate Handling - -**Ignore SSL Certificates:** Only enable this for: -- Corporate networks with internal certificates -- Development/testing environments -- Trusted network setups - -![Ignore SSL Certificates](../../../assets/settings-14.png) - - +[**Go to Local API Server Settings →**](/docs/local-server/api-server) ## Emergency Options @@ -218,7 +194,7 @@ Only enable if you trust your network environment completely. ![Reset Confirmation](../../../assets/settings-18.png) -
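Since the local server described on the API server page above is OpenAI-compatible, any standard HTTP client can talk to it. Below is a minimal TypeScript sketch, assuming the default `127.0.0.1:1337` address and the example key `secret-key-123` from those docs; the `/v1/chat/completions` route is an assumption based on the advertised OpenAI compatibility, and `YOUR_MODEL_ID` stands in for any model available in Jan.

```typescript
// Minimal sketch of calling Jan's local OpenAI-compatible server.
// Host, port, and the Bearer key mirror the API server docs above;
// the /v1/chat/completions path is assumed from OpenAI compatibility.
async function chat(prompt: string): Promise<string> {
  const res = await fetch('http://127.0.0.1:1337/v1/chat/completions', {
    method: 'POST',
    headers: {
      'Content-Type': 'application/json',
      Authorization: 'Bearer secret-key-123',
    },
    body: JSON.stringify({
      model: 'YOUR_MODEL_ID',
      messages: [{ role: 'user', content: prompt }],
    }),
  })
  if (!res.ok) throw new Error(`Request failed with status ${res.status}`)
  const data = await res.json()
  return data.choices[0].message.content
}

chat('Hello from the local API server').then(console.log).catch(console.error)
```

An existing OpenAI SDK pointed at the same base URL and key should behave the same way, since the server is meant as a drop-in replacement for cloud APIs.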