Merge branch 'dev' into blog/add-deepresearch-piece

.github/workflows/jan-astro-docs.yml (new file, 98 lines)
@@ -0,0 +1,98 @@
name: Jan Astro Docs

on:
  push:
    branches:
      - dev
    paths:
      - 'website/**'
      - '.github/workflows/jan-astro-docs.yml'
  pull_request:
    paths:
      - 'website/**'
      - '.github/workflows/jan-astro-docs.yml'
  # Review gh actions docs if you want to further define triggers, paths, etc
  # https://docs.github.com/en/actions/using-workflows/workflow-syntax-for-github-actions#on
  workflow_dispatch:

jobs:
  deploy:
    name: Deploy to CloudFlare Pages
    env:
      CLOUDFLARE_PROJECT_NAME: astro-docs
    runs-on: ubuntu-latest
    permissions:
      contents: write
      deployments: write
      pull-requests: write
    steps:
      - uses: actions/checkout@v3
      - uses: actions/setup-node@v3
        with:
          node-version: 20

      - uses: oven-sh/setup-bun@v2

      - name: Install jq
        uses: dcarbone/install-jq-action@v2.0.1

      - name: Fill env vars
        continue-on-error: true
        working-directory: website
        run: |
          env_example_file=".env.example"
          touch .env
          while IFS= read -r line || [[ -n "$line" ]]; do
            if [[ "$line" == *"="* ]]; then
              var_name=$(echo $line | cut -d '=' -f 1)
              echo $var_name
              var_value="$(jq -r --arg key "$var_name" '.[$key]' <<< "$SECRETS")"
              echo "$var_name=$var_value" >> .env
            fi
          done < "$env_example_file"
        env:
          SECRETS: '${{ toJson(secrets) }}'

      - name: Install dependencies
        working-directory: website
        run: bun install
      - name: Build website
        working-directory: website
        run: bun run build

      - name: copy redirects and headers
        continue-on-error: true
        working-directory: website
        run: |
          cp _redirects dist/_redirects
          cp _headers dist/_headers

      - name: Publish to Cloudflare Pages PR Preview and Staging
        if: github.event_name == 'pull_request'
        uses: cloudflare/pages-action@v1
        with:
          apiToken: ${{ secrets.CLOUDFLARE_API_TOKEN }}
          accountId: ${{ secrets.CLOUDFLARE_ACCOUNT_ID }}
          projectName: ${{ env.CLOUDFLARE_PROJECT_NAME }}
          directory: ./website/dist
          # Optional: Enable this if you want to have GitHub Deployments triggered
          gitHubToken: ${{ secrets.GITHUB_TOKEN }}
        id: deployCloudflarePages

      - uses: mshick/add-pr-comment@v2
        if: github.event_name == 'pull_request'
        with:
          message: |
            Preview URL Astro Docs: ${{ steps.deployCloudflarePages.outputs.url }}

      - name: Publish to Cloudflare Pages Production
        if: (github.event_name == 'push' && github.ref == 'refs/heads/dev') || (github.event_name == 'workflow_dispatch' && github.ref == 'refs/heads/dev')
        uses: cloudflare/pages-action@v1
        with:
          apiToken: ${{ secrets.CLOUDFLARE_API_TOKEN }}
          accountId: ${{ secrets.CLOUDFLARE_ACCOUNT_ID }}
          projectName: ${{ env.CLOUDFLARE_PROJECT_NAME }}
          directory: ./website/dist
          branch: main
          # Optional: Enable this if you want to have GitHub Deployments triggered
          gitHubToken: ${{ secrets.GITHUB_TOKEN }}
.github/workflows/jan-docs-new-release.yaml (deleted, 63 lines)
@@ -1,63 +0,0 @@
name: Deploy Docs on new release

on:
  release:
    types:
      - published
      - edited
      - released

jobs:
  deploy:
    name: Deploy to CloudFlare Pages
    env:
      CLOUDFLARE_PROJECT_NAME: docs
    runs-on: ubuntu-latest
    permissions:
      contents: write
      deployments: write
      pull-requests: write
    steps:
      - uses: actions/checkout@v4
        with:
          ref: dev
      - uses: actions/setup-node@v3
        with:
          node-version: 20

      - name: Install jq
        uses: dcarbone/install-jq-action@v2.0.1

      - name: Fill env vars
        working-directory: docs
        run: |
          env_example_file=".env.example"
          touch .env
          while IFS= read -r line || [[ -n "$line" ]]; do
            if [[ "$line" == *"="* ]]; then
              var_name=$(echo $line | cut -d '=' -f 1)
              echo $var_name
              var_value="$(jq -r --arg key "$var_name" '.[$key]' <<< "$SECRETS")"
              echo "$var_name=$var_value" >> .env
            fi
          done < "$env_example_file"
        env:
          SECRETS: '${{ toJson(secrets) }}'

      - name: Install dependencies
        working-directory: docs
        run: yarn install
      - name: Build website
        working-directory: docs
        run: export NODE_ENV=production && yarn build && cp _redirects out/_redirects && cp _headers out/_headers

      - name: Publish to Cloudflare Pages Production
        uses: cloudflare/pages-action@v1
        with:
          apiToken: ${{ secrets.CLOUDFLARE_API_TOKEN }}
          accountId: ${{ secrets.CLOUDFLARE_ACCOUNT_ID }}
          projectName: ${{ env.CLOUDFLARE_PROJECT_NAME }}
          directory: ./docs/out
          branch: main
          # Optional: Enable this if you want to have GitHub Deployments triggered
          gitHubToken: ${{ secrets.GITHUB_TOKEN }}
.github/workflows/jan-docs.yml (2 lines changed)
@@ -26,7 +26,7 @@ jobs:
       deployments: write
       pull-requests: write
     steps:
-      - uses: actions/checkout@v3
+      - uses: actions/checkout@v4
       - uses: actions/setup-node@v3
         with:
           node-version: 20
.github/workflows/jan-linter-and-test.yml (2 lines changed)
@@ -1,4 +1,4 @@
-name: Test - Linter & Playwright
+name: Linter & Test
 on:
   workflow_dispatch:
   push:
.github/workflows/jan-tauri-build-nightly.yaml (52 lines changed)
@@ -20,6 +20,7 @@ on:
 jobs:
   set-public-provider:
     runs-on: ubuntu-latest
+    if: github.event_name != 'pull_request' || github.event.pull_request.head.repo.full_name == github.repository
     outputs:
       public_provider: ${{ steps.set-public-provider.outputs.public_provider }}
       ref: ${{ steps.set-public-provider.outputs.ref }}
@@ -47,11 +48,13 @@ jobs:
       fi
   # Job create Update app version based on latest release tag with build number and save to output
   get-update-version:
+    if: github.event_name != 'pull_request' || github.event.pull_request.head.repo.full_name == github.repository
     uses: ./.github/workflows/template-get-update-version.yml

   build-macos:
     uses: ./.github/workflows/template-tauri-build-macos.yml
     needs: [get-update-version, set-public-provider]
+    if: github.event_name != 'pull_request' || github.event.pull_request.head.repo.full_name == github.repository
     secrets: inherit
     with:
       ref: ${{ needs.set-public-provider.outputs.ref }}
@@ -64,6 +67,7 @@ jobs:
     uses: ./.github/workflows/template-tauri-build-windows-x64.yml
     secrets: inherit
     needs: [get-update-version, set-public-provider]
+    if: github.event_name != 'pull_request' || github.event.pull_request.head.repo.full_name == github.repository
     with:
       ref: ${{ needs.set-public-provider.outputs.ref }}
       public_provider: ${{ needs.set-public-provider.outputs.public_provider }}
@@ -74,6 +78,7 @@ jobs:
     uses: ./.github/workflows/template-tauri-build-linux-x64.yml
     secrets: inherit
     needs: [get-update-version, set-public-provider]
+    if: github.event_name != 'pull_request' || github.event.pull_request.head.repo.full_name == github.repository
     with:
       ref: ${{ needs.set-public-provider.outputs.ref }}
       public_provider: ${{ needs.set-public-provider.outputs.public_provider }}
@@ -91,6 +96,7 @@ jobs:
       build-macos,
     ]
     runs-on: ubuntu-latest
+    if: github.event_name != 'pull_request' || github.event.pull_request.head.repo.full_name == github.repository
     steps:
       - name: Getting the repo
         uses: actions/checkout@v3
@@ -224,49 +230,3 @@ jobs:
           RUN_ID=${{ github.run_id }}
           COMMENT="This is the build for this pull request. You can download it from the Artifacts section here: [Build URL](https://github.com/${{ github.repository }}/actions/runs/${RUN_ID})."
           gh pr comment $PR_URL --body "$COMMENT"
-
-  # AutoQA trigger for S3 builds
-  trigger-autoqa-s3:
-    needs:
-      [
-        build-macos,
-        build-windows-x64,
-        build-linux-x64,
-        get-update-version,
-        set-public-provider,
-        sync-temp-to-latest,
-      ]
-    if: needs.set-public-provider.outputs.public_provider == 'aws-s3'
-    uses: ./.github/workflows/autoqa-template.yml
-    with:
-      jan_app_windows_source: 'https://delta.jan.ai/nightly/Jan-nightly_${{ needs.get-update-version.outputs.new_version }}_x64-setup.exe'
-      jan_app_ubuntu_source: 'https://delta.jan.ai/nightly/Jan-nightly_${{ needs.get-update-version.outputs.new_version }}_amd64.deb'
-      jan_app_macos_source: 'https://delta.jan.ai/nightly/Jan-nightly_${{ needs.get-update-version.outputs.new_version }}_universal.dmg'
-      is_nightly: true
-      source_type: 'url'
-    secrets:
-      RP_TOKEN: ${{ secrets.RP_TOKEN }}
-
-  # AutoQA trigger for artifact builds
-  trigger-autoqa-artifacts:
-    needs:
-      [
-        build-macos,
-        build-windows-x64,
-        build-linux-x64,
-        get-update-version,
-        set-public-provider,
-      ]
-    if: needs.set-public-provider.outputs.public_provider == 'none'
-    uses: ./.github/workflows/autoqa-template.yml
-    with:
-      jan_app_windows_source: '' # Not needed for artifacts
-      jan_app_ubuntu_source: '' # Not needed for artifacts
-      jan_app_macos_source: '' # Not needed for artifacts
-      is_nightly: true
-      source_type: 'local'
-      artifact_name_windows: 'jan-windows-${{ needs.get-update-version.outputs.new_version }}'
-      artifact_name_ubuntu: 'jan-linux-amd64-${{ needs.get-update-version.outputs.new_version }}-deb'
-      artifact_name_macos: 'jan-nightly-mac-universal-${{ needs.get-update-version.outputs.new_version }}.dmg'
-    secrets:
-      RP_TOKEN: ${{ secrets.RP_TOKEN }}
.vscode/extensions.json (deleted, 5 lines)
@@ -1,5 +0,0 @@
{
  "recommendations": [
    "esbenp.prettier-vscode"
  ]
}

.vscode/settings.json (deleted, 7 lines)
@@ -1,7 +0,0 @@
{
  "editor.defaultFormatter": "esbenp.prettier-vscode",
  "editor.formatOnSave": true,
  "[rust]": {
    "editor.defaultFormatter": "rust-lang.rust-analyzer"
  }
}
README.md (40 lines changed)
@@ -1,6 +1,6 @@
 # Jan - Local AI Assistant

 <p align="center">
   <!-- ALL-CONTRIBUTORS-BADGE:START - Do not remove or modify this section -->
@@ -12,62 +12,50 @@
 </p>

 <p align="center">
-  <a href="https://jan.ai/docs/quickstart">Getting Started</a>
-  - <a href="https://jan.ai/docs">Docs</a>
-  - <a href="https://jan.ai/changelog">Changelog</a>
-  - <a href="https://github.com/menloresearch/jan/issues">Bug reports</a>
+  <a href="https://jan.ai/docs/quickstart">Getting Started</a>
+  - <a href="https://jan.ai/docs">Docs</a>
+  - <a href="https://jan.ai/changelog">Changelog</a>
+  - <a href="https://github.com/menloresearch/jan/issues">Bug reports</a>
+  - <a href="https://discord.gg/AsJ8krTT3N">Discord</a>
 </p>

-Jan is a ChatGPT-alternative that runs 100% offline on your device. Our goal is to make it easy for a layperson to download and run LLMs and use AI with **full control** and **privacy**.
-
-**⚠️ Jan is in active development.**
+Jan is an AI assistant that can run 100% offline on your device. Download and run LLMs with
+**full control** and **privacy**.

 ## Installation

-Because clicking a button is still the easiest way to get started:
+The easiest way to get started is by downloading one of the following versions for your respective operating system:

 <table>
   <tr>
     <td><b>Platform</b></td>
     <td><b>Stable</b></td>
     <td><b>Beta</b></td>
     <td><b>Nightly</b></td>
   </tr>
   <tr>
     <td><b>Windows</b></td>
     <td><a href='https://app.jan.ai/download/latest/win-x64'>jan.exe</a></td>
     <td><a href='https://app.jan.ai/download/beta/win-x64'>jan.exe</a></td>
     <td><a href='https://app.jan.ai/download/nightly/win-x64'>jan.exe</a></td>
   </tr>
   <tr>
     <td><b>macOS</b></td>
     <td><a href='https://app.jan.ai/download/latest/mac-universal'>jan.dmg</a></td>
     <td><a href='https://app.jan.ai/download/beta/mac-universal'>jan.dmg</a></td>
     <td><a href='https://app.jan.ai/download/nightly/mac-universal'>jan.dmg</a></td>
   </tr>
   <tr>
     <td><b>Linux (deb)</b></td>
     <td><a href='https://app.jan.ai/download/latest/linux-amd64-deb'>jan.deb</a></td>
     <td><a href='https://app.jan.ai/download/beta/linux-amd64-deb'>jan.deb</a></td>
     <td><a href='https://app.jan.ai/download/nightly/linux-amd64-deb'>jan.deb</a></td>
   </tr>
   <tr>
     <td><b>Linux (AppImage)</b></td>
     <td><a href='https://app.jan.ai/download/latest/linux-amd64-appimage'>jan.AppImage</a></td>
     <td><a href='https://app.jan.ai/download/beta/linux-amd64-appimage'>jan.AppImage</a></td>
     <td><a href='https://app.jan.ai/download/nightly/linux-amd64-appimage'>jan.AppImage</a></td>
   </tr>
 </table>

 Download from [jan.ai](https://jan.ai/) or [GitHub Releases](https://github.com/menloresearch/jan/releases).

 ## Demo

 <video width="100%" controls>
   <source src="./docs/public/assets/videos/enable-tool-call-for-models.mp4" type="video/mp4">
   Your browser does not support the video tag.
 </video>

 ## Features

@@ -149,13 +137,12 @@ For detailed compatibility, check our [installation guides](https://jan.ai/docs/

 ## Troubleshooting

-When things go sideways (they will):
+If things go sideways:

 1. Check our [troubleshooting docs](https://jan.ai/docs/troubleshooting)
 2. Copy your error logs and system specs
 3. Ask for help in our [Discord](https://discord.gg/FTk2MvZwJH) `#🆘|jan-help` channel

 We keep logs for 24 hours, so don't procrastinate on reporting issues.

 ## Contributing

@@ -175,15 +162,6 @@ Contributions welcome. See [CONTRIBUTING.md](CONTRIBUTING.md) for the full spiel
 - **Jobs**: hr@jan.ai
 - **General Discussion**: [Discord](https://discord.gg/FTk2MvZwJH)

-## Trust & Safety
-
-**Friendly reminder**: We're not trying to scam you.
-
-- We won't ask for personal information
-- Jan is completely free (no premium version exists)
-- We don't have a cryptocurrency or ICO
-- We're bootstrapped and not seeking your investment (yet)
-
 ## License

 Apache 2.0 - Because sharing is caring.
@@ -132,6 +132,12 @@ export abstract class BaseExtension implements ExtensionType {
         setting.controllerProps.options = setting.controllerProps.options?.length
           ? setting.controllerProps.options
           : oldSettings.find((e: any) => e.key === setting.key)?.controllerProps?.options
+        if ('recommended' in setting.controllerProps) {
+          const oldRecommended = oldSettings.find((e: any) => e.key === setting.key)?.controllerProps?.recommended
+          if (oldRecommended !== undefined && oldRecommended !== "") {
+            setting.controllerProps.recommended = oldRecommended
+          }
+        }
       })
     }
     localStorage.setItem(this.name, JSON.stringify(settings))

@@ -60,4 +60,5 @@ export type DropdownComponentProps = {
   value: string
   type?: InputType
   options?: DropdownOption[]
+  recommended?: string
 }
BIN docs/src/pages/docs/_assets/hf_hub.png (new file, 203 KiB)
BIN docs/src/pages/docs/_assets/hf_jan_nano.png (new file, 1.6 MiB)
BIN docs/src/pages/docs/_assets/hf_jan_nano_2.png (new file, 171 KiB)
BIN docs/src/pages/docs/_assets/hf_jan_nano_3.png (new file, 139 KiB)
BIN docs/src/pages/docs/_assets/hf_jan_nano_4.png (new file, 152 KiB)
BIN docs/src/pages/docs/_assets/hf_jan_nano_5.png (new file, 417 KiB)
BIN docs/src/pages/docs/_assets/hf_jan_nano_6.png (new file, 405 KiB)
BIN docs/src/pages/docs/_assets/hf_jan_nano_7.png (new file, 26 KiB)
BIN docs/src/pages/docs/_assets/hf_jan_nano_8.png (new file, 661 KiB)
BIN docs/src/pages/docs/_assets/hf_jan_nano_9.png (new file, 158 KiB)
BIN docs/src/pages/docs/_assets/hf_jan_setup.png (new file, 642 KiB)
BIN docs/src/pages/docs/_assets/hf_providers.png (new file, 1.5 MiB)
@@ -26,5 +26,9 @@
   "openrouter": {
     "title": "OpenRouter",
     "href": "/docs/remote-models/openrouter"
   },
+  "huggingface": {
+    "title": "Hugging Face",
+    "href": "/docs/remote-models/huggingface"
+  }
 }
docs/src/pages/docs/remote-models/huggingface.mdx (new file, 152 lines)
@@ -0,0 +1,152 @@
---
title: Hugging Face
description: Learn how to integrate Hugging Face models with Jan using the Router or Inference Endpoints.
keywords:
  [
    Hugging Face,
    Jan,
    Jan AI,
    Hugging Face Router,
    Hugging Face Inference Endpoints,
    Hugging Face API,
    Hugging Face Integration,
    Hugging Face API Integration
  ]
---

import { Callout, Steps } from 'nextra/components'
import { Settings, Plus } from 'lucide-react'

# Hugging Face

Jan supports Hugging Face models through two methods: the new **HF Router** (recommended) and **Inference Endpoints**. Both methods require a Hugging Face token and **billing to be set up**.

## Option 1: HF Router (Recommended)

The HF Router provides access to models from multiple providers (Replicate, Together AI, SambaNova, Fireworks, Cohere, and more) through a single endpoint.

<Steps>

### Step 1: Get Your HF Token

Visit [Hugging Face Settings > Access Tokens](https://huggingface.co/settings/tokens) and create a token. Make sure you have billing set up on your account.

### Step 2: Configure Jan

1. Go to **Settings** > **Model Providers** > **HuggingFace**
2. Enter your HF token
3. Use this URL: `https://router.huggingface.co/v1`

You can find out more about the HF Router [here](https://huggingface.co/docs/inference-providers/index).

### Step 3: Start Using Models

Jan comes with three HF Router models pre-configured. Select one and start chatting immediately.

</Steps>

<Callout type='info'>
The HF Router automatically routes your requests to the best available provider for each model, giving you access to a wide variety of models without managing individual endpoints.
</Callout>
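Because the Router exposes an OpenAI-compatible API, you can also sanity-check your token outside of Jan. Below is a minimal sketch (assuming Node 18+ with a global `fetch`, an `HF_TOKEN` environment variable, and an example model ID; substitute any model the Router serves for you):

```typescript
// Quick token check against the HF Router's OpenAI-compatible
// chat completions route.
const res = await fetch('https://router.huggingface.co/v1/chat/completions', {
  method: 'POST',
  headers: {
    Authorization: `Bearer ${process.env.HF_TOKEN}`, // never hard-code tokens
    'Content-Type': 'application/json',
  },
  body: JSON.stringify({
    model: 'meta-llama/Llama-3.1-8B-Instruct', // example model ID
    messages: [{ role: 'user', content: 'Reply with one word: ready?' }],
  }),
})
const data = await res.json()
console.log(data.choices?.[0]?.message?.content)
```

If this prints a reply, the same token and base URL will work in Jan's provider settings.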
## Option 2: HF Inference Endpoints

For more control over specific models and deployment configurations, you can use Hugging Face Inference Endpoints.

<Steps>

### Step 1: Navigate to the HuggingFace Model Hub

Visit the [Hugging Face Model Hub](https://huggingface.co/models) (make sure you are logged in) and pick the model you want to use.

### Step 2: Configure HF Inference Endpoint and Deploy

Once you have selected the model you want to use, click the **Deploy** button and choose a deployment method. We will use HF Inference Endpoints for this example.

<br/>

This takes you to the deployment setup page. For this example, we will leave the default settings under the GPU tab as they are and click **Create Endpoint**.

<br/>

Once your endpoint is ready, test that it works on the **Test your endpoint** tab.

<br/>

If you get a response, you can click on **Copy** to copy the endpoint URL and API key.

<Callout type='info'>
You will need to be logged in to Hugging Face Inference Endpoints and have a credit card on file to deploy a model.
</Callout>

### Step 3: Configure Jan

If you do not have an API key, you can create one under **Settings** > **Access Tokens** [here](https://huggingface.co/settings/tokens). Once you finish, copy the token and add it to Jan alongside your endpoint URL at **Settings** > **Model Providers** > **HuggingFace**.

**3.1 HF Token**
<br/>

**3.2 HF Endpoint URL**
<br/>

**3.3 Jan Settings**

<Callout type='warning'>
Make sure to add `/v1/` to the end of your endpoint URL. This is required by the OpenAI-compatible API format that Jan uses.
</Callout>
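Before saving the settings, you can verify that the URL is wired up correctly by listing the models the endpoint serves. A small sketch (the endpoint URL below is a placeholder; use the one you copied from the **Test your endpoint** page):

```typescript
// List models behind a (hypothetical) Inference Endpoint.
// Note the /v1 segment, as explained in the callout above.
const endpoint = 'https://YOUR-ENDPOINT.aws.endpoints.huggingface.cloud' // placeholder
const res = await fetch(`${endpoint}/v1/models`, {
  headers: { Authorization: `Bearer ${process.env.HF_TOKEN}` },
})
console.log(await res.json())
```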
**3.4 Add Model Details**

### Step 4: Start Using the Model

Now you can start using the model in any chat.

If you want to learn how to use Jan Nano with MCP, check out [the guide here](../jan-models/jan-nano-32).
<br/>

</Steps>

## Available Hugging Face Models

**Option 1 (HF Router):** Access to models from multiple providers, as shown in the providers image above.

**Option 2 (Inference Endpoints):** You can follow the steps above with a large number of models on Hugging Face and bring them to Jan. Check out other models in the [Hugging Face Model Hub](https://huggingface.co/models).

## Troubleshooting

Common issues and solutions:

**1. Started a chat but the model is not responding**
- Verify your API_KEY/HF_TOKEN is correct and not expired
- Ensure you have billing set up on your HF account
- For Inference Endpoints: make sure the endpoint is running; endpoints go idle after a period of inactivity (so you are not charged while not using them) and must be resumed before use

**2. Connection Problems**
- Check your internet connection
- Verify Hugging Face's system status
- Look for error messages in [Jan's logs](/docs/troubleshooting#how-to-get-error-logs)

**3. Model Unavailable**
- Confirm your API key has access to the model
- Check if you're using the correct model ID
- Verify your Hugging Face account has the necessary permissions

Need more help? Join our [Discord community](https://discord.gg/FTk2MvZwJH) or check the
[Hugging Face documentation](https://docs.huggingface.co/en/inference-endpoints/index).
@@ -1,9 +0,0 @@ (deleted file)
{
  "-- Switcher": {
    "type": "separator",
    "title": "Switcher"
  },
  "index": {
    "display": "hidden"
  }
}

@@ -1,87 +0,0 @@ (deleted file)
---
title: Coming Soon
description: Exciting new features and platforms are on the way. Stay tuned for Jan Web, Jan Mobile, and our API Platform.
keywords:
  [
    Jan,
    Customizable Intelligence, LLM,
    local AI,
    privacy focus,
    free and open source,
    private and offline,
    conversational AI,
    no-subscription fee,
    large language models,
    coming soon,
    Jan Web,
    Jan Mobile,
    API Platform,
  ]
---

import { Callout } from 'nextra/components'

<div className="text-center py-12">
  <div className="mb-8">
    <h1 className="text-4xl font-bold bg-gradient-to-r from-blue-600 to-purple-600 bg-clip-text text-transparent mb-4 py-2">
      🚀 Coming Soon
    </h1>
    <p className="text-xl text-gray-600 dark:text-gray-300 max-w-2xl mx-auto">
      We're working on the next stage of Jan - making our local assistant more powerful and available in more platforms.
    </p>
  </div>

  <div className="grid grid-cols-1 md:grid-cols-3 gap-6 max-w-4xl mx-auto mb-12">
    <div className="p-6 border border-gray-200 dark:border-gray-700 rounded-lg bg-gradient-to-br from-blue-50 to-indigo-50 dark:from-blue-900/20 dark:to-indigo-900/20">
      <div className="text-3xl mb-3">🌐</div>
      <h3 className="text-lg font-semibold mb-2">Jan Web</h3>
      <p className="text-sm text-gray-600 dark:text-gray-400">
        Access Jan directly from your browser with our powerful web interface
      </p>
    </div>

    <div className="p-6 border border-gray-200 dark:border-gray-700 rounded-lg bg-gradient-to-br from-green-50 to-emerald-50 dark:from-green-900/20 dark:to-emerald-900/20">
      <div className="text-3xl mb-3">📱</div>
      <h3 className="text-lg font-semibold mb-2">Jan Mobile</h3>
      <p className="text-sm text-gray-600 dark:text-gray-400">
        Take Jan on the go with our native mobile applications
      </p>
    </div>

    <div className="p-6 border border-gray-200 dark:border-gray-700 rounded-lg bg-gradient-to-br from-purple-50 to-pink-50 dark:from-purple-900/20 dark:to-pink-900/20">
      <div className="text-3xl mb-3">⚡</div>
      <h3 className="text-lg font-semibold mb-2">Jan Server</h3>
      <p className="text-sm text-gray-600 dark:text-gray-400">
        Integrate Jan's capabilities into your applications with our API
      </p>
    </div>
  </div>

  <Callout type="info">
    **Stay Updated**: Follow our [GitHub repository](https://github.com/menloresearch/jan) and join our [Discord community](https://discord.com/invite/FTk2MvZwJH) for the latest updates on these exciting releases!
  </Callout>

  <div className="mt-12">
    <h2 className="text-2xl font-semibold mb-6">What to Expect</h2>
    <div className="text-left max-w-2xl mx-auto space-y-4">
      <div className="flex items-start gap-3">
        <span className="text-green-500 text-xl">✓</span>
        <div>
          <strong>Seamless Experience:</strong> Unified interface across all platforms
        </div>
      </div>
      <div className="flex items-start gap-3">
        <span className="text-green-500 text-xl">✓</span>
        <div>
          <strong>Privacy First:</strong> Same privacy-focused approach you trust
        </div>
      </div>
      <div className="flex items-start gap-3">
        <span className="text-green-500 text-xl">✓</span>
        <div>
          <strong>Developer Friendly:</strong> Robust APIs and comprehensive documentation
        </div>
      </div>
    </div>
  </div>
</div>
BIN docs/src/pages/post/_assets/gpt-oss locally.jpeg (new file, 235 KiB)
BIN docs/src/pages/post/_assets/jan gpt-oss.jpeg (new file, 233 KiB)
BIN docs/src/pages/post/_assets/jan hub gpt-oss locally.jpeg (new file, 470 KiB)
BIN docs/src/pages/post/_assets/run gpt-oss locally in jan.jpeg (new file, 270 KiB)
docs/src/pages/post/run-gpt-oss-locally.mdx (new file, 211 lines)
@@ -0,0 +1,211 @@
---
title: "Run OpenAI's gpt-oss locally in 5 mins (Beginner Guide)"
description: "Complete 5-minute beginner guide to running OpenAI's gpt-oss locally. Step-by-step setup with Jan AI for private, offline AI conversations."
tags: OpenAI, gpt-oss, local AI, Jan, privacy, Apache-2.0, llama.cpp, Ollama, LM Studio
categories: guides
date: 2025-08-06
ogImage: assets/gpt-oss%20locally.jpeg
twitter:
  card: summary_large_image
  site: "@jandotai"
  title: "Run OpenAI's gpt-oss Locally in 5 Minutes (Beginner Guide)"
  description: "Complete 5-minute beginner guide to running OpenAI's gpt-oss locally with Jan AI for private, offline conversations."
  image: assets/gpt-oss%20locally.jpeg
---
import { Callout } from 'nextra/components'
import CTABlog from '@/components/Blog/CTA'

# Run OpenAI's gpt-oss Locally in 5 mins

OpenAI launched [gpt-oss](https://openai.com/index/introducing-gpt-oss/), marking their return to open-source AI after GPT-2. This model is designed to run locally on consumer hardware. This guide shows you how to install and run gpt-oss on your computer for private, offline AI conversations.

## What is gpt-oss?

gpt-oss is OpenAI's open-source large language model, released under the Apache-2.0 license. Unlike ChatGPT, gpt-oss:

- Runs completely offline - No internet required after setup
- 100% private - Your conversations never leave your device
- Unlimited usage - No token limits or rate limiting
- Free forever - No subscription fees
- Commercial use allowed - Apache-2.0 license permits business use

Running AI models locally means everything happens on your own hardware, giving you complete control over your data and conversations.

## gpt-oss System Requirements

| Component | Minimum | Recommended |
|-----------|---------|-------------|
| **RAM** | 16 GB | 32 GB+ |
| **Storage** | 11+ GB free | 25 GB+ free |
| **CPU** | 4 cores | 8+ cores |
| **GPU** | Optional | Modern GPU with 6GB+ VRAM recommended |
| **OS** | Windows 10+, macOS 11+, Linux | Latest versions |

**Installation apps available:**
- **Jan** (Recommended - easiest setup)
- **llama.cpp** (Command line)
- **Ollama** (Docker-based)
- **LM Studio** (GUI alternative)

## How to install gpt-oss locally with Jan (5 mins)

### Step 1: Download Jan

First, download Jan to run gpt-oss locally: [Download Jan AI](https://jan.ai/)

<Callout type="info">
Jan is the simplest way to run AI models locally. It automatically handles CPU/GPU optimization, provides a clean chat interface, and requires zero technical knowledge.
</Callout>

### Step 2: Install gpt-oss Model (2-3 minutes)

1. Open Jan Hub → search "gpt-oss" (it appears at the top)
2. Click Download and wait for completion (~11GB download)
3. Installation is automatic - Jan handles everything

### Step 3: Start using gpt-oss offline (30 seconds)

1. Go to New Chat → select gpt-oss-20b from the model picker
2. Start chatting - Jan automatically optimizes for your hardware
3. You're done! Your AI conversations now stay completely private

Success: Your gpt-oss setup is complete. No internet required for chatting, unlimited usage, zero subscription fees.
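If you prefer to script against the model instead of chatting in the UI, Jan can also expose an OpenAI-compatible local API server. A hedged sketch (this assumes you have enabled the local API server in Jan's settings; the address below, `127.0.0.1:1337`, is an assumption, so use whatever host and port your settings screen shows):

```typescript
// Chat with the locally loaded gpt-oss model through Jan's
// OpenAI-compatible endpoint. Nothing leaves your machine.
const res = await fetch('http://127.0.0.1:1337/v1/chat/completions', {
  method: 'POST',
  headers: { 'Content-Type': 'application/json' },
  body: JSON.stringify({
    model: 'gpt-oss-20b', // the ID shown in Jan's model picker
    messages: [{ role: 'user', content: 'In one sentence: why run AI locally?' }],
  }),
})
const data = await res.json()
console.log(data.choices?.[0]?.message?.content)
```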
## Jan with gpt-oss vs ChatGPT vs other Local AI Models

| Feature | gpt-oss (Local) | ChatGPT Plus | Claude Pro | Other Local Models |
|---------|----------------|--------------|------------|-------------------|
| Cost | Free forever | $20/month | $20/month | Free |
| Privacy | 100% private | Data sent to OpenAI | Data sent to Anthropic | 100% private |
| Internet | Offline after setup | Requires internet | Requires internet | Offline |
| Usage limits | Unlimited | Rate limited | Rate limited | Unlimited |
| Performance | Good (hardware dependent) | Excellent | Excellent | Varies |
| Setup difficulty | Easy with Jan | None | None | Varies |

## Alternative Installation Methods

### Option 1: Jan (Recommended)

- Best for: Complete beginners, users wanting a GUI interface
- Setup time: 5 minutes
- Difficulty: Very Easy

Already covered above - [Download Jan](https://jan.ai/)

### Option 2: llama.cpp (Command Line)

- Best for: Developers, terminal users, custom integrations
- Setup time: 10-15 minutes
- Difficulty: Intermediate

```bash
# macOS
brew install llama-cpp

# Windows: grab the Windows exe from releases
curl -L -o gpt-oss-20b.gguf https://huggingface.co/openai/gpt-oss-20b-gguf/resolve/main/gpt-oss-20b.gguf
./main -m gpt-oss-20b.gguf --chat-simple

# Add GPU acceleration (adjust -ngl value based on your GPU VRAM)
./main -m gpt-oss-20b.gguf --chat-simple -ngl 20
```

### Option 3: Ollama (Docker-Based)

- Best for: Docker users, server deployments
- Setup time: 5-10 minutes
- Difficulty: Intermediate

```bash
# Install from https://ollama.com
ollama run gpt-oss:20b
```
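Ollama also serves an OpenAI-compatible API on `localhost:11434`, so the same kind of client code works against it once the model has been pulled. A brief sketch:

```typescript
// Query gpt-oss through Ollama's OpenAI-compatible endpoint.
const res = await fetch('http://localhost:11434/v1/chat/completions', {
  method: 'POST',
  headers: { 'Content-Type': 'application/json' },
  body: JSON.stringify({
    model: 'gpt-oss:20b',
    messages: [{ role: 'user', content: 'Hello from Ollama!' }],
  }),
})
console.log((await res.json()).choices?.[0]?.message?.content)
```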
### Option 4: LM Studio (GUI Alternative)

- Best for: Users wanting a GUI but not Jan
- Setup time: 10 minutes
- Difficulty: Easy

1. Download LM Studio from the official website
2. Go to Models → search "gpt-oss-20b (GGUF)"
3. Download the model (wait for completion)
4. Go to the Chat tab → select the model and start chatting

## gpt-oss Performance & Troubleshooting

### Expected Performance Benchmarks

| Hardware Setup | First Response | Subsequent Responses | Tokens/Second |
|---------------|---------------|---------------------|---------------|
| **16GB RAM + CPU only** | 30-45 seconds | 3-6 seconds | 3-8 tokens/sec |
| **32GB RAM + RTX 3060** | 15-25 seconds | 1-3 seconds | 15-25 tokens/sec |
| **32GB RAM + RTX 4080+** | 8-15 seconds | 1-2 seconds | 25-45 tokens/sec |

### Common Issues & Solutions

Performance optimization tips:
- First response is slow: Normal - kernels compile on the first run, then responses speed up dramatically
- Out of VRAM error: Reduce context length in settings or switch to CPU mode
- Out of memory: Close memory-heavy apps (Chrome, games, video editors)
- Slow responses: Check if other apps are using the GPU/CPU heavily

Quick fixes:
1. Restart Jan if responses become slow
2. Lower the context window from 4096 to 2048 tokens
3. Enable CPU mode if GPU issues persist
4. Free up RAM by closing unused applications

## Frequently Asked Questions (FAQ)

### Is gpt-oss completely free?
Yes! gpt-oss is 100% free under the Apache-2.0 license. No subscription fees, no token limits, no hidden costs.

### How much internet data does gpt-oss use?
Only for the initial 11GB download. After installation, gpt-oss works completely offline with zero internet usage.

### Can I use gpt-oss for commercial projects?
Absolutely! The Apache-2.0 license permits commercial use, modification, and distribution.

### Is gpt-oss better than ChatGPT?
gpt-oss offers different advantages: complete privacy, unlimited usage, offline capability, and no costs. ChatGPT may have better performance but requires internet and subscriptions.

### What happens to my conversations with gpt-oss?
Your conversations stay 100% on your device. Nothing is sent to OpenAI, Jan, or any external servers.

### Can I run gpt-oss on a Mac with 8GB RAM?
No, gpt-oss requires a minimum of 16 GB of RAM. Consider upgrading your RAM or using cloud-based alternatives.

### How do I update gpt-oss to newer versions?
Jan automatically notifies you of updates. Simply click update in Jan Hub when new versions are available.

## Why Choose gpt-oss Over ChatGPT Plus?

gpt-oss advantages:
- $0/month vs $20/month for ChatGPT Plus
- 100% private - no data leaves your device
- Unlimited usage - no rate limits or restrictions
- Works offline - no internet required after setup
- Commercial use allowed - build businesses with it

When to choose ChatGPT Plus instead:
- You need the absolute best performance
- You don't want to manage a local installation
- You have less than 16GB of RAM

## Get started with gpt-oss today

Ready to try gpt-oss?
- Download Jan: [https://jan.ai/](https://jan.ai/)
- View the source code: [https://github.com/menloresearch/jan](https://github.com/menloresearch/jan)
- Need help? Check our [local AI guide](/post/run-ai-models-locally) for beginners

<CTABlog />
@@ -6,9 +6,11 @@
       "controllerType": "dropdown",
       "controllerProps": {
         "value": "none",
-        "options": []
+        "options": [],
+        "recommended": ""
       }
     },

     {
       "key": "auto_update_engine",
       "title": "Auto update engine",
@@ -23,18 +25,6 @@
       "controllerType": "checkbox",
       "controllerProps": { "value": true }
     },
-    {
-      "key": "chat_template",
-      "title": "Custom Jinja Chat template",
-      "description": "Custom Jinja chat_template to be used for the model",
-      "controllerType": "input",
-      "controllerProps": {
-        "value": "",
-        "placeholder": "e.g., {% for message in messages %}...{% endfor %} (default is read from GGUF)",
-        "type": "text",
-        "textAlign": "right"
-      }
-    },
     {
       "key": "threads",
       "title": "Threads",
@@ -27,8 +27,18 @@ export async function listSupportedBackends(): Promise<
     if (features.avx) supportedBackends.push('win-avx-x64')
     if (features.avx2) supportedBackends.push('win-avx2-x64')
     if (features.avx512) supportedBackends.push('win-avx512-x64')
-    if (features.cuda11) supportedBackends.push('win-avx2-cuda-cu11.7-x64')
-    if (features.cuda12) supportedBackends.push('win-avx2-cuda-cu12.0-x64')
+    if (features.cuda11) {
+      if (features.avx512) supportedBackends.push('win-avx512-cuda-cu11.7-x64')
+      else if (features.avx2) supportedBackends.push('win-avx2-cuda-cu11.7-x64')
+      else if (features.avx) supportedBackends.push('win-avx-cuda-cu11.7-x64')
+      else supportedBackends.push('win-noavx-cuda-cu11.7-x64')
+    }
+    if (features.cuda12) {
+      if (features.avx512) supportedBackends.push('win-avx512-cuda-cu12.0-x64')
+      else if (features.avx2) supportedBackends.push('win-avx2-cuda-cu12.0-x64')
+      else if (features.avx) supportedBackends.push('win-avx-cuda-cu12.0-x64')
+      else supportedBackends.push('win-noavx-cuda-cu12.0-x64')
+    }
     if (features.vulkan) supportedBackends.push('win-vulkan-x64')
   }
   // not available yet, placeholder for future
@@ -39,8 +49,22 @@ export async function listSupportedBackends(): Promise<
     if (features.avx) supportedBackends.push('linux-avx-x64')
     if (features.avx2) supportedBackends.push('linux-avx2-x64')
     if (features.avx512) supportedBackends.push('linux-avx512-x64')
-    if (features.cuda11) supportedBackends.push('linux-avx2-cuda-cu11.7-x64')
-    if (features.cuda12) supportedBackends.push('linux-avx2-cuda-cu12.0-x64')
+    if (features.cuda11) {
+      if (features.avx512)
+        supportedBackends.push('linux-avx512-cuda-cu11.7-x64')
+      else if (features.avx2)
+        supportedBackends.push('linux-avx2-cuda-cu11.7-x64')
+      else if (features.avx) supportedBackends.push('linux-avx-cuda-cu11.7-x64')
+      else supportedBackends.push('linux-noavx-cuda-cu11.7-x64')
+    }
+    if (features.cuda12) {
+      if (features.avx512)
+        supportedBackends.push('linux-avx512-cuda-cu12.0-x64')
+      else if (features.avx2)
+        supportedBackends.push('linux-avx2-cuda-cu12.0-x64')
+      else if (features.avx) supportedBackends.push('linux-avx-cuda-cu12.0-x64')
+      else supportedBackends.push('linux-noavx-cuda-cu12.0-x64')
+    }
     if (features.vulkan) supportedBackends.push('linux-vulkan-x64')
   }
   // not available yet, placeholder for future
@@ -236,10 +260,16 @@ async function _getSupportedFeatures() {
       if (compareVersions(driverVersion, minCuda12DriverVersion) >= 0)
         features.cuda12 = true
     }

-    if (gpuInfo.vulkan_info?.api_version) features.vulkan = true
+    // Vulkan support check - only discrete GPUs with 6GB+ VRAM
+    if (
+      gpuInfo.vulkan_info?.api_version &&
+      gpuInfo.vulkan_info?.device_type === 'DISCRETE_GPU' &&
+      gpuInfo.total_memory >= 6 * 1024
+    ) {
+      // 6GB (total_memory is in MB)
+      features.vulkan = true
+    }
   }

   return features
 }
@@ -39,6 +39,7 @@ type LlamacppConfig = {
   auto_unload: boolean
   chat_template: string
   n_gpu_layers: number
+  override_tensor_buffer_t: string
   ctx_size: number
   threads: number
   threads_batch: number
@@ -144,7 +145,6 @@ export default class llamacpp_extension extends AIEngine {
   readonly providerId: string = 'llamacpp'

   private config: LlamacppConfig
-  private activeSessions: Map<number, SessionInfo> = new Map()
   private providerPath!: string
   private apiSecret: string = 'JustAskNow'
   private pendingDownloads: Map<string, Promise<void>> = new Map()
@@ -297,6 +297,12 @@ export default class llamacpp_extension extends AIEngine {
       return { value: key, name: key }
     })

+    // Set the recommended backend based on bestAvailableBackendString
+    if (bestAvailableBackendString) {
+      backendSetting.controllerProps.recommended =
+        bestAvailableBackendString
+    }
+
     const savedBackendSetting = await this.getSetting<string>(
       'version_backend',
       originalDefaultBackendValue
@@ -357,9 +363,16 @@ export default class llamacpp_extension extends AIEngine {

     // Handle fresh installation case where version_backend might be 'none' or invalid
     if (
-      !effectiveBackendString ||
-      effectiveBackendString === 'none' ||
-      !effectiveBackendString.includes('/')
+      (!effectiveBackendString ||
+        effectiveBackendString === 'none' ||
+        !effectiveBackendString.includes('/') ||
+        // If the selected backend is not in the list of supported backends
+        // Need to reset too
+        !version_backends.some(
+          (e) => `${e.version}/${e.backend}` === effectiveBackendString
+        )) &&
+      // Ensure we have a valid best available backend
+      bestAvailableBackendString
     ) {
       effectiveBackendString = bestAvailableBackendString
       logger.info(
@@ -380,6 +393,17 @@ export default class llamacpp_extension extends AIEngine {
       })
     )
     logger.info(`Updated UI settings to show: ${effectiveBackendString}`)
+
+    // Emit for updating fe
+    if (events && typeof events.emit === 'function') {
+      logger.info(
+        `Emitting settingsChanged event for version_backend with value: ${effectiveBackendString}`
+      )
+      events.emit('settingsChanged', {
+        key: 'version_backend',
+        value: effectiveBackendString,
+      })
+    }
   }

   // Download and install the backend if not already present
@@ -746,16 +770,6 @@ export default class llamacpp_extension extends AIEngine {
   override async onUnload(): Promise<void> {
-    // Terminate all active sessions
-    for (const [_, sInfo] of this.activeSessions) {
-      try {
-        await this.unload(sInfo.model_id)
-      } catch (error) {
-        logger.error(`Failed to unload model ${sInfo.model_id}:`, error)
-      }
-    }
-
-    // Clear the sessions map
-    this.activeSessions.clear()
   }

   onSettingUpdate<T>(key: string, value: T): void {
@@ -1079,67 +1093,13 @@ export default class llamacpp_extension extends AIEngine {
   * Function to find a random port
   */
   private async getRandomPort(): Promise<number> {
-    const MAX_ATTEMPTS = 20000
-    let attempts = 0
-
-    while (attempts < MAX_ATTEMPTS) {
-      const port = Math.floor(Math.random() * 1000) + 3000
-
-      const isAlreadyUsed = Array.from(this.activeSessions.values()).some(
-        (info) => info.port === port
-      )
-
-      if (!isAlreadyUsed) {
-        const isAvailable = await invoke<boolean>('is_port_available', { port })
-        if (isAvailable) return port
-      }
-
-      attempts++
+    try {
+      const port = await invoke<number>('get_random_port')
+      return port
+    } catch {
+      logger.error('Unable to find a suitable port')
+      throw new Error('Unable to find a suitable port for model')
     }
-
-    throw new Error('Failed to find an available port for the model to load')
   }

-  private async sleep(ms: number): Promise<void> {
-    return new Promise((resolve) => setTimeout(resolve, ms))
-  }
-
-  private async waitForModelLoad(
-    sInfo: SessionInfo,
-    timeoutMs = 240_000
-  ): Promise<void> {
-    await this.sleep(500) // Wait before first check
-    const start = Date.now()
-    while (Date.now() - start < timeoutMs) {
-      try {
-        const res = await fetch(`http://localhost:${sInfo.port}/health`)
-
-        if (res.status === 503) {
-          const body = await res.json()
-          const msg = body?.error?.message ?? 'Model loading'
-          logger.info(`waiting for model load... (${msg})`)
-        } else if (res.ok) {
-          const body = await res.json()
-          if (body.status === 'ok') {
-            return
-          } else {
-            logger.warn('Unexpected OK response from /health:', body)
-          }
-        } else {
-          logger.warn(`Unexpected status ${res.status} from /health`)
-        }
-      } catch (e) {
-        await this.unload(sInfo.model_id)
-        throw new Error(`Model appears to have crashed: ${e}`)
-      }
-
-      await this.sleep(800) // Retry interval
-    }
-
-    await this.unload(sInfo.model_id)
-    throw new Error(
-      `Timed out loading model after ${timeoutMs}... killing llamacpp`
-    )
-  }

   override async load(
@@ -1147,7 +1107,7 @@ export default class llamacpp_extension extends AIEngine {
     overrideSettings?: Partial<LlamacppConfig>,
     isEmbedding: boolean = false
   ): Promise<SessionInfo> {
-    const sInfo = this.findSessionByModel(modelId)
+    const sInfo = await this.findSessionByModel(modelId)
     if (sInfo) {
       throw new Error('Model already loaded!!')
     }
@@ -1238,6 +1198,14 @@ export default class llamacpp_extension extends AIEngine {
     args.push('--jinja')
     args.push('--reasoning-format', 'none')
     args.push('-m', modelPath)
+    // For overriding tensor buffer type, useful where
+    // massive MOE models can be made faster by keeping attention on the GPU
+    // and offloading the expert FFNs to the CPU.
+    // This is an expert level settings and should only be used by people
+    // who knows what they are doing.
+    // Takes a regex with matching tensor name as input
+    if (cfg.override_tensor_buffer_t)
+      args.push('--override-tensor', cfg.override_tensor_buffer_t)
     args.push('-a', modelId)
     args.push('--port', String(port))
     if (modelConfig.mmproj_path) {
@@ -1247,11 +1215,6 @@ export default class llamacpp_extension extends AIEngine {
       ])
       args.push('--mmproj', mmprojPath)
     }
-
-    if (cfg.ctx_size !== undefined) {
-      args.push('-c', String(cfg.ctx_size))
-    }

     // Add remaining options from the interface
     if (cfg.chat_template) args.push('--chat-template', cfg.chat_template)
     const gpu_layers =
@@ -1263,8 +1226,9 @@ export default class llamacpp_extension extends AIEngine {
     if (cfg.batch_size > 0) args.push('--batch-size', String(cfg.batch_size))
     if (cfg.ubatch_size > 0) args.push('--ubatch-size', String(cfg.ubatch_size))
     if (cfg.device.length > 0) args.push('--device', cfg.device)
-    if (cfg.split_mode.length > 0) args.push('--split-mode', cfg.split_mode)
-    if (cfg.main_gpu !== undefined)
+    if (cfg.split_mode.length > 0 && cfg.split_mode != 'layer')
+      args.push('--split-mode', cfg.split_mode)
+    if (cfg.main_gpu !== undefined && cfg.main_gpu != 0)
       args.push('--main-gpu', String(cfg.main_gpu))

     // Boolean flags
@@ -1280,19 +1244,26 @@ export default class llamacpp_extension extends AIEngine {
     } else {
       if (cfg.ctx_size > 0) args.push('--ctx-size', String(cfg.ctx_size))
       if (cfg.n_predict > 0) args.push('--n-predict', String(cfg.n_predict))
-      args.push('--cache-type-k', cfg.cache_type_k)
+      if (cfg.cache_type_k && cfg.cache_type_k != 'f16')
+        args.push('--cache-type-k', cfg.cache_type_k)
       if (
-        (cfg.flash_attn && cfg.cache_type_v != 'f16') ||
+        cfg.flash_attn &&
+        cfg.cache_type_v != 'f16' &&
+        cfg.cache_type_v != 'f32'
       ) {
         args.push('--cache-type-v', cfg.cache_type_v)
       }
-      args.push('--defrag-thold', String(cfg.defrag_thold))
+      if (cfg.defrag_thold && cfg.defrag_thold != 0.1)
+        args.push('--defrag-thold', String(cfg.defrag_thold))

-      args.push('--rope-scaling', cfg.rope_scaling)
-      args.push('--rope-scale', String(cfg.rope_scale))
-      args.push('--rope-freq-base', String(cfg.rope_freq_base))
-      args.push('--rope-freq-scale', String(cfg.rope_freq_scale))
+      if (cfg.rope_scaling && cfg.rope_scaling != 'none')
+        args.push('--rope-scaling', cfg.rope_scaling)
+      if (cfg.rope_scale && cfg.rope_scale != 1)
+        args.push('--rope-scale', String(cfg.rope_scale))
+      if (cfg.rope_freq_base && cfg.rope_freq_base != 0)
+        args.push('--rope-freq-base', String(cfg.rope_freq_base))
+      if (cfg.rope_freq_scale && cfg.rope_freq_scale != 1)
+        args.push('--rope-freq-scale', String(cfg.rope_freq_scale))
     }

     logger.info('Calling Tauri command llama_load with args:', args)
@@ -1306,26 +1277,20 @@ export default class llamacpp_extension extends AIEngine {
         libraryPath,
         args,
       })

-      // Store the session info for later use
-      this.activeSessions.set(sInfo.pid, sInfo)
-      await this.waitForModelLoad(sInfo)

       return sInfo
     } catch (error) {
-      logger.error('Error loading llama-server:\n', error)
-      throw new Error(`Failed to load llama-server: ${error}`)
+      logger.error('Error in load command:\n', error)
+      throw new Error(`Failed to load model:\n${error}`)
     }
   }

   override async unload(modelId: string): Promise<UnloadResult> {
-    const sInfo: SessionInfo = this.findSessionByModel(modelId)
+    const sInfo: SessionInfo = await this.findSessionByModel(modelId)
     if (!sInfo) {
       throw new Error(`No active session found for model: ${modelId}`)
     }
     const pid = sInfo.pid
     try {
-      this.activeSessions.delete(pid)

       // Pass the PID as the session_id
       const result = await invoke<UnloadResult>('unload_llama_model', {
@@ -1337,13 +1302,11 @@ export default class llamacpp_extension extends AIEngine {
         logger.info(`Successfully unloaded model with PID ${pid}`)
       } else {
         logger.warn(`Failed to unload model: ${result.error}`)
-        this.activeSessions.set(sInfo.pid, sInfo)
       }

       return result
     } catch (error) {
       logger.error('Error in unload command:', error)
-      this.activeSessions.set(sInfo.pid, sInfo)
       return {
         success: false,
         error: `Failed to unload model: ${error}`,
@@ -1466,17 +1429,21 @@ export default class llamacpp_extension extends AIEngine {
     }
   }

-  private findSessionByModel(modelId: string): SessionInfo | undefined {
-    return Array.from(this.activeSessions.values()).find(
-      (session) => session.model_id === modelId
-    )
+  private async findSessionByModel(modelId: string): Promise<SessionInfo> {
+    try {
+      let sInfo = await invoke<SessionInfo>('find_session_by_model', { modelId })
+      return sInfo
+    } catch (e) {
+      logger.error(e)
+      throw new Error(String(e))
+    }
   }

   override async chat(
     opts: chatCompletionRequest,
     abortController?: AbortController
   ): Promise<chatCompletion | AsyncIterable<chatCompletionChunk>> {
-    const sessionInfo = this.findSessionByModel(opts.model)
+    const sessionInfo = await this.findSessionByModel(opts.model)
     if (!sessionInfo) {
       throw new Error(`No active session found for model: ${opts.model}`)
     }
@@ -1492,7 +1459,6 @@ export default class llamacpp_extension extends AIEngine {
         throw new Error('Model appears to have crashed! Please reload!')
       }
     } else {
-      this.activeSessions.delete(sessionInfo.pid)
       throw new Error('Model have crashed! Please reload!')
     }
     const baseUrl = `http://localhost:${sessionInfo.port}/v1`
@@ -1541,11 +1507,13 @@ export default class llamacpp_extension extends AIEngine {
   }

   override async getLoadedModels(): Promise<string[]> {
-    let lmodels: string[] = []
-    for (const [_, sInfo] of this.activeSessions) {
-      lmodels.push(sInfo.model_id)
-    }
-    return lmodels
+    try {
+      let models: string[] = await invoke<string[]>('get_loaded_models')
+      return models
+    } catch (e) {
+      logger.error(e)
+      throw new Error(e)
+    }
   }

   async getDevices(): Promise<DeviceList[]> {
@@ -1575,7 +1543,7 @@ export default class llamacpp_extension extends AIEngine {
   }

   async embed(text: string[]): Promise<EmbeddingResponse> {
-    let sInfo = this.findSessionByModel('sentence-transformer-mini')
+    let sInfo = await this.findSessionByModel('sentence-transformer-mini')
     if (!sInfo) {
       const downloadedModelList = await this.list()
       if (
@@ -51,6 +51,164 @@ describe('Backend functions', () => {
    ])
  })

  it('should return CUDA backends with proper CPU instruction detection for Windows', async () => {
    // Mock system info with CUDA support and AVX512
    window.core.api.getSystemInfo = vi.fn().mockResolvedValue({
      os_type: 'windows',
      cpu: {
        arch: 'x86_64',
        extensions: ['avx', 'avx2', 'avx512'],
      },
      gpus: [
        {
          driver_version: '530.41',
          nvidia_info: { compute_capability: '8.6' },
        },
      ],
    })

    // Mock GitHub releases with CUDA backends
    const mockReleases = [
      {
        tag_name: 'v1.0.0',
        assets: [
          { name: 'llama-v1.0.0-bin-win-avx512-cuda-cu12.0-x64.tar.gz' },
          { name: 'llama-v1.0.0-bin-win-avx2-cuda-cu12.0-x64.tar.gz' },
          { name: 'llama-v1.0.0-bin-win-avx-cuda-cu12.0-x64.tar.gz' },
          { name: 'llama-v1.0.0-bin-win-noavx-cuda-cu12.0-x64.tar.gz' },
        ],
      },
    ]

    global.fetch = vi.fn().mockResolvedValue({
      ok: true,
      json: () => Promise.resolve(mockReleases),
    })

    const result = await listSupportedBackends()

    expect(result).toContain({ version: 'v1.0.0', backend: 'win-avx512-cuda-cu12.0-x64' })
  })

  it('should select appropriate CUDA backend based on CPU features - AVX2 only', async () => {
    // Mock system info with CUDA support but only AVX2
    window.core.api.getSystemInfo = vi.fn().mockResolvedValue({
      os_type: 'windows',
      cpu: {
        arch: 'x86_64',
        extensions: ['avx', 'avx2'], // No AVX512
      },
      gpus: [
        {
          driver_version: '530.41',
          nvidia_info: { compute_capability: '8.6' },
        },
      ],
    })

    const mockReleases = [
      {
        tag_name: 'v1.0.0',
        assets: [
          { name: 'llama-v1.0.0-bin-win-avx512-cuda-cu12.0-x64.tar.gz' },
          { name: 'llama-v1.0.0-bin-win-avx2-cuda-cu12.0-x64.tar.gz' },
          { name: 'llama-v1.0.0-bin-win-avx-cuda-cu12.0-x64.tar.gz' },
          { name: 'llama-v1.0.0-bin-win-noavx-cuda-cu12.0-x64.tar.gz' },
        ],
      },
    ]

    global.fetch = vi.fn().mockResolvedValue({
      ok: true,
      json: () => Promise.resolve(mockReleases),
    })

    const result = await listSupportedBackends()

    expect(result).toContain({ version: 'v1.0.0', backend: 'win-avx2-cuda-cu12.0-x64' })
    expect(result).not.toContain({ version: 'v1.0.0', backend: 'win-avx512-cuda-cu12.0-x64' })
  })

  it('should select appropriate CUDA backend based on CPU features - no AVX', async () => {
    // Mock system info with CUDA support but no AVX
    window.core.api.getSystemInfo = vi.fn().mockResolvedValue({
      os_type: 'windows',
      cpu: {
        arch: 'x86_64',
        extensions: [], // No AVX extensions
      },
      gpus: [
        {
          driver_version: '530.41',
          nvidia_info: { compute_capability: '8.6' },
        },
      ],
    })

    const mockReleases = [
      {
        tag_name: 'v1.0.0',
        assets: [
          { name: 'llama-v1.0.0-bin-win-avx512-cuda-cu12.0-x64.tar.gz' },
          { name: 'llama-v1.0.0-bin-win-avx2-cuda-cu12.0-x64.tar.gz' },
          { name: 'llama-v1.0.0-bin-win-avx-cuda-cu12.0-x64.tar.gz' },
          { name: 'llama-v1.0.0-bin-win-noavx-cuda-cu12.0-x64.tar.gz' },
        ],
      },
    ]

    global.fetch = vi.fn().mockResolvedValue({
      ok: true,
      json: () => Promise.resolve(mockReleases),
    })

    const result = await listSupportedBackends()

    expect(result).toContain({ version: 'v1.0.0', backend: 'win-noavx-cuda-cu12.0-x64' })
    expect(result).not.toContain({ version: 'v1.0.0', backend: 'win-avx2-cuda-cu12.0-x64' })
    expect(result).not.toContain({ version: 'v1.0.0', backend: 'win-avx512-cuda-cu12.0-x64' })
  })

  it('should return CUDA backends with proper CPU instruction detection for Linux', async () => {
    // Mock system info with CUDA support and AVX support
    window.core.api.getSystemInfo = vi.fn().mockResolvedValue({
      os_type: 'linux',
      cpu: {
        arch: 'x86_64',
        extensions: ['avx'], // Only AVX, no AVX2
      },
      gpus: [
        {
          driver_version: '530.60.13',
          nvidia_info: { compute_capability: '8.6' },
        },
      ],
    })

    const mockReleases = [
      {
        tag_name: 'v1.0.0',
        assets: [
          { name: 'llama-v1.0.0-bin-linux-avx512-cuda-cu12.0-x64.tar.gz' },
          { name: 'llama-v1.0.0-bin-linux-avx2-cuda-cu12.0-x64.tar.gz' },
          { name: 'llama-v1.0.0-bin-linux-avx-cuda-cu12.0-x64.tar.gz' },
          { name: 'llama-v1.0.0-bin-linux-noavx-cuda-cu12.0-x64.tar.gz' },
        ],
      },
    ]

    global.fetch = vi.fn().mockResolvedValue({
      ok: true,
      json: () => Promise.resolve(mockReleases),
    })

    const result = await listSupportedBackends()

    expect(result).toContain({ version: 'v1.0.0', backend: 'linux-avx-cuda-cu12.0-x64' })
    expect(result).not.toContain({ version: 'v1.0.0', backend: 'linux-avx2-cuda-cu12.0-x64' })
    expect(result).not.toContain({ version: 'v1.0.0', backend: 'linux-avx512-cuda-cu12.0-x64' })
  })

  it('should return supported backends for macOS arm64', async () => {
    window.core.api.getSystemInfo = vi.fn().mockResolvedValue({
      os_type: 'macos',
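The three Windows cases and the Linux case above pin down one rule: offer only the CUDA build whose CPU-feature suffix matches the best instruction set the host actually supports. A sketch of the selection step the tests imply; the names are illustrative, not the code under test:

```typescript
// Illustrative fallback chain: avx512 -> avx2 -> avx -> noavx.
type CpuSuffix = 'avx512' | 'avx2' | 'avx' | 'noavx'

function pickCpuSuffix(extensions: string[]): CpuSuffix {
  if (extensions.includes('avx512')) return 'avx512'
  if (extensions.includes('avx2')) return 'avx2'
  if (extensions.includes('avx')) return 'avx'
  return 'noavx'
}

// pickCpuSuffix(['avx', 'avx2']) === 'avx2', so only
// win-avx2-cuda-cu12.0-x64 is kept, as the second test asserts.
```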
@@ -17,7 +17,7 @@
    "test:coverage": "vitest run --coverage",
    "test:prepare": "yarn build:icon && yarn copy:assets:tauri && yarn build --no-bundle ",
    "dev:web": "yarn workspace @janhq/web-app dev",
    "dev:tauri": "yarn build:icon && yarn copy:assets:tauri && tauri dev",
    "dev:tauri": "yarn build:icon && yarn copy:assets:tauri && cross-env IS_CLEAN=true tauri dev",
    "copy:assets:tauri": "cpx \"pre-install/*.tgz\" \"src-tauri/resources/pre-install/\"",
    "download:lib": "node ./scripts/download-lib.mjs",
    "download:bin": "node ./scripts/download-bin.mjs",
@@ -63,8 +63,12 @@ nix = "=0.30.1"

[target.'cfg(windows)'.dependencies]
libc = "0.2.172"
windows-sys = { version = "0.60.2", features = ["Win32_Storage_FileSystem"] }

[target.'cfg(not(any(target_os = "android", target_os = "ios")))'.dependencies]
tauri-plugin-updater = "2"
once_cell = "1.18"
tauri-plugin-single-instance = { version = "2.0.0", features = ["deep-link"] }

[target.'cfg(windows)'.dev-dependencies]
tempfile = "3.20.0"
@@ -10,7 +10,11 @@ use tokio::{
    time::{sleep, timeout},
};

use super::{cmd::get_jan_data_folder_path, state::AppState};
use super::{
    cmd::get_jan_data_folder_path,
    state::AppState,
    utils::can_override_npx,
};

const DEFAULT_MCP_CONFIG: &str = r#"{
  "mcpServers": {
@@ -512,8 +516,8 @@ async fn schedule_mcp_start_task<R: Runtime>(
        .ok_or_else(|| format!("Failed to extract command args from config for {name}"))?;

    let mut cmd = Command::new(command.clone());

    if command == "npx" {
    if command == "npx" && can_override_npx() {
        let mut cache_dir = app_path.clone();
        cache_dir.push(".npx");
        let bun_x_path = format!("{}/bun", bin_path.display());
@@ -43,8 +43,8 @@ pub fn install_extensions(app: tauri::AppHandle, force: bool) -> Result<(), Stri

    let mut clean_up = force;

    // Check CLEAN environment variable to optionally skip extension install
    if std::env::var("CLEAN").is_ok() {
    // Check IS_CLEAN environment variable to optionally skip extension install
    if std::env::var("IS_CLEAN").is_ok() {
        clean_up = true;
    }
    log::info!(
@@ -1,7 +1,9 @@
use base64::{engine::general_purpose, Engine as _};
use hmac::{Hmac, Mac};
use rand::{rngs::StdRng, Rng, SeedableRng};
use serde::{Deserialize, Serialize};
use sha2::Sha256;
use std::collections::HashSet;
use std::path::PathBuf;
use std::process::Stdio;
use std::time::Duration;
@@ -67,13 +69,39 @@ pub struct DeviceInfo {
    pub free: i32,
}

#[cfg(windows)]
use std::os::windows::ffi::OsStrExt;

#[cfg(windows)]
use std::ffi::OsStr;

#[cfg(windows)]
use windows_sys::Win32::Storage::FileSystem::GetShortPathNameW;

#[cfg(windows)]
pub fn get_short_path<P: AsRef<std::path::Path>>(path: P) -> Option<String> {
    let wide: Vec<u16> = OsStr::new(path.as_ref())
        .encode_wide()
        .chain(Some(0))
        .collect();

    let mut buffer = vec![0u16; 260];
    let len = unsafe { GetShortPathNameW(wide.as_ptr(), buffer.as_mut_ptr(), buffer.len() as u32) };

    if len > 0 {
        Some(String::from_utf16_lossy(&buffer[..len as usize]))
    } else {
        None
    }
}
// --- Load Command ---
#[tauri::command]
pub async fn load_llama_model(
    state: State<'_, AppState>,
    backend_path: &str,
    library_path: Option<&str>,
    args: Vec<String>,
    mut args: Vec<String>,
) -> ServerResult<SessionInfo> {
    let mut process_map = state.llama_server_process.lock().await;

@@ -105,13 +133,38 @@ pub async fn load_llama_model(
            8080
        }
    };

    let model_path = args
    // FOR MODEL PATH; TODO: DO SIMILARLY FOR MMPROJ PATH
    let model_path_index = args
        .iter()
        .position(|arg| arg == "-m")
        .and_then(|i| args.get(i + 1))
        .cloned()
        .unwrap_or_default();
        .ok_or(ServerError::LlamacppError("Missing `-m` flag".into()))?;

    let model_path = args
        .get(model_path_index + 1)
        .ok_or(ServerError::LlamacppError("Missing path after `-m`".into()))?
        .clone();

    let model_path_pb = PathBuf::from(model_path);
    if !model_path_pb.exists() {
        return Err(ServerError::LlamacppError(format!(
            "Invalid or inaccessible model path: {}",
            model_path_pb.display().to_string(),
        )));
    }
    #[cfg(windows)]
    {
        // use short path on Windows
        if let Some(short) = get_short_path(&model_path_pb) {
            args[model_path_index + 1] = short;
        } else {
            args[model_path_index + 1] = model_path_pb.display().to_string();
        }
    }
    #[cfg(not(windows))]
    {
        args[model_path_index + 1] = model_path_pb.display().to_string();
    }
    // -----------------------------------------------------------------

    let api_key = args
        .iter()
@@ -181,7 +234,6 @@ pub async fn load_llama_model(

    // Create channels for communication between tasks
    let (ready_tx, mut ready_rx) = mpsc::channel::<bool>(1);
    let (error_tx, mut error_rx) = mpsc::channel::<String>(1);

    // Spawn task to monitor stdout for readiness
    let _stdout_task = tokio::spawn(async move {
@@ -228,20 +280,10 @@ pub async fn load_llama_model(

            // Check for critical error indicators that should stop the process
            let line_lower = line.to_string().to_lowercase();
            if line_lower.contains("error loading model")
                || line_lower.contains("unknown model architecture")
                || line_lower.contains("fatal")
                || line_lower.contains("cuda error")
                || line_lower.contains("out of memory")
                || line_lower.contains("error")
                || line_lower.contains("failed")
            {
                let _ = error_tx.send(line.to_string()).await;
            }
            // Check for readiness indicator - llama-server outputs this when ready
            else if line.contains("server is listening on")
                || line.contains("starting the main loop")
                || line.contains("server listening on")
            if line_lower.contains("server is listening on")
                || line_lower.contains("starting the main loop")
                || line_lower.contains("server listening on")
            {
                log::info!("Server appears to be ready based on stderr: '{}'", line);
                let _ = ready_tx.send(true).await;
@@ -279,26 +321,6 @@ pub async fn load_llama_model(
                log::info!("Server is ready to accept requests!");
                break;
            }
            // Error occurred
            Some(error_msg) = error_rx.recv() => {
                log::error!("Server encountered an error: {}", error_msg);

                // Give process a moment to exit naturally
                tokio::time::sleep(Duration::from_millis(100)).await;

                // Check if process already exited
                if let Some(status) = child.try_wait()? {
                    log::info!("Process exited with code {:?}", status);
                    return Err(ServerError::LlamacppError(error_msg));
                } else {
                    log::info!("Process still running, killing it...");
                    let _ = child.kill().await;
                }

                // Get full stderr output
                let stderr_output = stderr_task.await.unwrap_or_default();
                return Err(ServerError::LlamacppError(format!("Error: {}\n\nFull stderr:\n{}", error_msg, stderr_output)));
            }
            // Check for process exit more frequently
            _ = tokio::time::sleep(Duration::from_millis(50)) => {
                // Check if process exited
@@ -332,7 +354,7 @@ pub async fn load_llama_model(
        pid: pid.clone(),
        port: port,
        model_id: model_id,
        model_path: model_path,
        model_path: model_path_pb.display().to_string(),
        api_key: api_key,
    };
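From the extension's side all of this is reached through a single invoke call. A hedged sketch of a call site; the paths and flags are placeholders, and the argument keys mirror the Rust signature above:

```typescript
import { invoke } from '@tauri-apps/api/core'

interface SessionInfo {
  pid: number
  port: number
  model_id: string
  model_path: string
  api_key: string
}

// Placeholder values for illustration. The Rust side validates the `-m`
// argument, rejects missing or nonexistent model paths, and on Windows
// rewrites the path to its 8.3 short form before spawning llama-server.
const session = await invoke<SessionInfo>('load_llama_model', {
  backendPath: '/path/to/backends/v1.0.0/build/bin',
  libraryPath: null,
  args: ['-m', '/models/my-model.gguf', '--port', '3100'],
})
console.log(`llama-server ready on port ${session.port}`)
```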
@@ -704,7 +726,308 @@ pub async fn is_process_running(pid: i32, state: State<'_, AppState>) -> Result<
}

// check port availability
#[tauri::command]
pub fn is_port_available(port: u16) -> bool {
fn is_port_available(port: u16) -> bool {
    std::net::TcpListener::bind(("127.0.0.1", port)).is_ok()
}

#[tauri::command]
pub async fn get_random_port(state: State<'_, AppState>) -> Result<u16, String> {
    const MAX_ATTEMPTS: u32 = 20000;
    let mut attempts = 0;
    let mut rng = StdRng::from_entropy();

    // Get all active ports from sessions
    let map = state.llama_server_process.lock().await;

    let used_ports: HashSet<u16> = map
        .values()
        .filter_map(|session| {
            // Convert valid ports to u16 (filter out placeholder ports like -1)
            if session.info.port > 0 && session.info.port <= u16::MAX as i32 {
                Some(session.info.port as u16)
            } else {
                None
            }
        })
        .collect();

    drop(map); // unlock early

    while attempts < MAX_ATTEMPTS {
        let port = rng.gen_range(3000..4000);

        if used_ports.contains(&port) {
            attempts += 1;
            continue;
        }

        if is_port_available(port) {
            return Ok(port);
        }

        attempts += 1;
    }

    Err("Failed to find an available port for the model to load".into())
}
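get_random_port draws from the 3000-3999 range, skipping ports that either belong to a live session or fail a local bind probe. Usage from the frontend is a one-liner (sketch; the command is registered in the lib.rs handler list further down):

```typescript
import { invoke } from '@tauri-apps/api/core'

// Reserve a free local port before asking the backend to spawn llama-server.
const port = await invoke<number>('get_random_port')
console.log(`next llama-server instance will bind 127.0.0.1:${port}`)
```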
// find session
#[tauri::command]
pub async fn find_session_by_model(
    model_id: String,
    state: State<'_, AppState>,
) -> Result<Option<SessionInfo>, String> {
    let map = state.llama_server_process.lock().await;

    let session_info = map
        .values()
        .find(|backend_session| backend_session.info.model_id == model_id)
        .map(|backend_session| backend_session.info.clone());

    Ok(session_info)
}

// get running models
#[tauri::command]
pub async fn get_loaded_models(state: State<'_, AppState>) -> Result<Vec<String>, String> {
    let map = state.llama_server_process.lock().await;

    let model_ids = map
        .values()
        .map(|backend_session| backend_session.info.model_id.clone())
        .collect();

    Ok(model_ids)
}

// tests
//
#[cfg(test)]
mod tests {
    use super::*;
    use std::path::PathBuf;
    #[cfg(windows)]
    use tempfile;

    #[test]
    fn test_parse_multiple_devices() {
        let output = r#"ggml_vulkan: Found 2 Vulkan devices:
ggml_vulkan: 0 = NVIDIA GeForce RTX 3090 (NVIDIA) | uma: 0 | fp16: 1 | bf16: 0 | warp size: 32 | shared memory: 49152 | int dot: 0 | matrix cores: KHR_coopmat
ggml_vulkan: 1 = AMD Radeon Graphics (RADV GFX1151) (radv) | uma: 1 | fp16: 1 | bf16: 0 | warp size: 64 | shared memory: 65536 | int dot: 0 | matrix cores: KHR_coopmat
Available devices:
Vulkan0: NVIDIA GeForce RTX 3090 (24576 MiB, 24576 MiB free)
Vulkan1: AMD Radeon Graphics (RADV GFX1151) (87722 MiB, 87722 MiB free)
"#;

        let devices = parse_device_output(output).unwrap();

        assert_eq!(devices.len(), 2);

        // Check first device
        assert_eq!(devices[0].id, "Vulkan0");
        assert_eq!(devices[0].name, "NVIDIA GeForce RTX 3090");
        assert_eq!(devices[0].mem, 24576);
        assert_eq!(devices[0].free, 24576);

        // Check second device
        assert_eq!(devices[1].id, "Vulkan1");
        assert_eq!(devices[1].name, "AMD Radeon Graphics (RADV GFX1151)");
        assert_eq!(devices[1].mem, 87722);
        assert_eq!(devices[1].free, 87722);
    }

    #[test]
    fn test_parse_single_device() {
        let output = r#"Available devices:
CUDA0: NVIDIA GeForce RTX 4090 (24576 MiB, 24000 MiB free)"#;

        let devices = parse_device_output(output).unwrap();

        assert_eq!(devices.len(), 1);
        assert_eq!(devices[0].id, "CUDA0");
        assert_eq!(devices[0].name, "NVIDIA GeForce RTX 4090");
        assert_eq!(devices[0].mem, 24576);
        assert_eq!(devices[0].free, 24000);
    }

    #[test]
    fn test_parse_with_extra_whitespace_and_empty_lines() {
        let output = r#"
Available devices:

Vulkan0: NVIDIA GeForce RTX 3090 (24576 MiB, 24576 MiB free)

Vulkan1: AMD Radeon Graphics (RADV GFX1151) (87722 MiB, 87722 MiB free)

"#;

        let devices = parse_device_output(output).unwrap();

        assert_eq!(devices.len(), 2);
        assert_eq!(devices[0].id, "Vulkan0");
        assert_eq!(devices[1].id, "Vulkan1");
    }

    #[test]
    fn test_parse_different_backends() {
        let output = r#"Available devices:
CUDA0: NVIDIA GeForce RTX 4090 (24576 MiB, 24000 MiB free)
Vulkan0: NVIDIA GeForce RTX 3090 (24576 MiB, 24576 MiB free)
SYCL0: Intel(R) Arc(TM) A750 Graphics (8000 MiB, 7721 MiB free)"#;

        let devices = parse_device_output(output).unwrap();

        assert_eq!(devices.len(), 3);

        assert_eq!(devices[0].id, "CUDA0");
        assert_eq!(devices[0].name, "NVIDIA GeForce RTX 4090");

        assert_eq!(devices[1].id, "Vulkan0");
        assert_eq!(devices[1].name, "NVIDIA GeForce RTX 3090");

        assert_eq!(devices[2].id, "SYCL0");
        assert_eq!(devices[2].name, "Intel(R) Arc(TM) A750 Graphics");
        assert_eq!(devices[2].mem, 8000);
        assert_eq!(devices[2].free, 7721);
    }

    #[test]
    fn test_parse_complex_gpu_names() {
        let output = r#"Available devices:
Vulkan0: Intel(R) Arc(tm) A750 Graphics (DG2) (8128 MiB, 8128 MiB free)
Vulkan1: AMD Radeon RX 7900 XTX (Navi 31) [RDNA 3] (24576 MiB, 24000 MiB free)"#;

        let devices = parse_device_output(output).unwrap();

        assert_eq!(devices.len(), 2);

        assert_eq!(devices[0].id, "Vulkan0");
        assert_eq!(devices[0].name, "Intel(R) Arc(tm) A750 Graphics (DG2)");
        assert_eq!(devices[0].mem, 8128);
        assert_eq!(devices[0].free, 8128);

        assert_eq!(devices[1].id, "Vulkan1");
        assert_eq!(devices[1].name, "AMD Radeon RX 7900 XTX (Navi 31) [RDNA 3]");
        assert_eq!(devices[1].mem, 24576);
        assert_eq!(devices[1].free, 24000);
    }

    #[test]
    fn test_parse_no_devices() {
        let output = r#"Available devices:"#;

        let devices = parse_device_output(output).unwrap();
        assert_eq!(devices.len(), 0);
    }

    #[test]
    fn test_parse_missing_header() {
        let output = r#"Vulkan0: NVIDIA GeForce RTX 3090 (24576 MiB, 24576 MiB free)"#;

        let result = parse_device_output(output);
        assert!(result.is_err());
        assert!(result
            .unwrap_err()
            .to_string()
            .contains("Could not find 'Available devices:' section"));
    }

    #[test]
    fn test_parse_malformed_device_line() {
        let output = r#"Available devices:
Vulkan0: NVIDIA GeForce RTX 3090 (24576 MiB, 24576 MiB free)
Invalid line without colon
Vulkan1: AMD Radeon Graphics (RADV GFX1151) (87722 MiB, 87722 MiB free)"#;

        let devices = parse_device_output(output).unwrap();

        // Should skip the malformed line and parse the valid ones
        assert_eq!(devices.len(), 2);
        assert_eq!(devices[0].id, "Vulkan0");
        assert_eq!(devices[1].id, "Vulkan1");
    }

    #[test]
    fn test_parse_device_line_individual() {
        // Test the individual line parser
        let line = "Vulkan0: NVIDIA GeForce RTX 3090 (24576 MiB, 24576 MiB free)";
        let device = parse_device_line(line).unwrap().unwrap();

        assert_eq!(device.id, "Vulkan0");
        assert_eq!(device.name, "NVIDIA GeForce RTX 3090");
        assert_eq!(device.mem, 24576);
        assert_eq!(device.free, 24576);
    }

    #[test]
    fn test_memory_pattern_detection() {
        assert!(is_memory_pattern("24576 MiB, 24576 MiB free"));
        assert!(is_memory_pattern("8000 MiB, 7721 MiB free"));
        assert!(!is_memory_pattern("just some text"));
        assert!(!is_memory_pattern("24576 MiB"));
        assert!(!is_memory_pattern("24576, 24576"));
    }

    #[test]
    fn test_parse_memory_value() {
        assert_eq!(parse_memory_value("24576 MiB").unwrap(), 24576);
        assert_eq!(parse_memory_value("7721 MiB free").unwrap(), 7721);
        assert_eq!(parse_memory_value("8000").unwrap(), 8000);

        assert!(parse_memory_value("").is_err());
        assert!(parse_memory_value("not_a_number MiB").is_err());
    }

    #[test]
    fn test_find_memory_pattern() {
        let text = "NVIDIA GeForce RTX 3090 (24576 MiB, 24576 MiB free)";
        let result = find_memory_pattern(text);
        assert!(result.is_some());
        let (_start, content) = result.unwrap();
        assert_eq!(content, "24576 MiB, 24576 MiB free");

        // Test with multiple parentheses
        let text = "Intel(R) Arc(tm) A750 Graphics (DG2) (8128 MiB, 8128 MiB free)";
        let result = find_memory_pattern(text);
        assert!(result.is_some());
        let (_start, content) = result.unwrap();
        assert_eq!(content, "8128 MiB, 8128 MiB free");
    }
    #[test]
    fn test_path_with_uncommon_dir_names() {
        const UNCOMMON_DIR_NAME: &str = "тест-你好-éàç-🚀";
        #[cfg(windows)]
        {
            let dir = tempfile::tempdir().expect("Failed to create temp dir");
            let long_path = dir.path().join(UNCOMMON_DIR_NAME);
            std::fs::create_dir(&long_path)
                .expect("Failed to create test directory with non-ASCII name");
            let short_path = get_short_path(&long_path)
                .expect("get_short_path should return a path on Windows");
            assert!(
                short_path.is_ascii(),
                "The resulting short path must be composed of only ASCII characters. Got: {}",
                short_path
            );
            assert!(
                PathBuf::from(&short_path).exists(),
                "The returned short path must exist on the filesystem"
            );
            assert_ne!(
                short_path,
                long_path.to_str().unwrap(),
                "Short path should not be the same as the long path"
            );
        }
        #[cfg(not(windows))]
        {
            // On Unix, paths are typically UTF-8 and there's no "short path" concept.
            let long_path_str = format!("/tmp/{}", UNCOMMON_DIR_NAME);
            let path_buf = PathBuf::from(&long_path_str);
            let displayed_path = path_buf.display().to_string();
            assert_eq!(
                displayed_path, long_path_str,
                "Path with non-ASCII characters should be preserved exactly on non-Windows platforms"
            );
        }
    }
}

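The tests above fix the parsing contract: the memory block is the last `(N MiB, M MiB free)` group on the line, so any earlier parentheses stay part of the device name. A rough TypeScript rendering of that rule, for illustration only; the real parser is the Rust code under test:

```typescript
interface DeviceInfo {
  id: string
  name: string
  mem: number
  free: number
}

// Parses lines such as:
//   "Vulkan0: Intel(R) Arc(tm) A750 Graphics (DG2) (8128 MiB, 8128 MiB free)"
// The greedy (.*) keeps everything up to the final memory group in the name.
function parseDeviceLine(line: string): DeviceInfo | null {
  const m = line.match(/^(\S+):\s+(.*)\s+\((\d+)\s+MiB,\s+(\d+)\s+MiB free\)\s*$/)
  if (!m) return null // malformed lines are skipped, as in the Rust tests
  return { id: m[1], name: m[2], mem: Number(m[3]), free: Number(m[4]) }
}
```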
@@ -103,6 +103,21 @@ pub fn normalize_path(path: &Path) -> PathBuf {
    ret
}

pub fn can_override_npx() -> bool {
    // On macOS with an Intel CPU we have to check for AVX2 support: the
    // `npx` command can be overridden with `bun` only when the CPU supports
    // AVX2; otherwise we must fall back to the default `npx` binary.
    #[cfg(all(target_os = "macos", any(target_arch = "x86", target_arch = "x86_64")))]
    {
        if !is_x86_feature_detected!("avx2") {
            log::warn!("Your CPU doesn't support the AVX2 instruction set; the default npx binary will be used");
            return false; // we cannot override npx with the bun binary
        }
    }

    true // by default, we can override npx with the bun binary
}

#[tauri::command]
pub fn write_yaml(
    app: tauri::AppHandle,
@@ -195,3 +210,4 @@ pub fn is_library_available(library: &str) -> bool {
        }
    }
}

@@ -95,7 +95,9 @@ pub fn run() {
            core::utils::extensions::inference_llamacpp_extension::server::load_llama_model,
            core::utils::extensions::inference_llamacpp_extension::server::unload_llama_model,
            core::utils::extensions::inference_llamacpp_extension::server::get_devices,
            core::utils::extensions::inference_llamacpp_extension::server::is_port_available,
            core::utils::extensions::inference_llamacpp_extension::server::get_random_port,
            core::utils::extensions::inference_llamacpp_extension::server::find_session_by_model,
            core::utils::extensions::inference_llamacpp_extension::server::get_loaded_models,
            core::utils::extensions::inference_llamacpp_extension::server::generate_api_key,
            core::utils::extensions::inference_llamacpp_extension::server::is_process_running,
        ])
@@ -129,6 +131,12 @@ pub fn run() {
            if let Err(e) = setup::install_extensions(app.handle().clone(), false) {
                log::error!("Failed to install extensions: {}", e);
            }

            #[cfg(any(windows, target_os = "linux"))]
            {
                use tauri_plugin_deep_link::DeepLinkExt;
                app.deep_link().register_all()?;
            }
            setup_mcp(app);
            Ok(())
        })
@@ -6,7 +6,7 @@
  "build": {
    "frontendDist": "../web-app/dist",
    "devUrl": "http://localhost:1420",
    "beforeDevCommand": "cross-env IS_TAURI=true CLEAN=true yarn dev:web",
    "beforeDevCommand": "cross-env IS_TAURI=true yarn dev:web",
    "beforeBuildCommand": "cross-env IS_TAURI=true yarn build:web"
  },
  "app": {
@@ -161,8 +161,14 @@ export function DownloadManagement() {
      console.debug('onFileDownloadError', state)
      removeDownload(state.modelId)
      removeLocalDownloadingModel(state.modelId)
      toast.error(t('common:toast.downloadFailed.title'), {
        id: 'download-failed',
        description: t('common:toast.downloadFailed.description', {
          item: state.modelId,
        }),
      })
    },
    [removeDownload, removeLocalDownloadingModel]
    [removeDownload, removeLocalDownloadingModel, t]
  )

  const onFileDownloadStopped = useCallback(
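Adding `t` to the dependency array is the real fix here: without it the memoized handler closes over the translator from the first render and keeps toasting in a stale language. A reduced illustration of the hazard; the toast import is an assumption and may differ from the app's actual library:

```typescript
import { useCallback } from 'react'
import { useTranslation } from 'react-i18next'
import { toast } from 'sonner' // assumption: swap for the app's toast library

function useDownloadErrorToast() {
  const { t } = useTranslation()
  // If `t` were omitted from the deps, this callback would keep the
  // translator captured at mount, surviving language switches.
  return useCallback(
    (item: string) =>
      toast.error(t('common:toast.downloadFailed.title'), {
        description: t('common:toast.downloadFailed.description', { item }),
      }),
    [t]
  )
}
```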
@@ -71,7 +71,7 @@ export function ModelSetting({
    })

    // Call debounced stopModel only when updating ctx_len or ngl
    if (key === 'ctx_len' || key === 'ngl') {
    if (key === 'ctx_len' || key === 'ngl' || key === 'chat_template') {
      debouncedStopModel(model.id)
    }
  }
@@ -101,9 +101,17 @@ export function ModelSetting({
      <div className="px-4 space-y-6">
        {Object.entries(model.settings || {}).map(([key, value]) => {
          const config = value as ProviderSetting

          return (
            <div key={key} className="space-y-2">
              <div className="flex items-start justify-between gap-8">
              <div
                className={cn(
                  'flex items-start justify-between gap-8 last:mb-2',
                  (key === 'chat_template' ||
                    key === 'override_tensor_buffer_t') &&
                    'flex-col gap-1 w-full'
                )}
              >
                <div className="space-y-1 mb-2">
                  <h3 className="font-medium">{config.title}</h3>
                  <p className="text-main-view-fg/70 text-xs">
@@ -30,14 +30,31 @@ const ThinkingBlock = ({ id, text }: Props) => {
  const { thinkingState, setThinkingState } = useThinkingStore()
  const { streamingContent } = useAppState()
  const { t } = useTranslation()
  const loading = !text.includes('</think>') && streamingContent
  // Check for thinking formats
  const hasThinkTag = text.includes('<think>') && !text.includes('</think>')
  const hasAnalysisChannel = text.includes('<|channel|>analysis<|message|>') && !text.includes('<|start|>assistant<|channel|>final<|message|>')
  const loading = (hasThinkTag || hasAnalysisChannel) && streamingContent
  const isExpanded = thinkingState[id] ?? (loading ? true : false)
  const handleClick = () => {
    const newExpandedState = !isExpanded
    setThinkingState(id, newExpandedState)
  }

  if (!text.replace(/<\/?think>/g, '').trim()) return null
  // Extract thinking content from either format
  const extractThinkingContent = (text: string) => {
    return text
      .replace(/<\/?think>/g, '')
      .replace(/<\|channel\|>analysis<\|message\|>/g, '')
      .replace(/<\|start\|>assistant<\|channel\|>final<\|message\|>/g, '')
      .replace(/assistant<\|channel\|>final<\|message\|>/g, '')
      .replace(/<\|channel\|>/g, '') // remove any remaining channel markers
      .replace(/<\|message\|>/g, '') // remove any remaining message markers
      .replace(/<\|start\|>/g, '') // remove any remaining start markers
      .trim()
  }

  const thinkingContent = extractThinkingContent(text)
  if (!thinkingContent) return null

  return (
    <div
@@ -63,7 +80,7 @@ const ThinkingBlock = ({ id, text }: Props) => {

      {isExpanded && (
        <div className="mt-2 pl-6 pr-4 text-main-view-fg/60">
          <RenderMarkdown content={text.replace(/<\/?think>/g, '').trim()} />
          <RenderMarkdown content={thinkingContent} />
        </div>
      )}
    </div>
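A hedged example of the new cleanup path: running a gpt-oss-style analysis-channel string through the same replacements as `extractThinkingContent` leaves only the reasoning text (the sample string is invented):

```typescript
const raw =
  '<|channel|>analysis<|message|>Weighing both options carefully...' +
  '<|start|>assistant<|channel|>final<|message|>'

// Same replacement chain as extractThinkingContent above.
const thinking = raw
  .replace(/<\/?think>/g, '')
  .replace(/<\|channel\|>analysis<\|message\|>/g, '')
  .replace(/<\|start\|>assistant<\|channel\|>final<\|message\|>/g, '')
  .trim()

console.log(thinking) // "Weighing both options carefully..."
```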
@@ -170,18 +170,33 @@ export const ThreadContent = memo(
    )

    const { reasoningSegment, textSegment } = useMemo(() => {
      const isThinking = text.includes('<think>') && !text.includes('</think>')
      if (isThinking) return { reasoningSegment: text, textSegment: '' }
      // Check for thinking formats
      const hasThinkTag = text.includes('<think>') && !text.includes('</think>')
      const hasAnalysisChannel = text.includes('<|channel|>analysis<|message|>') && !text.includes('<|start|>assistant<|channel|>final<|message|>')

      if (hasThinkTag || hasAnalysisChannel) return { reasoningSegment: text, textSegment: '' }

      const match = text.match(/<think>([\s\S]*?)<\/think>/)
      if (match?.index === undefined)
        return { reasoningSegment: undefined, textSegment: text }

      const splitIndex = match.index + match[0].length
      return {
        reasoningSegment: text.slice(0, splitIndex),
        textSegment: text.slice(splitIndex),
      // Check for completed think tag format
      const thinkMatch = text.match(/<think>([\s\S]*?)<\/think>/)
      if (thinkMatch?.index !== undefined) {
        const splitIndex = thinkMatch.index + thinkMatch[0].length
        return {
          reasoningSegment: text.slice(0, splitIndex),
          textSegment: text.slice(splitIndex),
        }
      }

      // Check for completed analysis channel format
      const analysisMatch = text.match(/<\|channel\|>analysis<\|message\|>([\s\S]*?)<\|start\|>assistant<\|channel\|>final<\|message\|>/)
      if (analysisMatch?.index !== undefined) {
        const splitIndex = analysisMatch.index + analysisMatch[0].length
        return {
          reasoningSegment: text.slice(0, splitIndex),
          textSegment: text.slice(splitIndex),
        }
      }

      return { reasoningSegment: undefined, textSegment: text }
    }, [text])

    const { getMessages, deleteMessage } = useMessages()
@@ -36,7 +36,10 @@ export default function OutOfContextPromiseModal() {

  return (
    <Dialog open={isModalOpen} onOpenChange={handleDialogOpen}>
      <DialogContent>
      <DialogContent
        showCloseButton={false}
        onInteractOutside={(e) => e.preventDefault()}
      >
        <DialogHeader>
          <DialogTitle>{t('model-errors:title')}</DialogTitle>
        </DialogHeader>
@@ -57,7 +60,7 @@ export default function OutOfContextPromiseModal() {
            {t('model-errors:truncateInput')}
          </Button>
          <Button
            asChild
            autoFocus
            onClick={() => {
              handleContextLength()
            }}
@@ -5,30 +5,61 @@ import {
  DropdownMenuTrigger,
} from '@/components/ui/dropdown-menu'

import {
  Tooltip,
  TooltipTrigger,
  TooltipContent,
} from '@/components/ui/tooltip'
import { IconStarFilled } from '@tabler/icons-react'
import { cn } from '@/lib/utils'

// Dropdown component
type DropdownControlProps = {
  value: string
  options?: Array<{ value: number | string; name: string }>
  recommended?: string
  onChange: (value: number | string) => void
}

export function DropdownControl({
  value,
  options = [],
  recommended,
  onChange,
}: DropdownControlProps) {
  const isSelected =
    options.find((option) => option.value === value)?.name || value

  return (
    <DropdownMenu>
      <DropdownMenuTrigger className="bg-main-view-fg/5 hover:bg-main-view-fg/8 px-2 py-1 rounded font-medium cursor-pointer">
        {options.find((option) => option.value === value)?.name || value}
      <DropdownMenuTrigger className="bg-main-view-fg/5 hover:bg-main-view-fg/8 px-3 py-1 rounded-sm font-medium cursor-pointer">
        {isSelected}
      </DropdownMenuTrigger>
      <DropdownMenuContent align="end">
      <DropdownMenuContent align="end" className="max-h-70">
        {options.map((option, optionIndex) => (
          <DropdownMenuItem
            key={optionIndex}
            onClick={() => onChange(option.value)}
            className={cn(
              'flex items-center justify-between my-1',
              isSelected === option.name
                ? 'bg-main-view-fg/6 hover:bg-main-view-fg/6'
                : ''
            )}
          >
            {option.name}
            <span>{option.name}</span>
            {recommended === option.value && (
              <Tooltip>
                <TooltipTrigger asChild>
                  <div className="cursor-pointer">
                    <IconStarFilled className="text-accent" />
                  </div>
                </TooltipTrigger>
                <TooltipContent side="top" sideOffset={8} className="z-50">
                  Recommended
                </TooltipContent>
              </Tooltip>
            )}
          </DropdownMenuItem>
        ))}
      </DropdownMenuContent>
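Usage sketch for the new `recommended` prop; the option values are invented, and the star-and-tooltip rendering comes from the component above:

```tsx
<DropdownControl
  value={selectedBackend}
  options={[
    { value: 'win-avx2-cuda-cu12.0-x64', name: 'CUDA (AVX2)' },
    { value: 'win-noavx-cuda-cu12.0-x64', name: 'CUDA (no AVX)' },
  ]}
  recommended="win-avx2-cuda-cu12.0-x64" // this entry gets the star + tooltip
  onChange={(value) => setSelectedBackend(String(value))}
/>
```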
@@ -28,6 +28,7 @@ type DynamicControllerProps = {
    min?: number
    max?: number
    step?: number
    recommended?: string
  }
  onChange: (value: string | boolean | number) => void
}
@@ -65,6 +66,7 @@ export function DynamicControllerSetting({
      <DropdownControl
        value={controllerProps.value as string}
        options={controllerProps.options}
        recommended={controllerProps.recommended}
        onChange={(newValue) => onChange(newValue)}
      />
    )
@@ -365,8 +365,7 @@ export const useChat = () => {
        if (
          typeof errorMessage === 'string' &&
          errorMessage.includes(OUT_OF_CONTEXT_SIZE) &&
          selectedModel &&
          troubleshooting
          selectedModel
        ) {
          const method = await showIncreaseContextSizeModal()
          if (method === 'ctx_len') {
@@ -25,8 +25,8 @@ type MCPServerStoreState = {
  editServer: (key: string, config: MCPServerConfig) => void
  deleteServer: (key: string) => void
  setServers: (servers: MCPServers) => void
  syncServers: () => void
  syncServersAndRestart: () => void
  syncServers: () => Promise<void>
  syncServersAndRestart: () => Promise<void>
}

export const useMCPServers = create<MCPServerStoreState>()((set, get) => ({
@@ -2,6 +2,7 @@ import { create } from 'zustand'
import { persist, createJSONStorage } from 'zustand/middleware'
import { localStorageKey } from '@/constants/localStorage'
import { sep } from '@tauri-apps/api/path'
import { modelSettings } from '@/lib/predefined'

type ModelProviderState = {
  providers: ModelProvider[]
@@ -211,8 +212,21 @@ export const useModelProvider = create<ModelProviderState>()(
      name: localStorageKey.modelProvider,
      storage: createJSONStorage(() => localStorage),
      migrate: (persistedState: unknown, version: number) => {
        const state = persistedState as ModelProviderState

        const state = persistedState as ModelProviderState & {
          providers: Array<
            ModelProvider & {
              models: Array<
                Model & {
                  settings?: Record<string, unknown> & {
                    chatTemplate?: string
                    chat_template?: string
                  }
                }
              >
            }
          >
        }

        // Migration for cont_batching description update (version 0 -> 1)
        if (version === 0 && state?.providers) {
          state.providers = state.providers.map((provider) => {
@@ -221,7 +235,8 @@ export const useModelProvider = create<ModelProviderState>()(
              if (setting.key === 'cont_batching') {
                return {
                  ...setting,
                  description: 'Enable continuous batching (a.k.a dynamic batching) for concurrent requests.'
                  description:
                    'Enable continuous batching (a.k.a dynamic batching) for concurrent requests.',
                }
              }
              return setting
@@ -230,9 +245,65 @@ export const useModelProvider = create<ModelProviderState>()(
            return provider
          })
        }

        // Migration for chatTemplate key to chat_template (version 1 -> 2)
        if (version === 1 && state?.providers) {
          state.providers.forEach((provider) => {
            if (provider.models) {
              provider.models.forEach((model) => {
                // Initialize settings if it doesn't exist
                if (!model.settings) {
                  model.settings = {}
                }

                // Migrate chatTemplate key to chat_template
                if (model.settings.chatTemplate) {
                  model.settings.chat_template = model.settings.chatTemplate
                  delete model.settings.chatTemplate
                }

                // Add missing chat_template setting if it doesn't exist
                if (!model.settings.chat_template) {
                  model.settings.chat_template = {
                    ...modelSettings.chatTemplate,
                    controller_props: {
                      ...modelSettings.chatTemplate.controller_props,
                    },
                  }
                }
              })
            }
          })
        }

        // Migration for override_tensor_buffer_type key (version 2 -> 3)
        if (version === 2 && state?.providers) {
          state.providers.forEach((provider) => {
            if (provider.models) {
              provider.models.forEach((model) => {
                // Initialize settings if it doesn't exist
                if (!model.settings) {
                  model.settings = {}
                }

                // Add missing override_tensor_buffer_type setting if it doesn't exist
                if (!model.settings.override_tensor_buffer_t) {
                  model.settings.override_tensor_buffer_t = {
                    ...modelSettings.override_tensor_buffer_t,
                    controller_props: {
                      ...modelSettings.override_tensor_buffer_t
                        .controller_props,
                    },
                  }
                }
              })
            }
          })
        }

        return state
      },
      version: 1,
      version: 3,
    }
  )
)

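One caveat worth flagging: zustand's persist calls `migrate` once with the stored version, so the exact `version === 1` and `version === 2` guards above rely on users stepping through every release. Cumulative guards are the more defensive pattern; a sketch under that assumption, with hypothetical helper names standing in for the per-version bodies shown above:

```typescript
type PersistedState = { providers: unknown[] }

// Hypothetical helpers wrapping the per-version bodies shown above.
const updateContBatchingDescription = (_s: PersistedState): void => {}
const renameChatTemplateKey = (_s: PersistedState): void => {}
const addOverrideTensorBufferSetting = (_s: PersistedState): void => {}

// Each guard uses `<`, so a store persisted at version 0 passes through
// every step on its way to version 3.
function migrate(state: PersistedState, version: number): PersistedState {
  if (version < 1) updateContBatchingDescription(state)
  if (version < 2) renameChatTemplateKey(state)
  if (version < 3) addOverrideTensorBufferSetting(state)
  return state
}
```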
@@ -407,6 +407,50 @@
      const result = builder.getMessages()
      expect(result[0].content).toBe('Clean answer')
    })

    it('should remove analysis channel reasoning content', () => {
      const builder = new CompletionMessagesBuilder([])

      builder.addAssistantMessage(
        '<|channel|>analysis<|message|>Let me analyze this step by step...<|start|>assistant<|channel|>final<|message|>The final answer is 42.'
      )

      const result = builder.getMessages()
      expect(result[0].content).toBe('The final answer is 42.')
    })

    it('should handle analysis channel without final message', () => {
      const builder = new CompletionMessagesBuilder([])

      builder.addAssistantMessage(
        '<|channel|>analysis<|message|>Only analysis content here...'
      )

      const result = builder.getMessages()
      expect(result[0].content).toBe('<|channel|>analysis<|message|>Only analysis content here...')
    })

    it('should handle analysis channel with multiline content', () => {
      const builder = new CompletionMessagesBuilder([])

      builder.addAssistantMessage(
        '<|channel|>analysis<|message|>Step 1: First analysis\nStep 2: Second analysis\nStep 3: Final analysis<|start|>assistant<|channel|>final<|message|>Based on my analysis, here is the result.'
      )

      const result = builder.getMessages()
      expect(result[0].content).toBe('Based on my analysis, here is the result.')
    })

    it('should handle both think and analysis channel tags', () => {
      const builder = new CompletionMessagesBuilder([])

      builder.addAssistantMessage(
        '<think>Initial thought</think><|channel|>analysis<|message|>Detailed analysis<|start|>assistant<|channel|>final<|message|>Final response'
      )

      const result = builder.getMessages()
      expect(result[0].content).toBe('Final response')
    })
  })

  describe('integration tests', () => {
@@ -102,6 +102,15 @@ export class CompletionMessagesBuilder {
        content = content.slice(splitIndex).trim()
      }
    }
    if (content.includes('<|channel|>analysis<|message|>')) {
      const match = content.match(
        /<\|channel\|>analysis<\|message\|>([\s\S]*?)<\|start\|>assistant<\|channel\|>final<\|message\|>/
      )
      if (match?.index !== undefined) {
        const splitIndex = match.index + match[0].length
        content = content.slice(splitIndex).trim()
      }
    }
    return content
  }
}
@@ -22,7 +22,6 @@ export const modelSettings = {
      type: 'number',
    },
  },

  temperature: {
    key: 'temperature',
    title: 'Temperature',
@@ -121,4 +120,28 @@ export const modelSettings = {
      type: 'number',
    },
  },
  chatTemplate: {
    key: 'chat_template',
    title: 'Custom Jinja Chat template',
    description: 'Custom Jinja chat_template to be used for the model',
    controller_type: 'textarea',
    controller_props: {
      value: '',
      placeholder:
        'e.g., {% for message in messages %}...{% endfor %} (default is read from GGUF)',
      type: 'text',
      textAlign: 'right',
    },
  },
  override_tensor_buffer_t: {
    key: 'override_tensor_buffer_t',
    title: 'Override Tensor Buffer Type',
    description: 'Override the tensor buffer type for the model',
    controller_type: 'input',
    controller_props: {
      value: '',
      placeholder: 'e.g., layers\\.\\d+\\.ffn_.*=CPU',
      type: 'text',
    },
  },
}
@@ -257,6 +257,10 @@
      "downloadCancelled": {
        "title": "Download Cancelled",
        "description": "The download process was cancelled"
      },
      "downloadFailed": {
        "title": "Download Failed",
        "description": "{{item}} download failed"
      }
    }
  }
}
@@ -62,7 +62,12 @@ export function DataProvider() {

  // Check for app updates
  useEffect(() => {
    checkForUpdate()
    // Only check for updates if the auto updater is not disabled
    // App might be distributed via other package managers
    // or methods that handle updates differently
    if (!AUTO_UPDATER_DISABLED) {
      checkForUpdate()
    }
  }, [checkForUpdate])

  const handleDeepLink = (urls: string[] | null) => {
@@ -79,7 +84,7 @@ export function DataProvider() {
      const resource = params.slice(1).join('/')
      // return { action, provider, resource }
      navigate({
        to: route.hub.index,
        to: route.hub.model,
        search: {
          repo: resource,
        },
@@ -1,5 +1,10 @@
import HeaderPage from '@/containers/HeaderPage'
import { createFileRoute, useParams, useNavigate } from '@tanstack/react-router'
import {
  createFileRoute,
  useParams,
  useNavigate,
  useSearch,
} from '@tanstack/react-router'
import {
  IconArrowLeft,
  IconDownload,
@@ -13,23 +18,38 @@ import { RenderMarkdown } from '@/containers/RenderMarkdown'
import { useEffect, useMemo, useCallback, useState } from 'react'
import { useModelProvider } from '@/hooks/useModelProvider'
import { useDownloadStore } from '@/hooks/useDownloadStore'
import { pullModel } from '@/services/models'
import {
  CatalogModel,
  convertHfRepoToCatalogModel,
  fetchHuggingFaceRepo,
  pullModel,
} from '@/services/models'
import { Progress } from '@/components/ui/progress'
import { Button } from '@/components/ui/button'
import { cn } from '@/lib/utils'

type SearchParams = {
  repo: string
}

export const Route = createFileRoute('/hub/$modelId')({
  component: HubModelDetail,
  validateSearch: (search: Record<string, unknown>): SearchParams => ({
    repo: search.repo as SearchParams['repo'],
  }),
})

function HubModelDetail() {
  const { modelId } = useParams({ from: Route.id })
  const navigate = useNavigate()
  const { sources, fetchSources } = useModelSources()
  // eslint-disable-next-line @typescript-eslint/no-explicit-any
  const search = useSearch({ from: Route.id as any })
  const { getProviderByName } = useModelProvider()
  const llamaProvider = getProviderByName('llamacpp')
  const { downloads, localDownloadingModels, addLocalDownloadingModel } =
    useDownloadStore()
  const [repoData, setRepoData] = useState<CatalogModel | undefined>()

  // State for README content
  const [readmeContent, setReadmeContent] = useState<string>('')
@@ -39,10 +59,21 @@ function HubModelDetail() {
    fetchSources()
  }, [fetchSources])

  const fetchRepo = useCallback(async () => {
    const repoInfo = await fetchHuggingFaceRepo(search.repo || modelId)
    if (repoInfo) {
      const repoDetail = convertHfRepoToCatalogModel(repoInfo)
      setRepoData(repoDetail)
    }
  }, [modelId, search])

  useEffect(() => {
    fetchRepo()
  }, [modelId, fetchRepo])
  // Find the model data from sources
  const modelData = useMemo(() => {
    return sources.find((model) => model.model_name === modelId)
  }, [sources, modelId])
    return sources.find((model) => model.model_name === modelId) ?? repoData
  }, [sources, modelId, repoData])

  // Download processes
  const downloadProcesses = useMemo(
@@ -116,7 +147,6 @@ function HubModelDetail() {
    })
  }, [modelData])

  // Fetch README content when modelData.readme is available
  useEffect(() => {
    if (modelData?.readme) {
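fetchHuggingFaceRepo now does the heavy lifting for this page. The service tests near the end of this diff show it accepting either a bare repo id or a full URL and querying the api/models endpoint with blobs=true; a hedged re-sketch of that normalization, with error handling trimmed and the type reduced to the fields used here:

```typescript
// Reduced shape; the real HuggingFaceRepo carries more fields.
interface HuggingFaceRepo {
  modelId: string
  siblings?: Array<{ rfilename: string; size?: number }>
}

// Accepts "microsoft/DialoGPT-medium" or the full huggingface.co URL.
async function fetchHuggingFaceRepo(
  idOrUrl: string
): Promise<HuggingFaceRepo | null> {
  const repoId = idOrUrl.replace(/^https?:\/\/huggingface\.co\//, '')
  const res = await fetch(
    `https://huggingface.co/api/models/${repoId}?blobs=true`
  )
  if (!res.ok) return null
  return (await res.json()) as HuggingFaceRepo
}
```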
@@ -31,7 +31,7 @@ import {
  CatalogModel,
  pullModel,
  fetchHuggingFaceRepo,
  HuggingFaceRepo,
  convertHfRepoToCatalogModel,
} from '@/services/models'
import { useDownloadStore } from '@/hooks/useDownloadStore'
import { Progress } from '@/components/ui/progress'
@@ -63,14 +63,16 @@ function Hub() {
    { value: 'newest', name: t('hub:sortNewest') },
    { value: 'most-downloaded', name: t('hub:sortMostDownloaded') },
  ]
  const searchOptions = {
    includeScore: true,
    // Search in `author` and in `tags` array
    keys: ['model_name', 'quants.model_id'],
  }
  const searchOptions = useMemo(() => {
    return {
      includeScore: true,
      // Search in `author` and in `tags` array
      keys: ['model_name', 'quants.model_id'],
    }
  }, [])

  const { sources, addSource, fetchSources, loading } = useModelSources()
  const search = useSearch({ from: route.hub.index as any })

  const [searchValue, setSearchValue] = useState('')
  const [sortSelected, setSortSelected] = useState('newest')
  const [expandedModels, setExpandedModels] = useState<Record<string, boolean>>(
@@ -92,48 +94,6 @@ function Hub() {
  const { getProviderByName } = useModelProvider()
  const llamaProvider = getProviderByName('llamacpp')

  // Convert HuggingFace repository to CatalogModel format
  const convertHfRepoToCatalogModel = useCallback(
    (repo: HuggingFaceRepo): CatalogModel => {
      // Extract GGUF files from the repository siblings
      const ggufFiles =
        repo.siblings?.filter((file) =>
          file.rfilename.toLowerCase().endsWith('.gguf')
        ) || []

      // Convert GGUF files to quants format
      const quants = ggufFiles.map((file) => {
        // Format file size
        const formatFileSize = (size?: number) => {
          if (!size) return 'Unknown size'
          if (size < 1024 ** 3) return `${(size / 1024 ** 2).toFixed(1)} MB`
          return `${(size / 1024 ** 3).toFixed(1)} GB`
        }

        // Generate model_id from filename (remove .gguf extension, case-insensitive)
        const modelId = file.rfilename.replace(/\.gguf$/i, '')

        return {
          model_id: modelId,
          path: `https://huggingface.co/${repo.modelId}/resolve/main/${file.rfilename}`,
          file_size: formatFileSize(file.size),
        }
      })

      return {
        model_name: repo.modelId,
        description: `**Metadata:** ${repo.pipeline_tag}\n\n **Tags**: ${repo.tags?.join(', ')}`,
        developer: repo.author,
        downloads: repo.downloads || 0,
        num_quants: quants.length,
        quants: quants,
        created_at: repo.created_at,
        readme: `https://huggingface.co/${repo.modelId}/resolve/main/README.md`,
      }
    },
    []
  )

  const toggleModelExpansion = (modelId: string) => {
    setExpandedModels((prev) => ({
      ...prev,
@@ -141,35 +101,6 @@ function Hub() {
    }))
  }

  useEffect(() => {
    if (search.repo) {
      setSearchValue(search.repo || '')
      setIsSearching(true)

      addModelSourceTimeoutRef.current = setTimeout(async () => {
        try {
          // Fetch HuggingFace repository information
          const repoInfo = await fetchHuggingFaceRepo(search.repo)
          if (repoInfo) {
            const catalogModel = convertHfRepoToCatalogModel(repoInfo)
            if (
              !sources.some((s) => s.model_name === catalogModel.model_name)
            ) {
              setHuggingFaceRepo(catalogModel)
              addSource(catalogModel)
            }
          }

          await fetchSources()
        } catch (error) {
          console.error('Error fetching repository info:', error)
        } finally {
          setIsSearching(false)
        }
      }, 500)
    }
  }, [convertHfRepoToCatalogModel, fetchSources, addSource, search, sources])

  // Sorting functionality
  const sortedModels = useMemo(() => {
    return [...sources].sort((a, b) => {
@@ -264,9 +195,6 @@ function Hub() {
          addSource(catalogModel)
        }
      }

      // Original addSource logic (if needed)
      await fetchSources()
    } catch (error) {
      console.error('Error fetching repository info:', error)
    } finally {
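Wrapping `searchOptions` in useMemo is about identity, not cost: a fresh object literal on every render defeats any dependency check downstream, for example when it feeds a memoized Fuse index. A tiny illustration:

```typescript
import { useMemo } from 'react'

// Stable reference across renders; without useMemo this object would be
// recreated every render and re-trigger anything that depends on it.
const searchOptions = useMemo(
  () => ({
    includeScore: true,
    keys: ['model_name', 'quants.model_id'],
  }),
  []
)
```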
@@ -83,11 +83,7 @@ function MCPServers() {
  }

  const handleSaveServer = async (name: string, config: MCPServerConfig) => {
    try {
      await toggleServer(name, false)
    } catch (error) {
      console.error('Error deactivating server:', error)
    }
    toggleServer(name, false)
    if (editingKey) {
      // If server name changed, delete old one and add new one
      if (editingKey !== name) {
@@ -102,7 +98,7 @@ function MCPServers() {
    }

    syncServers()
    await toggleServer(name, true)
    toggleServer(name, true)
  }

  const handleEdit = (serverKey: string) => {
@@ -147,25 +143,26 @@ function MCPServers() {
  ) => {
    if (jsonServerName) {
      try {
        await toggleServer(jsonServerName, false)
        toggleServer(jsonServerName, false)
      } catch (error) {
        console.error('Error deactivating server:', error)
      }
      // Save single server
      editServer(jsonServerName, data as MCPServerConfig)
      syncServers()
      toggleServer(jsonServerName, true)
      toggleServer(jsonServerName, (data as MCPServerConfig).active || false)
    } else {
      // Save all servers
      // Clear existing servers first
      Object.keys(mcpServers).forEach((key) => {
        deleteServer(key)
      Object.keys(mcpServers).forEach((serverKey) => {
        toggleServer(serverKey, false)
        deleteServer(serverKey)
      })

      // Add all servers from the JSON
      Object.entries(data as Record<string, MCPServerConfig>).forEach(
        ([key, config]) => {
          addServer(key, config)
          toggleServer(key, config.active || false)
        }
      )
    }
@@ -4,8 +4,7 @@ import { UIEventHandler } from 'react'
import debounce from 'lodash.debounce'
import cloneDeep from 'lodash.clonedeep'
import { cn } from '@/lib/utils'
import { ArrowDown } from 'lucide-react'
import { Play } from 'lucide-react'
import { ArrowDown, Play } from 'lucide-react'

import HeaderPage from '@/containers/HeaderPage'
import { useThreads } from '@/hooks/useThreads'
@@ -328,7 +327,7 @@ function ThreadDetail() {
        >
          {showScrollToBottomBtn && (
            <div
              className="bg-main-view-fg/10 px-4 border border-main-view-fg/5 flex items-center justify-center rounded-xl gap-x-2 cursor-pointer pointer-events-auto"
              className="bg-main-view-fg/10 px-2 border border-main-view-fg/5 flex items-center justify-center rounded-xl gap-x-2 cursor-pointer pointer-events-auto"
              onClick={() => {
                scrollToBottom(true)
                setIsUserScrolling(false)
@@ -340,7 +339,7 @@ function ThreadDetail() {
          )}
          {showGenerateAIResponseBtn && (
            <div
              className="bg-main-view-fg/10 px-4 border border-main-view-fg/5 flex items-center justify-center rounded-xl gap-x-2 cursor-pointer pointer-events-auto"
              className="mx-2 bg-main-view-fg/10 px-2 border border-main-view-fg/5 flex items-center justify-center rounded-xl gap-x-2 cursor-pointer pointer-events-auto"
              onClick={generateAIResponse}
            >
              <p className="text-xs">{t('common:generateAiResponse')}</p>
@@ -4,6 +4,7 @@ import {
  fetchModels,
  fetchModelCatalog,
  fetchHuggingFaceRepo,
  convertHfRepoToCatalogModel,
  updateModel,
  pullModel,
  abortDownload,
@@ -12,6 +13,8 @@ import {
  stopModel,
  stopAllModels,
  startModel,
  HuggingFaceRepo,
  CatalogModel,
} from '../models'
import { EngineManager, Model } from '@janhq/core'
@ -334,7 +337,9 @@ describe('models service', () => {
      })

      // Test with full URL
-     await fetchHuggingFaceRepo('https://huggingface.co/microsoft/DialoGPT-medium')
+     await fetchHuggingFaceRepo(
+       'https://huggingface.co/microsoft/DialoGPT-medium'
+     )
      expect(fetch).toHaveBeenCalledWith(
        'https://huggingface.co/api/models/microsoft/DialoGPT-medium?blobs=true'
      )
@ -380,7 +385,7 @@ describe('models service', () => {

    it('should handle other HTTP errors', async () => {
      const consoleSpy = vi.spyOn(console, 'error').mockImplementation(() => {})

      ;(fetch as any).mockResolvedValue({
        ok: false,
        status: 500,
@ -394,13 +399,13 @@ describe('models service', () => {
        'Error fetching HuggingFace repository:',
        expect.any(Error)
      )

      consoleSpy.mockRestore()
    })

    it('should handle network errors', async () => {
      const consoleSpy = vi.spyOn(console, 'error').mockImplementation(() => {})

      ;(fetch as any).mockRejectedValue(new Error('Network error'))

      const result = await fetchHuggingFaceRepo('microsoft/DialoGPT-medium')
@ -410,7 +415,7 @@ describe('models service', () => {
        'Error fetching HuggingFace repository:',
        expect.any(Error)
      )

      consoleSpy.mockRestore()
    })

@ -524,7 +529,303 @@ describe('models service', () => {

      expect(result).toEqual(mockRepoData)
      // Verify the GGUF file is present in siblings
-     expect(result?.siblings?.some(s => s.rfilename.endsWith('.gguf'))).toBe(true)
+     expect(result?.siblings?.some((s) => s.rfilename.endsWith('.gguf'))).toBe(
+       true
+     )
    })
  })

  describe('convertHfRepoToCatalogModel', () => {
    const mockHuggingFaceRepo: HuggingFaceRepo = {
      id: 'microsoft/DialoGPT-medium',
      modelId: 'microsoft/DialoGPT-medium',
      sha: 'abc123',
      downloads: 1500,
      likes: 75,
      tags: ['pytorch', 'transformers', 'text-generation'],
      pipeline_tag: 'text-generation',
      created_at: '2021-01-01T00:00:00Z',
      last_modified: '2021-12-01T00:00:00Z',
      private: false,
      disabled: false,
      gated: false,
      author: 'microsoft',
      siblings: [
        {
          rfilename: 'model-q4_0.gguf',
          size: 2 * 1024 * 1024 * 1024, // 2GB
          blobId: 'blob123',
        },
        {
          rfilename: 'model-q8_0.GGUF', // Test case-insensitive matching
          size: 4 * 1024 * 1024 * 1024, // 4GB
          blobId: 'blob456',
        },
        {
          rfilename: 'tokenizer.json', // Non-GGUF file (should be filtered out)
          size: 1024 * 1024, // 1MB
          blobId: 'blob789',
        },
      ],
    }

    it('should convert HuggingFace repo to catalog model format', () => {
      const result = convertHfRepoToCatalogModel(mockHuggingFaceRepo)

      const expected: CatalogModel = {
        model_name: 'microsoft/DialoGPT-medium',
        description: '**Tags**: pytorch, transformers, text-generation',
        developer: 'microsoft',
        downloads: 1500,
        num_quants: 2,
        quants: [
          {
            model_id: 'model-q4_0',
            path: 'https://huggingface.co/microsoft/DialoGPT-medium/resolve/main/model-q4_0.gguf',
            file_size: '2.0 GB',
          },
          {
            model_id: 'model-q8_0',
            path: 'https://huggingface.co/microsoft/DialoGPT-medium/resolve/main/model-q8_0.GGUF',
            file_size: '4.0 GB',
          },
        ],
        created_at: '2021-01-01T00:00:00Z',
        readme:
          'https://huggingface.co/microsoft/DialoGPT-medium/resolve/main/README.md',
      }

      expect(result).toEqual(expected)
    })

    it('should handle repository with no GGUF files', () => {
      const repoWithoutGGUF: HuggingFaceRepo = {
        ...mockHuggingFaceRepo,
        siblings: [
          {
            rfilename: 'tokenizer.json',
            size: 1024 * 1024,
            blobId: 'blob789',
          },
          {
            rfilename: 'config.json',
            size: 2048,
            blobId: 'blob101',
          },
        ],
      }

      const result = convertHfRepoToCatalogModel(repoWithoutGGUF)

      expect(result.num_quants).toBe(0)
      expect(result.quants).toEqual([])
    })

    it('should handle repository with no siblings', () => {
      const repoWithoutSiblings: HuggingFaceRepo = {
        ...mockHuggingFaceRepo,
        siblings: undefined,
      }

      const result = convertHfRepoToCatalogModel(repoWithoutSiblings)

      expect(result.num_quants).toBe(0)
      expect(result.quants).toEqual([])
    })

    it('should format file sizes correctly', () => {
      const repoWithVariousFileSizes: HuggingFaceRepo = {
        ...mockHuggingFaceRepo,
        siblings: [
          {
            rfilename: 'small-model.gguf',
            size: 500 * 1024 * 1024, // 500MB
            blobId: 'blob1',
          },
          {
            rfilename: 'large-model.gguf',
            size: 3.5 * 1024 * 1024 * 1024, // 3.5GB
            blobId: 'blob2',
          },
          {
            rfilename: 'unknown-size.gguf',
            // No size property
            blobId: 'blob3',
          },
        ],
      }

      const result = convertHfRepoToCatalogModel(repoWithVariousFileSizes)

      expect(result.quants[0].file_size).toBe('500.0 MB')
      expect(result.quants[1].file_size).toBe('3.5 GB')
      expect(result.quants[2].file_size).toBe('Unknown size')
    })

    it('should handle empty or undefined tags', () => {
      const repoWithEmptyTags: HuggingFaceRepo = {
        ...mockHuggingFaceRepo,
        tags: [],
      }

      const result = convertHfRepoToCatalogModel(repoWithEmptyTags)

      expect(result.description).toBe('**Tags**: ')
    })

    it('should handle missing downloads count', () => {
      const repoWithoutDownloads: HuggingFaceRepo = {
        ...mockHuggingFaceRepo,
        downloads: undefined as any,
      }

      const result = convertHfRepoToCatalogModel(repoWithoutDownloads)

      expect(result.downloads).toBe(0)
    })

    it('should correctly remove .gguf extension from model IDs', () => {
      const repoWithVariousGGUF: HuggingFaceRepo = {
        ...mockHuggingFaceRepo,
        siblings: [
          {
            rfilename: 'model.gguf',
            size: 1024,
            blobId: 'blob1',
          },
          {
            rfilename: 'MODEL.GGUF',
            size: 1024,
            blobId: 'blob2',
          },
          {
            rfilename: 'complex-model-name.gguf',
            size: 1024,
            blobId: 'blob3',
          },
        ],
      }

      const result = convertHfRepoToCatalogModel(repoWithVariousGGUF)

      expect(result.quants[0].model_id).toBe('model')
      expect(result.quants[1].model_id).toBe('MODEL')
      expect(result.quants[2].model_id).toBe('complex-model-name')
    })

    it('should generate correct download paths', () => {
      const result = convertHfRepoToCatalogModel(mockHuggingFaceRepo)

      expect(result.quants[0].path).toBe(
        'https://huggingface.co/microsoft/DialoGPT-medium/resolve/main/model-q4_0.gguf'
      )
      expect(result.quants[1].path).toBe(
        'https://huggingface.co/microsoft/DialoGPT-medium/resolve/main/model-q8_0.GGUF'
      )
    })

    it('should generate correct readme URL', () => {
      const result = convertHfRepoToCatalogModel(mockHuggingFaceRepo)

      expect(result.readme).toBe(
        'https://huggingface.co/microsoft/DialoGPT-medium/resolve/main/README.md'
      )
    })

    it('should handle GGUF files with case-insensitive extension matching', () => {
      const repoWithMixedCase: HuggingFaceRepo = {
        ...mockHuggingFaceRepo,
        siblings: [
          {
            rfilename: 'model-1.gguf',
            size: 1024,
            blobId: 'blob1',
          },
          {
            rfilename: 'model-2.GGUF',
            size: 1024,
            blobId: 'blob2',
          },
          {
            rfilename: 'model-3.GgUf',
            size: 1024,
            blobId: 'blob3',
          },
          {
            rfilename: 'not-a-model.txt',
            size: 1024,
            blobId: 'blob4',
          },
        ],
      }

      const result = convertHfRepoToCatalogModel(repoWithMixedCase)

      expect(result.num_quants).toBe(3)
      expect(result.quants).toHaveLength(3)
      expect(result.quants[0].model_id).toBe('model-1')
      expect(result.quants[1].model_id).toBe('model-2')
      expect(result.quants[2].model_id).toBe('model-3')
    })

    it('should handle edge cases with file size formatting', () => {
      const repoWithEdgeCases: HuggingFaceRepo = {
        ...mockHuggingFaceRepo,
        siblings: [
          {
            rfilename: 'tiny.gguf',
            size: 512, // < 1MB
            blobId: 'blob1',
          },
          {
            rfilename: 'exactly-1gb.gguf',
            size: 1024 * 1024 * 1024, // Exactly 1GB
            blobId: 'blob2',
          },
          {
            rfilename: 'zero-size.gguf',
            size: 0,
            blobId: 'blob3',
          },
        ],
      }

      const result = convertHfRepoToCatalogModel(repoWithEdgeCases)

      expect(result.quants[0].file_size).toBe('0.0 MB')
      expect(result.quants[1].file_size).toBe('1.0 GB')
      expect(result.quants[2].file_size).toBe('Unknown size') // 0 is falsy, so it returns 'Unknown size'
    })

    it('should handle missing optional fields gracefully', () => {
      const minimalRepo: HuggingFaceRepo = {
        id: 'minimal/repo',
        modelId: 'minimal/repo',
        sha: 'abc123',
        downloads: 0,
        likes: 0,
        tags: [],
        created_at: '2021-01-01T00:00:00Z',
        last_modified: '2021-12-01T00:00:00Z',
        private: false,
        disabled: false,
        gated: false,
        author: 'minimal',
        siblings: [
          {
            rfilename: 'model.gguf',
            blobId: 'blob1',
          },
        ],
      }

      const result = convertHfRepoToCatalogModel(minimalRepo)

      expect(result.model_name).toBe('minimal/repo')
      expect(result.developer).toBe('minimal')
      expect(result.downloads).toBe(0)
      expect(result.description).toBe('**Tags**: ')
      expect(result.quants[0].file_size).toBe('Unknown size')
    })
  })
})
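The edge-case expectations above follow directly from the byte thresholds in the `formatFileSize` helper (shown in the implementation hunk below): anything under 1024³ bytes is rendered in MB, and any falsy size short-circuits to 'Unknown size'. A minimal sketch of that rule with the tested inputs worked out (the helper body mirrors the implementation that follows):

// Worked examples of the size-formatting rule the edge-case tests exercise.
const formatFileSize = (size?: number): string => {
  if (!size) return 'Unknown size' // 0 and undefined are both falsy
  if (size < 1024 ** 3) return `${(size / 1024 ** 2).toFixed(1)} MB`
  return `${(size / 1024 ** 3).toFixed(1)} GB`
}

formatFileSize(512)       // '0.0 MB'  (512 / 1024² ≈ 0.0005, rounded to one decimal)
formatFileSize(1024 ** 3) // '1.0 GB'  (exactly 1 GiB is not below the threshold)
formatFileSize(0)         // 'Unknown size' (0 is falsy, same as a missing size)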
@ -134,6 +134,47 @@ export const fetchHuggingFaceRepo = async (
  }
}

// Convert HuggingFace repository to CatalogModel format
export const convertHfRepoToCatalogModel = (
  repo: HuggingFaceRepo
): CatalogModel => {
  // Extract GGUF files from the repository siblings
  const ggufFiles =
    repo.siblings?.filter((file) =>
      file.rfilename.toLowerCase().endsWith('.gguf')
    ) || []

  // Convert GGUF files to quants format
  const quants = ggufFiles.map((file) => {
    // Format file size
    const formatFileSize = (size?: number) => {
      if (!size) return 'Unknown size'
      if (size < 1024 ** 3) return `${(size / 1024 ** 2).toFixed(1)} MB`
      return `${(size / 1024 ** 3).toFixed(1)} GB`
    }

    // Generate model_id from filename (remove .gguf extension, case-insensitive)
    const modelId = file.rfilename.replace(/\.gguf$/i, '')

    return {
      model_id: modelId,
      path: `https://huggingface.co/${repo.modelId}/resolve/main/${file.rfilename}`,
      file_size: formatFileSize(file.size),
    }
  })

  return {
    model_name: repo.modelId,
    description: `**Tags**: ${repo.tags?.join(', ')}`,
    developer: repo.author,
    downloads: repo.downloads || 0,
    num_quants: quants.length,
    quants: quants,
    created_at: repo.created_at,
    readme: `https://huggingface.co/${repo.modelId}/resolve/main/README.md`,
  }
}

/**
 * Updates a model.
 * @param model The model to update.
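Taken together with `fetchHuggingFaceRepo`, this lets the app surface any GGUF repository as if it were a catalog entry. A minimal usage sketch, assuming the same `'../models'` module path used by the tests above and an ESM context where top-level await is available:

// Sketch: fetch a repo and list its downloadable GGUF quantizations.
import { fetchHuggingFaceRepo, convertHfRepoToCatalogModel } from '../models'

const repo = await fetchHuggingFaceRepo('microsoft/DialoGPT-medium')
if (repo) {
  const catalogModel = convertHfRepoToCatalogModel(repo)
  for (const quant of catalogModel.quants) {
    console.log(`${quant.model_id} (${quant.file_size}): ${quant.path}`)
  }
}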
1
web-app/src/types/global.d.ts
vendored
@ -19,6 +19,7 @@ declare global {
  declare const POSTHOG_KEY: string
  declare const POSTHOG_HOST: string
  declare const MODEL_CATALOG_URL: string
  declare const AUTO_UPDATER_DISABLED: boolean
  interface Window {
    core: AppCore | undefined
  }
@ -33,19 +33,19 @@ export default defineConfig(({ mode }) => {
    define: {
      IS_TAURI: JSON.stringify(process.env.IS_TAURI),
      IS_MACOS: JSON.stringify(
-       process.env.TAURI_ENV_PLATFORM?.includes('darwin') ?? 'false'
+       process.env.TAURI_ENV_PLATFORM?.includes('darwin') ?? false
      ),
      IS_WINDOWS: JSON.stringify(
-       process.env.TAURI_ENV_PLATFORM?.includes('windows') ?? 'false'
+       process.env.TAURI_ENV_PLATFORM?.includes('windows') ?? false
      ),
      IS_LINUX: JSON.stringify(
-       process.env.TAURI_ENV_PLATFORM?.includes('linux') ?? 'false'
+       process.env.TAURI_ENV_PLATFORM?.includes('linux') ?? false
      ),
      IS_IOS: JSON.stringify(
-       process.env.TAURI_ENV_PLATFORM?.includes('ios') ?? 'false'
+       process.env.TAURI_ENV_PLATFORM?.includes('ios') ?? false
      ),
      IS_ANDROID: JSON.stringify(
-       process.env.TAURI_ENV_PLATFORM?.includes('android') ?? 'false'
+       process.env.TAURI_ENV_PLATFORM?.includes('android') ?? false
      ),
      PLATFORM: JSON.stringify(process.env.TAURI_ENV_PLATFORM),
@ -56,6 +56,9 @@ export default defineConfig(({ mode }) => {
      MODEL_CATALOG_URL: JSON.stringify(
        'https://raw.githubusercontent.com/menloresearch/model-catalog/main/model_catalog.json'
      ),
      AUTO_UPDATER_DISABLED: JSON.stringify(
        env.AUTO_UPDATER_DISABLED === 'true'
      ),
    },

    // Vite options tailored for Tauri development and only applied in `tauri dev` or `tauri build`
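The `?? 'false'` to `?? false` change fixes a real footgun: Vite's `define` substitutes the JSON-encoded value as source text, so when `TAURI_ENV_PLATFORM` was unset the old string fallback injected `"false"`, a truthy value, while the new boolean fallback injects the literal `false`. A short sketch of the difference:

// Why the nullish fallback must be a boolean, not the string 'false':
JSON.stringify(false)   // 'false'   -> IS_MACOS is replaced by the literal false
JSON.stringify('false') // '"false"' -> IS_MACOS is replaced by the string "false"

if ('false') {
  // Always reached: a non-empty string is truthy, so the old fallback made
  // flags like IS_MACOS look enabled whenever the env var was unset.
}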
@ -38,5 +38,6 @@ export default defineConfig({
    VERSION: JSON.stringify('test'),
    POSTHOG_KEY: JSON.stringify(''),
    POSTHOG_HOST: JSON.stringify(''),
    AUTO_UPDATER_DISABLED: JSON.stringify('false'),
  },
})
21
website/.gitignore
vendored
Normal file
@ -0,0 +1,21 @@
# build output
dist/
# generated types
.astro/

# dependencies
node_modules/

# logs
npm-debug.log*
yarn-debug.log*
yarn-error.log*
pnpm-debug.log*


# environment variables
.env
.env.production

# macOS-specific files
.DS_Store
28
website/README.md
Normal file
@ -0,0 +1,28 @@
# Jan's Website

This website is [built with Starlight](https://starlight.astro.build).


Starlight looks for `.md` or `.mdx` files in the `src/content/docs/` directory. Each file is exposed
as a route based on its file name.

Images can be added to `src/assets/` and embedded in Markdown with a relative link.

Static assets, like favicons, can be placed in the `public/` directory.

If you want to add new pages, they can go in the `src/pages/` directory. Because of the topics plugin
we are using ([starlight sidebar topics](https://starlight-sidebar-topics.netlify.app/docs/guides/excluded-pages/)),
you will need to exclude them from the sidebar by adding them to the exclude list in `astro.config.mjs`, e.g., `exclude: ['/example'],` (see the sketch below).
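For instance, to keep a hypothetical page at `src/pages/example.astro` out of the sidebar, the second argument to `starlightSidebarTopics` would gain an entry like this (a minimal sketch; the full exclude list lives in this repo's `astro.config.mjs`, shown further below):

```js
// astro.config.mjs (excerpt): options object passed to starlightSidebarTopics
starlightSidebarTopics(
  [
    // ...topic definitions...
  ],
  {
    // Pages under src/pages/ that should not appear in any topic sidebar
    exclude: ['/example'],
  }
)
```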
## 🧞 Commands

All commands are run from the root of the project, from a terminal:

| Command                   | Action                                            |
| :------------------------ | :------------------------------------------------ |
| `bun install`             | Installs dependencies                             |
| `bun dev`                 | Starts local dev server at `localhost:4321`       |
| `bun run build`           | Builds your production site to `./dist/`          |
| `bun preview`             | Previews your build locally, before deploying     |
| `bun astro ...`           | Run CLI commands like `astro add`, `astro check`  |
| `bun astro -- --help`     | Get help using the Astro CLI                      |
191
website/astro.config.mjs
Normal file
@ -0,0 +1,191 @@
// @ts-check
import { defineConfig } from 'astro/config'
import starlight from '@astrojs/starlight'
import starlightThemeRapide from 'starlight-theme-rapide'
import starlightSidebarTopics from 'starlight-sidebar-topics'
import mermaid from 'astro-mermaid'

// https://astro.build/config
export default defineConfig({
  // Deploy to the new v2 subdomain
  site: 'https://v2.jan.ai',
  // No 'base' property is needed, as this will be deployed to the root of the subdomain.
  integrations: [
    mermaid({
      theme: 'default',
      autoTheme: true,
    }),
    starlight({
      title: '👋 Jan',
      favicon: 'jan2.png',
      plugins: [
        starlightThemeRapide(),
        starlightSidebarTopics(
          [
            {
              label: 'Jan Desktop',
              link: '/',
              icon: 'rocket',
              items: [
                {
                  label: 'HOW TO',
                  items: [
                    {
                      label: 'Install 👋 Jan',
                      collapsed: false,
                      autogenerate: { directory: 'jan/installation' },
                    },
                    { label: 'Start Chatting', slug: 'jan/threads' },
                    {
                      label: 'Use Jan Models',
                      collapsed: true,
                      autogenerate: { directory: 'jan/jan-models' },
                    },
                    { label: 'Assistants', slug: 'jan/assistants' },
                  ],
                },
                {
                  label: 'Cloud Providers',
                  items: [
                    { label: 'Anthropic', slug: 'jan/remote-models/anthropic' },
                    { label: 'OpenAI', slug: 'jan/remote-models/openai' },
                    { label: 'Gemini', slug: 'jan/remote-models/google' },
                    {
                      label: 'OpenRouter',
                      slug: 'jan/remote-models/openrouter',
                    },
                    { label: 'Cohere', slug: 'jan/remote-models/cohere' },
                    { label: 'Mistral', slug: 'jan/remote-models/mistralai' },
                    { label: 'Groq', slug: 'jan/remote-models/groq' },
                  ],
                },
                {
                  label: 'EXPLANATION',
                  items: [
                    {
                      label: 'Local AI Engine',
                      slug: 'jan/explanation/llama-cpp',
                    },
                    {
                      label: 'Model Parameters',
                      slug: 'jan/explanation/model-parameters',
                    },
                  ],
                },
                {
                  label: 'ADVANCED',
                  items: [
                    { label: 'Manage Models', slug: 'jan/manage-models' },
                    { label: 'Model Context Protocol', slug: 'jan/mcp' },
                    {
                      label: 'MCP Examples',
                      collapsed: true,
                      items: [
                        {
                          label: 'Browser Control (Browserbase)',
                          slug: 'jan/mcp-examples/browser/browserbase',
                        },
                        {
                          label: 'Code Sandbox (E2B)',
                          slug: 'jan/mcp-examples/data-analysis/e2b',
                        },
                        {
                          label: 'Design Creation (Canva)',
                          slug: 'jan/mcp-examples/design/canva',
                        },
                        {
                          label: 'Deep Research (Octagon)',
                          slug: 'jan/mcp-examples/deepresearch/octagon',
                        },
                        {
                          label: 'Web Search with Exa',
                          slug: 'jan/mcp-examples/search/exa',
                        },
                      ],
                    },
                  ],
                },
                {
                  label: 'Local Server',
                  items: [
                    { label: 'Introduction', link: '/local-server/' },
                    { label: 'Server Setup', slug: 'local-server/api-server' },
                    {
                      label: 'Jan Data Folder',
                      slug: 'local-server/data-folder',
                    },
                    { label: 'Server Settings', slug: 'local-server/settings' },
                    {
                      label: 'Llama.cpp Server',
                      slug: 'local-server/llama-cpp',
                    },
                    {
                      label: 'Server Troubleshooting',
                      slug: 'local-server/troubleshooting',
                    },
                    {
                      label: 'Integrations',
                      collapsed: true,
                      autogenerate: { directory: 'local-server/integrations' },
                    },
                  ],
                },
                {
                  label: 'REFERENCE',
                  items: [
                    { label: 'Settings', slug: 'jan/settings' },
                    { label: 'Jan Data Folder', slug: 'jan/data-folder' },
                    { label: 'Troubleshooting', slug: 'jan/troubleshooting' },
                    { label: 'Privacy Policy', slug: 'jan/privacy' },
                  ],
                },
              ],
            },
            {
              label: 'Jan Mobile',
              link: '/mobile/',
              badge: { text: 'Coming Soon', variant: 'caution' },
              icon: 'phone',
              items: [{ label: 'Overview', slug: 'mobile' }],
            },
            {
              label: 'Jan Server',
              link: '/server/',
              badge: { text: 'Coming Soon', variant: 'caution' },
              icon: 'forward-slash',
              items: [{ label: 'Overview', slug: 'server' }],
            },
          ],
          {
            exclude: [
              '/prods',
              '/api-reference',
              '/products',
              '/products/**/*',
            ],
          }
        ),
      ],
      social: [
        {
          icon: 'github',
          label: 'GitHub',
          href: 'https://github.com/menloresearch/jan',
        },
        {
          icon: 'x.com',
          label: 'X',
          href: 'https://twitter.com/jandotai',
        },
        {
          icon: 'discord',
          label: 'Discord',
          href: 'https://discord.com/invite/FTk2MvZwJH',
        },
      ],
      components: {
        Header: './src/components/CustomNav.astro',
      },
    }),
  ],
})
1299
website/bun.lock
Normal file
27
website/package.json
Normal file
@ -0,0 +1,27 @@
{
  "name": "website",
  "type": "module",
  "version": "0.0.1",
  "scripts": {
    "dev": "astro dev",
    "start": "astro dev",
    "build": "astro build",
    "preview": "astro preview",
    "astro": "astro"
  },
  "dependencies": {
    "@astrojs/starlight": "^0.35.1",
    "@lorenzo_lewis/starlight-utils": "^0.3.2",
    "astro": "^5.6.1",
    "astro-mermaid": "^1.0.4",
    "gsap": "^3.13.0",
    "mermaid": "^11.9.0",
    "phosphor-astro": "^2.1.0",
    "sharp": "^0.34.3",
    "starlight-openapi": "^0.19.1",
    "starlight-sidebar-topics": "^0.6.0",
    "starlight-theme-rapide": "^0.5.1",
    "starlight-videos": "^0.3.0"
  },
  "packageManager": "yarn@1.22.22"
}
1
website/public/favicon.svg
Normal file
@ -0,0 +1 @@
<svg xmlns="http://www.w3.org/2000/svg" viewBox="0 0 128 128"><path fill-rule="evenodd" d="M81 36 64 0 47 36l-1 2-9-10a6 6 0 0 0-9 9l10 10h-2L0 64l36 17h2L28 91a6 6 0 1 0 9 9l9-10 1 2 17 36 17-36v-2l9 10a6 6 0 1 0 9-9l-9-9 2-1 36-17-36-17-2-1 9-9a6 6 0 1 0-9-9l-9 10v-2Zm-17 2-2 5c-4 8-11 15-19 19l-5 2 5 2c8 4 15 11 19 19l2 5 2-5c4-8 11-15 19-19l5-2-5-2c-8-4-15-11-19-19l-2-5Z" clip-rule="evenodd"/><path d="M118 19a6 6 0 0 0-9-9l-3 3a6 6 0 1 0 9 9l3-3Zm-96 4c-2 2-6 2-9 0l-3-3a6 6 0 1 1 9-9l3 3c3 2 3 6 0 9Zm0 82c-2-2-6-2-9 0l-3 3a6 6 0 1 0 9 9l3-3c3-2 3-6 0-9Zm96 4a6 6 0 0 1-9 9l-3-3a6 6 0 1 1 9-9l3 3Z"/><style>path{fill:#000}@media (prefers-color-scheme:dark){path{fill:#fff}}</style></svg>
Size: 696 B
BIN
website/public/gifs/jan-nano-demo.gif
Normal file
Size: 22 MiB
BIN
website/public/jan.png
Normal file
Size: 1.5 MiB
BIN
website/public/jan2.png
Normal file
Size: 1.8 MiB
1459
website/public/openapi/openapi.json
Normal file
BIN
website/public/videos/jan-nano-demo.mp4
Normal file
BIN
website/src/assets/add_assistant.png
Normal file
Size: 163 KiB
BIN
website/src/assets/anthropic.png
Normal file
Size: 149 KiB
BIN
website/src/assets/api-server-logs.png
Normal file
Size: 1.4 MiB
BIN
website/src/assets/api-server-ui.png
Normal file
Size: 562 KiB
BIN
website/src/assets/api-server.png
Normal file
Size: 598 KiB
BIN
website/src/assets/api-server2.png
Normal file
Size: 306 KiB
BIN
website/src/assets/assistant-add-dialog.png
Normal file
Size: 85 KiB
BIN
website/src/assets/assistant-dropdown-updated.png
Normal file
Size: 120 KiB
BIN
website/src/assets/assistant-dropdown.png
Normal file
Size: 450 KiB
BIN
website/src/assets/assistant-edit-dialog.png
Normal file
Size: 118 KiB
BIN
website/src/assets/assistants-ui-overview.png
Normal file
Size: 453 KiB
BIN
website/src/assets/browserbase.png
Normal file
Size: 714 KiB
BIN
website/src/assets/browserbase2.png
Normal file
Size: 554 KiB
BIN
website/src/assets/browserbase3.png
Normal file
Size: 377 KiB
BIN
website/src/assets/browserbase4.png
Normal file
Size: 453 KiB
BIN
website/src/assets/browserbase5.png
Normal file
Size: 616 KiB
BIN
website/src/assets/browserbase6.png
Normal file
Size: 742 KiB
BIN
website/src/assets/browserbase7.png
Normal file
Size: 544 KiB
BIN
website/src/assets/canva.png
Normal file
Size: 404 KiB
BIN
website/src/assets/canva2.png
Normal file
Size: 4.4 MiB
BIN
website/src/assets/canva3.png
Normal file
Size: 432 KiB
BIN
website/src/assets/canva4.png
Normal file
Size: 499 KiB
After Width: | Height: | Size: 499 KiB |