Merge branch 'dev' into dev-web

Dinh Long Nguyen 2025-09-26 15:55:14 +07:00
commit efdd1b3971
140 changed files with 5806 additions and 5186 deletions

View File

@@ -53,6 +53,9 @@ jobs:
- name: Install dependencies
working-directory: docs
run: yarn install
- name: Clean output directory
working-directory: docs
run: rm -rf out/* .next/*
- name: Build website
working-directory: docs
run: export NODE_ENV=production && yarn build && cp _redirects out/_redirects && cp _headers out/_headers

View File

@@ -15,7 +15,6 @@ on:
- 'pre-install/**'
- 'Makefile'
- 'package.json'
- 'mise.toml'
jobs:
get-update-version:

View File

@@ -35,7 +35,6 @@ on:
- 'pre-install/**'
- 'Makefile'
- 'package.json'
- 'mise.toml'
jobs:

View File

@@ -79,8 +79,33 @@ jobs:
jq --arg version "${{ inputs.new_version }}" '.version = $version' web-app/package.json > /tmp/package.json
mv /tmp/package.json web-app/package.json
ctoml ./src-tauri/Cargo.toml dependencies.tauri.features[] "devtools"
# Update tauri plugin versions
jq --arg version "${{ inputs.new_version }}" '.version = $version' ./src-tauri/plugins/tauri-plugin-hardware/package.json > /tmp/package.json
mv /tmp/package.json ./src-tauri/plugins/tauri-plugin-hardware/package.json
echo "---------./src-tauri/plugins/tauri-plugin-hardware/package.json---------"
cat ./src-tauri/plugins/tauri-plugin-hardware/package.json
jq --arg version "${{ inputs.new_version }}" '.version = $version' ./src-tauri/plugins/tauri-plugin-llamacpp/package.json > /tmp/package.json
mv /tmp/package.json ./src-tauri/plugins/tauri-plugin-llamacpp/package.json
echo "---------./src-tauri/plugins/tauri-plugin-llamacpp/package.json---------"
cat ./src-tauri/plugins/tauri-plugin-llamacpp/package.json
ctoml ./src-tauri/plugins/tauri-plugin-hardware/Cargo.toml package.version "${{ inputs.new_version }}"
echo "---------./src-tauri/plugins/tauri-plugin-hardware/Cargo.toml---------"
cat ./src-tauri/plugins/tauri-plugin-hardware/Cargo.toml
ctoml ./src-tauri/plugins/tauri-plugin-llamacpp/Cargo.toml package.version "${{ inputs.new_version }}"
echo "---------./src-tauri/plugins/tauri-plugin-llamacpp/Cargo.toml---------"
cat ./src-tauri/plugins/tauri-plugin-llamacpp/Cargo.toml
ctoml ./src-tauri/Cargo.toml package.version "${{ inputs.new_version }}"
echo "---------./src-tauri/Cargo.toml---------"
cat ./src-tauri/Cargo.toml
ctoml ./src-tauri/Cargo.toml dependencies.tauri.features[] "devtools"
if [ "${{ inputs.channel }}" != "stable" ]; then
jq '.plugins.updater.endpoints = ["https://delta.jan.ai/${{ inputs.channel }}/latest.json"]' ./src-tauri/tauri.conf.json > /tmp/tauri.conf.json
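The jq/ctoml bump above is repeated verbatim for each Tauri plugin, and again in the other build workflows below. A minimal shell sketch of the pattern, assuming only the two plugin directories shown in this diff and a placeholder value in place of `${{ inputs.new_version }}`:

```bash
# Sketch only (not part of the workflow): apply the same version bump to every plugin.
NEW_VERSION="0.6.0"  # stands in for ${{ inputs.new_version }}
for plugin in tauri-plugin-hardware tauri-plugin-llamacpp; do
  dir="./src-tauri/plugins/$plugin"
  jq --arg version "$NEW_VERSION" '.version = $version' "$dir/package.json" > /tmp/package.json
  mv /tmp/package.json "$dir/package.json"
  ctoml "$dir/Cargo.toml" package.version "$NEW_VERSION"
done
```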

View File

@@ -100,13 +100,36 @@ jobs:
jq --arg version "${{ inputs.new_version }}" '.version = $version' web-app/package.json > /tmp/package.json
mv /tmp/package.json web-app/package.json
# Temporarily enable devtool on prod build
ctoml ./src-tauri/Cargo.toml dependencies.tauri.features[] "devtools"
cat ./src-tauri/Cargo.toml
# Update tauri plugin versions
jq --arg version "${{ inputs.new_version }}" '.version = $version' ./src-tauri/plugins/tauri-plugin-hardware/package.json > /tmp/package.json
mv /tmp/package.json ./src-tauri/plugins/tauri-plugin-hardware/package.json
echo "---------./src-tauri/plugins/tauri-plugin-hardware/package.json---------"
cat ./src-tauri/plugins/tauri-plugin-hardware/package.json
jq --arg version "${{ inputs.new_version }}" '.version = $version' ./src-tauri/plugins/tauri-plugin-llamacpp/package.json > /tmp/package.json
mv /tmp/package.json ./src-tauri/plugins/tauri-plugin-llamacpp/package.json
echo "---------./src-tauri/plugins/tauri-plugin-llamacpp/package.json---------"
cat ./src-tauri/plugins/tauri-plugin-llamacpp/package.json
ctoml ./src-tauri/plugins/tauri-plugin-hardware/Cargo.toml package.version "${{ inputs.new_version }}"
echo "---------./src-tauri/plugins/tauri-plugin-hardware/Cargo.toml---------"
cat ./src-tauri/plugins/tauri-plugin-hardware/Cargo.toml
ctoml ./src-tauri/plugins/tauri-plugin-llamacpp/Cargo.toml package.version "${{ inputs.new_version }}"
echo "---------./src-tauri/plugins/tauri-plugin-llamacpp/Cargo.toml---------"
cat ./src-tauri/plugins/tauri-plugin-llamacpp/Cargo.toml
ctoml ./src-tauri/Cargo.toml package.version "${{ inputs.new_version }}"
echo "---------./src-tauri/Cargo.toml---------"
cat ./src-tauri/Cargo.toml
# Temporarily enable devtool on prod build
ctoml ./src-tauri/Cargo.toml dependencies.tauri.features[] "devtools"
cat ./src-tauri/Cargo.toml
# Change app name for beta and nightly builds
if [ "${{ inputs.channel }}" != "stable" ]; then
jq '.plugins.updater.endpoints = ["https://delta.jan.ai/${{ inputs.channel }}/latest.json"]' ./src-tauri/tauri.conf.json > /tmp/tauri.conf.json

View File

@@ -53,7 +53,7 @@ on:
value: ${{ jobs.build-linux-x64.outputs.APPIMAGE_FILE_NAME }}
jobs:
build-linux-x64:
runs-on: ubuntu-22.04
runs-on: ubuntu-latest
outputs:
DEB_SIG: ${{ steps.packageinfo.outputs.DEB_SIG }}
APPIMAGE_SIG: ${{ steps.packageinfo.outputs.APPIMAGE_SIG }}
@@ -117,11 +117,34 @@ jobs:
jq --arg version "${{ inputs.new_version }}" '.version = $version' web-app/package.json > /tmp/package.json
mv /tmp/package.json web-app/package.json
# Temporarily enable devtool on prod build
ctoml ./src-tauri/Cargo.toml dependencies.tauri.features[] "devtools"
cat ./src-tauri/Cargo.toml
# Update tauri plugin versions
jq --arg version "${{ inputs.new_version }}" '.version = $version' ./src-tauri/plugins/tauri-plugin-hardware/package.json > /tmp/package.json
mv /tmp/package.json ./src-tauri/plugins/tauri-plugin-hardware/package.json
echo "---------./src-tauri/plugins/tauri-plugin-hardware/package.json---------"
cat ./src-tauri/plugins/tauri-plugin-hardware/package.json
jq --arg version "${{ inputs.new_version }}" '.version = $version' ./src-tauri/plugins/tauri-plugin-llamacpp/package.json > /tmp/package.json
mv /tmp/package.json ./src-tauri/plugins/tauri-plugin-llamacpp/package.json
echo "---------./src-tauri/plugins/tauri-plugin-llamacpp/package.json---------"
cat ./src-tauri/plugins/tauri-plugin-llamacpp/package.json
ctoml ./src-tauri/plugins/tauri-plugin-hardware/Cargo.toml package.version "${{ inputs.new_version }}"
echo "---------./src-tauri/plugins/tauri-plugin-hardware/Cargo.toml---------"
cat ./src-tauri/plugins/tauri-plugin-hardware/Cargo.toml
ctoml ./src-tauri/plugins/tauri-plugin-llamacpp/Cargo.toml package.version "${{ inputs.new_version }}"
echo "---------./src-tauri/plugins/tauri-plugin-llamacpp/Cargo.toml---------"
cat ./src-tauri/plugins/tauri-plugin-llamacpp/Cargo.toml
ctoml ./src-tauri/Cargo.toml package.version "${{ inputs.new_version }}"
echo "---------./src-tauri/Cargo.toml---------"
cat ./src-tauri/Cargo.toml
# Temporarily enable devtool on prod build
ctoml ./src-tauri/Cargo.toml dependencies.tauri.features[] "devtools"
cat ./src-tauri/Cargo.toml
# Change app name for beta and nightly builds

View File

@@ -42,31 +42,6 @@ jobs:
run: |
cargo install ctoml
- name: Create bun and uv universal
run: |
mkdir -p ./src-tauri/resources/bin/
cd ./src-tauri/resources/bin/
curl -L -o bun-darwin-x64.zip https://github.com/oven-sh/bun/releases/download/bun-v1.2.10/bun-darwin-x64.zip
curl -L -o bun-darwin-aarch64.zip https://github.com/oven-sh/bun/releases/download/bun-v1.2.10/bun-darwin-aarch64.zip
unzip bun-darwin-x64.zip
unzip bun-darwin-aarch64.zip
lipo -create -output bun-universal-apple-darwin bun-darwin-x64/bun bun-darwin-aarch64/bun
cp -f bun-darwin-aarch64/bun bun-aarch64-apple-darwin
cp -f bun-darwin-x64/bun bun-x86_64-apple-darwin
cp -f bun-universal-apple-darwin bun
curl -L -o uv-x86_64.tar.gz https://github.com/astral-sh/uv/releases/download/0.6.17/uv-x86_64-apple-darwin.tar.gz
curl -L -o uv-arm64.tar.gz https://github.com/astral-sh/uv/releases/download/0.6.17/uv-aarch64-apple-darwin.tar.gz
tar -xzf uv-x86_64.tar.gz
tar -xzf uv-arm64.tar.gz
mv uv-x86_64-apple-darwin uv-x86_64
mv uv-aarch64-apple-darwin uv-aarch64
lipo -create -output uv-universal-apple-darwin uv-x86_64/uv uv-aarch64/uv
cp -f uv-x86_64/uv uv-x86_64-apple-darwin
cp -f uv-aarch64/uv uv-aarch64-apple-darwin
cp -f uv-universal-apple-darwin uv
ls -la
- name: Update app version
run: |
echo "Version: ${{ inputs.new_version }}"
@@ -74,8 +49,35 @@ jobs:
mv /tmp/tauri.conf.json ./src-tauri/tauri.conf.json
jq --arg version "${{ inputs.new_version }}" '.version = $version' web-app/package.json > /tmp/package.json
mv /tmp/package.json web-app/package.json
# Update tauri plugin versions
jq --arg version "${{ inputs.new_version }}" '.version = $version' ./src-tauri/plugins/tauri-plugin-hardware/package.json > /tmp/package.json
mv /tmp/package.json ./src-tauri/plugins/tauri-plugin-hardware/package.json
echo "---------./src-tauri/plugins/tauri-plugin-hardware/package.json---------"
cat ./src-tauri/plugins/tauri-plugin-hardware/package.json
jq --arg version "${{ inputs.new_version }}" '.version = $version' ./src-tauri/plugins/tauri-plugin-llamacpp/package.json > /tmp/package.json
mv /tmp/package.json ./src-tauri/plugins/tauri-plugin-llamacpp/package.json
echo "---------./src-tauri/plugins/tauri-plugin-llamacpp/package.json---------"
cat ./src-tauri/plugins/tauri-plugin-llamacpp/package.json
ctoml ./src-tauri/plugins/tauri-plugin-hardware/Cargo.toml package.version "${{ inputs.new_version }}"
echo "---------./src-tauri/plugins/tauri-plugin-hardware/Cargo.toml---------"
cat ./src-tauri/plugins/tauri-plugin-hardware/Cargo.toml
ctoml ./src-tauri/plugins/tauri-plugin-llamacpp/Cargo.toml package.version "${{ inputs.new_version }}"
echo "---------./src-tauri/plugins/tauri-plugin-llamacpp/Cargo.toml---------"
cat ./src-tauri/plugins/tauri-plugin-llamacpp/Cargo.toml
ctoml ./src-tauri/Cargo.toml package.version "${{ inputs.new_version }}"
echo "---------./src-tauri/Cargo.toml---------"
cat ./src-tauri/Cargo.toml
ctoml ./src-tauri/Cargo.toml dependencies.tauri.features[] "devtools"
if [ "${{ inputs.channel }}" != "stable" ]; then
jq '.plugins.updater.endpoints = ["https://delta.jan.ai/${{ inputs.channel }}/latest.json"]' ./src-tauri/tauri.conf.json > /tmp/tauri.conf.json
mv /tmp/tauri.conf.json ./src-tauri/tauri.conf.json

View File

@@ -101,7 +101,30 @@ jobs:
jq --arg version "${{ inputs.new_version }}" '.version = $version' web-app/package.json > /tmp/package.json
mv /tmp/package.json web-app/package.json
# Update tauri plugin versions
jq --arg version "${{ inputs.new_version }}" '.version = $version' ./src-tauri/plugins/tauri-plugin-hardware/package.json > /tmp/package.json
mv /tmp/package.json ./src-tauri/plugins/tauri-plugin-hardware/package.json
echo "---------./src-tauri/plugins/tauri-plugin-hardware/package.json---------"
cat ./src-tauri/plugins/tauri-plugin-hardware/package.json
jq --arg version "${{ inputs.new_version }}" '.version = $version' ./src-tauri/plugins/tauri-plugin-llamacpp/package.json > /tmp/package.json
mv /tmp/package.json ./src-tauri/plugins/tauri-plugin-llamacpp/package.json
echo "---------./src-tauri/plugins/tauri-plugin-llamacpp/package.json---------"
cat ./src-tauri/plugins/tauri-plugin-llamacpp/package.json
ctoml ./src-tauri/plugins/tauri-plugin-hardware/Cargo.toml package.version "${{ inputs.new_version }}"
echo "---------./src-tauri/plugins/tauri-plugin-hardware/Cargo.toml---------"
cat ./src-tauri/plugins/tauri-plugin-hardware/Cargo.toml
ctoml ./src-tauri/plugins/tauri-plugin-llamacpp/Cargo.toml package.version "${{ inputs.new_version }}"
echo "---------./src-tauri/plugins/tauri-plugin-llamacpp/Cargo.toml---------"
cat ./src-tauri/plugins/tauri-plugin-llamacpp/Cargo.toml
ctoml ./src-tauri/Cargo.toml package.version "${{ inputs.new_version }}"
echo "---------./src-tauri/Cargo.toml---------"
cat ./src-tauri/Cargo.toml
# Temporarily enable devtool on prod build

View File

@@ -49,68 +49,61 @@ jobs:
# Update tauri.conf.json
jq --arg version "${{ inputs.new_version }}" '.version = $version | .bundle.createUpdaterArtifacts = false' ./src-tauri/tauri.conf.json > /tmp/tauri.conf.json
mv /tmp/tauri.conf.json ./src-tauri/tauri.conf.json
jq '.bundle.windows.nsis.template = "tauri.bundle.windows.nsis.template"' ./src-tauri/tauri.windows.conf.json > /tmp/tauri.windows.conf.json
mv /tmp/tauri.windows.conf.json ./src-tauri/tauri.windows.conf.json
jq '.bundle.windows.signCommand = "echo External build - skipping signature: %1"' ./src-tauri/tauri.windows.conf.json > /tmp/tauri.windows.conf.json
mv /tmp/tauri.windows.conf.json ./src-tauri/tauri.windows.conf.json
jq --arg version "${{ inputs.new_version }}" '.version = $version' web-app/package.json > /tmp/package.json
mv /tmp/package.json web-app/package.json
ctoml ./src-tauri/Cargo.toml package.version "${{ inputs.new_version }}"
echo "---------Cargo.toml---------"
cat ./src-tauri/Cargo.toml
# Update tauri plugin versions
jq --arg version "${{ inputs.new_version }}" '.version = $version' ./src-tauri/plugins/tauri-plugin-hardware/package.json > /tmp/package.json
mv /tmp/package.json ./src-tauri/plugins/tauri-plugin-hardware/package.json
generate_build_version() {
### Example
### input 0.5.6 output will be 0.5.6 and 0.5.6.0
### input 0.5.6-rc2-beta output will be 0.5.6 and 0.5.6.2
### input 0.5.6-1213 output will be 0.5.6 and 0.5.6.1213
local new_version="$1"
local base_version
local t_value
echo "---------./src-tauri/plugins/tauri-plugin-hardware/package.json---------"
cat ./src-tauri/plugins/tauri-plugin-hardware/package.json
# Check if it has a "-"
if [[ "$new_version" == *-* ]]; then
base_version="${new_version%%-*}" # part before -
suffix="${new_version#*-}" # part after -
jq --arg version "${{ inputs.new_version }}" '.version = $version' ./src-tauri/plugins/tauri-plugin-llamacpp/package.json > /tmp/package.json
mv /tmp/package.json ./src-tauri/plugins/tauri-plugin-llamacpp/package.json
# Check if it is rcX-beta
if [[ "$suffix" =~ ^rc([0-9]+)-beta$ ]]; then
t_value="${BASH_REMATCH[1]}"
else
t_value="$suffix"
fi
else
base_version="$new_version"
t_value="0"
fi
echo "---------./src-tauri/plugins/tauri-plugin-llamacpp/package.json---------"
cat ./src-tauri/plugins/tauri-plugin-llamacpp/package.json
# Export two values
new_base_version="$base_version"
new_build_version="${base_version}.${t_value}"
}
generate_build_version ${{ inputs.new_version }}
sed -i "s/jan_version/$new_base_version/g" ./src-tauri/tauri.bundle.windows.nsis.template
sed -i "s/jan_build/$new_build_version/g" ./src-tauri/tauri.bundle.windows.nsis.template
ctoml ./src-tauri/plugins/tauri-plugin-hardware/Cargo.toml package.version "${{ inputs.new_version }}"
echo "---------./src-tauri/plugins/tauri-plugin-hardware/Cargo.toml---------"
cat ./src-tauri/plugins/tauri-plugin-hardware/Cargo.toml
ctoml ./src-tauri/plugins/tauri-plugin-llamacpp/Cargo.toml package.version "${{ inputs.new_version }}"
echo "---------./src-tauri/plugins/tauri-plugin-llamacpp/Cargo.toml---------"
cat ./src-tauri/plugins/tauri-plugin-llamacpp/Cargo.toml
ctoml ./src-tauri/Cargo.toml package.version "${{ inputs.new_version }}"
echo "---------./src-tauri/Cargo.toml---------"
cat ./src-tauri/Cargo.toml
if [ "${{ inputs.channel }}" != "stable" ]; then
jq '.plugins.updater.endpoints = ["https://delta.jan.ai/${{ inputs.channel }}/latest.json"]' ./src-tauri/tauri.conf.json > /tmp/tauri.conf.json
mv /tmp/tauri.conf.json ./src-tauri/tauri.conf.json
# Update product name
jq --arg name "Jan-${{ inputs.channel }}" '.productName = $name' ./src-tauri/tauri.conf.json > /tmp/tauri.conf.json
mv /tmp/tauri.conf.json ./src-tauri/tauri.conf.json
chmod +x .github/scripts/rename-tauri-app.sh
.github/scripts/rename-tauri-app.sh ./src-tauri/tauri.conf.json ${{ inputs.channel }}
echo "---------tauri.conf.json---------"
cat ./src-tauri/tauri.conf.json
# Update Cargo.toml
ctoml ./src-tauri/Cargo.toml package.name "Jan-${{ inputs.channel }}"
ctoml ./src-tauri/Cargo.toml dependencies.tauri.features[] "devtools"
echo "------------------"
cat ./src-tauri/Cargo.toml
chmod +x .github/scripts/rename-workspace.sh
.github/scripts/rename-workspace.sh ./package.json ${{ inputs.channel }}
sed -i "s/jan_productname/Jan-${{ inputs.channel }}/g" ./src-tauri/tauri.bundle.windows.nsis.template
sed -i "s/jan_mainbinaryname/jan-${{ inputs.channel }}/g" ./src-tauri/tauri.bundle.windows.nsis.template
else
sed -i "s/jan_productname/Jan/g" ./src-tauri/tauri.bundle.windows.nsis.template
sed -i "s/jan_mainbinaryname/jan/g" ./src-tauri/tauri.bundle.windows.nsis.template
cat ./package.json
fi
echo "---------nsis.template---------"
cat ./src-tauri/tauri.bundle.windows.nsis.template
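For reference, the `generate_build_version` helper above maps the input tag to the two NSIS placeholders exactly as its comments describe; an illustrative check (not part of the workflow):

```bash
# Illustrative only: expected values of the two variables set by generate_build_version.
generate_build_version "0.5.6"           # new_base_version=0.5.6  new_build_version=0.5.6.0
generate_build_version "0.5.6-rc2-beta"  # new_base_version=0.5.6  new_build_version=0.5.6.2
generate_build_version "0.5.6-1213"      # new_base_version=0.5.6  new_build_version=0.5.6.1213
echo "$new_base_version -> $new_build_version"
```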
- name: Build app
shell: bash
run: |

View File

@@ -95,47 +95,41 @@ jobs:
# Update tauri.conf.json
jq --arg version "${{ inputs.new_version }}" '.version = $version | .bundle.createUpdaterArtifacts = true' ./src-tauri/tauri.conf.json > /tmp/tauri.conf.json
mv /tmp/tauri.conf.json ./src-tauri/tauri.conf.json
jq '.bundle.windows.nsis.template = "tauri.bundle.windows.nsis.template"' ./src-tauri/tauri.windows.conf.json > /tmp/tauri.windows.conf.json
mv /tmp/tauri.windows.conf.json ./src-tauri/tauri.windows.conf.json
jq --arg version "${{ inputs.new_version }}" '.version = $version' web-app/package.json > /tmp/package.json
mv /tmp/package.json web-app/package.json
# Update tauri plugin versions
jq --arg version "${{ inputs.new_version }}" '.version = $version' ./src-tauri/plugins/tauri-plugin-hardware/package.json > /tmp/package.json
mv /tmp/package.json ./src-tauri/plugins/tauri-plugin-hardware/package.json
echo "---------./src-tauri/plugins/tauri-plugin-hardware/package.json---------"
cat ./src-tauri/plugins/tauri-plugin-hardware/package.json
jq --arg version "${{ inputs.new_version }}" '.version = $version' ./src-tauri/plugins/tauri-plugin-llamacpp/package.json > /tmp/package.json
mv /tmp/package.json ./src-tauri/plugins/tauri-plugin-llamacpp/package.json
echo "---------./src-tauri/plugins/tauri-plugin-llamacpp/package.json---------"
cat ./src-tauri/plugins/tauri-plugin-llamacpp/package.json
ctoml ./src-tauri/plugins/tauri-plugin-hardware/Cargo.toml package.version "${{ inputs.new_version }}"
echo "---------./src-tauri/plugins/tauri-plugin-hardware/Cargo.toml---------"
cat ./src-tauri/plugins/tauri-plugin-hardware/Cargo.toml
ctoml ./src-tauri/plugins/tauri-plugin-llamacpp/Cargo.toml package.version "${{ inputs.new_version }}"
echo "---------./src-tauri/plugins/tauri-plugin-llamacpp/Cargo.toml---------"
cat ./src-tauri/plugins/tauri-plugin-llamacpp/Cargo.toml
ctoml ./src-tauri/Cargo.toml package.version "${{ inputs.new_version }}"
echo "---------Cargo.toml---------"
echo "---------./src-tauri/Cargo.toml---------"
cat ./src-tauri/Cargo.toml
generate_build_version() {
### Example
### input 0.5.6 output will be 0.5.6 and 0.5.6.0
### input 0.5.6-rc2-beta output will be 0.5.6 and 0.5.6.2
### input 0.5.6-1213 output will be 0.5.6 and 0.5.6.1213
local new_version="$1"
local base_version
local t_value
# Check if it has a "-"
if [[ "$new_version" == *-* ]]; then
base_version="${new_version%%-*}" # part before -
suffix="${new_version#*-}" # part after -
# Check if it is rcX-beta
if [[ "$suffix" =~ ^rc([0-9]+)-beta$ ]]; then
t_value="${BASH_REMATCH[1]}"
else
t_value="$suffix"
fi
else
base_version="$new_version"
t_value="0"
fi
# Export two values
new_base_version="$base_version"
new_build_version="${base_version}.${t_value}"
}
generate_build_version ${{ inputs.new_version }}
sed -i "s/jan_version/$new_base_version/g" ./src-tauri/tauri.bundle.windows.nsis.template
sed -i "s/jan_build/$new_build_version/g" ./src-tauri/tauri.bundle.windows.nsis.template
# Add sign commands to tauri.windows.conf.json
jq '.bundle.windows.signCommand = "powershell -ExecutionPolicy Bypass -File ./sign.ps1 %1"' ./src-tauri/tauri.windows.conf.json > /tmp/tauri.windows.conf.json
mv /tmp/tauri.windows.conf.json ./src-tauri/tauri.windows.conf.json
echo "---------tauri.windows.conf.json---------"
cat ./src-tauri/tauri.windows.conf.json
# Temporarily enable devtool on prod build
ctoml ./src-tauri/Cargo.toml dependencies.tauri.features[] "devtools"
@@ -143,8 +137,13 @@ jobs:
# Change app name for beta and nightly builds
if [ "${{ inputs.channel }}" != "stable" ]; then
# Update updater endpoint
jq '.plugins.updater.endpoints = ["https://delta.jan.ai/${{ inputs.channel }}/latest.json"]' ./src-tauri/tauri.conf.json > /tmp/tauri.conf.json
mv /tmp/tauri.conf.json ./src-tauri/tauri.conf.json
# Update product name
jq --arg name "Jan-${{ inputs.channel }}" '.productName = $name' ./src-tauri/tauri.conf.json > /tmp/tauri.conf.json
mv /tmp/tauri.conf.json ./src-tauri/tauri.conf.json
chmod +x .github/scripts/rename-tauri-app.sh
.github/scripts/rename-tauri-app.sh ./src-tauri/tauri.conf.json ${{ inputs.channel }}
@@ -161,15 +160,7 @@ jobs:
chmod +x .github/scripts/rename-workspace.sh
.github/scripts/rename-workspace.sh ./package.json ${{ inputs.channel }}
cat ./package.json
sed -i "s/jan_productname/Jan-${{ inputs.channel }}/g" ./src-tauri/tauri.bundle.windows.nsis.template
sed -i "s/jan_mainbinaryname/jan-${{ inputs.channel }}/g" ./src-tauri/tauri.bundle.windows.nsis.template
else
sed -i "s/jan_productname/Jan/g" ./src-tauri/tauri.bundle.windows.nsis.template
sed -i "s/jan_mainbinaryname/jan/g" ./src-tauri/tauri.bundle.windows.nsis.template
fi
echo "---------nsis.template---------"
cat ./src-tauri/tauri.bundle.windows.nsis.template
- name: Install AzureSignTool
run: |

View File

@@ -126,8 +126,7 @@ jan/
├── scripts/ # Build utilities
├── package.json # Root workspace configuration
├── Makefile # Build automation commands
├── mise.toml # Mise tool configuration
├── Makefile # Build automation commands
├── LICENSE # Apache 2.0 license
└── README.md # Project overview
```
@@ -149,19 +148,6 @@ cd jan
make dev
```
**Option 2: The Easier Way (Mise)**
```bash
git clone https://github.com/menloresearch/jan
cd jan
# Install mise
curl https://mise.run | sh
# Let mise handle everything
mise install # installs Node.js, Rust, and other tools
mise dev # runs the full development setup
```
## How Can I Contribute?
### Reporting Bugs

View File

@@ -72,6 +72,9 @@ lint: install-and-build
test: lint
yarn download:bin
yarn download:lib
ifeq ($(OS),Windows_NT)
yarn download:windows-installer
endif
yarn test
yarn copy:assets:tauri
yarn build:icon

View File

@@ -93,29 +93,6 @@ This handles everything: installs dependencies, builds core components, and laun
- `make test` - Run tests and linting
- `make clean` - Delete everything and start fresh
### Run with Mise (easier)
You can also run with [mise](https://mise.jdx.dev/), which is a bit easier as it ensures Node.js, Rust, and other dependency versions are automatically managed:
```bash
git clone https://github.com/menloresearch/jan
cd jan
# Install mise (if not already installed)
curl https://mise.run | sh
# Install tools and start development
mise install # installs Node.js, Rust, and other tools
mise dev # runs the full development setup
```
**Available mise commands:**
- `mise dev` - Full development setup and launch
- `mise build` - Production build
- `mise test` - Run tests and linting
- `mise clean` - Delete everything and start fresh
- `mise tasks` - List all available tasks
### Manual Commands
```bash

View File

@@ -240,6 +240,12 @@ export abstract class AIEngine extends BaseExtension {
EngineManager.instance().register(this)
}
/**
* Gets model info
* @param modelId
*/
abstract get(modelId: string): Promise<modelInfo | undefined>
/**
* Lists available models
*/
@@ -268,6 +274,10 @@ export abstract class AIEngine extends BaseExtension {
*/
abstract delete(modelId: string): Promise<void>
/**
* Updates a model
*/
abstract update(modelId: string, model: Partial<modelInfo>): Promise<void>
/**
* Imports a model
*/
@@ -283,11 +293,6 @@ export abstract class AIEngine extends BaseExtension {
*/
abstract getLoadedModels(): Promise<string[]>
/**
* Optional method to get the underlying chat client
*/
getChatClient?(sessionId: string): any
/**
* Check if a tool is supported by the model
* @param modelId
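With `get` and `update` now abstract, every engine extension has to provide them. A minimal sketch of how an existing `AIEngine` subclass might implement the two new members (the in-memory `models` map is an assumption for illustration):

```typescript
// Sketch only: possible implementations of the new abstract members in an AIEngine subclass.
private models = new Map<string, modelInfo>()

async get(modelId: string): Promise<modelInfo | undefined> {
  // Return the stored record, or undefined when the model is unknown
  return this.models.get(modelId)
}

async update(modelId: string, model: Partial<modelInfo>): Promise<void> {
  const existing = this.models.get(modelId)
  if (!existing) throw new Error(`Model ${modelId} not found`)
  this.models.set(modelId, { ...existing, ...model })
}
```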

View File

@@ -43,6 +43,12 @@ const mkdir = (...args: any[]) => globalThis.core.api?.mkdir({ args })
*/
const rm = (...args: any[]) => globalThis.core.api?.rm({ args })
/**
* Moves a file from the source path to the destination path.
* @returns {Promise<any>} A Promise that resolves when the file is moved successfully.
*/
const mv = (...args: any[]) => globalThis.core.api?.mv({ args })
/**
* Deletes a file from the local file system.
* @param {string} path - The path of the file to delete.
@@ -92,6 +98,7 @@ export const fs = {
readdirSync,
mkdir,
rm,
mv,
unlinkSync,
appendFileSync,
copyFile,
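Callers can now move files through the same wrapper API. A hedged usage sketch (paths are illustrative; like the other helpers, `mv` simply forwards its arguments to `core.api.mv`):

```typescript
// Sketch only: relocate a file with the new fs.mv wrapper; paths are illustrative.
await fs.mv('/old/location/model.gguf', '/new/location/model.gguf')
```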

View File

@@ -91,6 +91,7 @@ export enum FileSystemRoute {
existsSync = 'existsSync',
readdirSync = 'readdirSync',
rm = 'rm',
mv = 'mv',
mkdir = 'mkdir',
readFileSync = 'readFileSync',
writeFileSync = 'writeFileSync',

View File

@@ -1,699 +1,133 @@
/team /about/team 302
/about/teams /about/team 302
/about/faq /docs 302
/about/acknowledgements /docs 302
/about/community /about 302
/guides /docs 302
/docs/troubleshooting/failed-to-fetch /docs/troubleshooting 302
/guides/troubleshooting/gpu-not-used /docs/troubleshooting#troubleshooting-nvidia-gpu 302
/guides/troubleshooting /docs/troubleshooting 302
/docs/troubleshooting/stuck-on-broken-build /docs/troubleshooting 302
/docs/troubleshooting/somethings-amiss /docs/troubleshooting 302
/docs/troubleshooting/how-to-get-error-logs /docs/troubleshooting 302
/docs/troubleshooting/permission-denied /docs/troubleshooting 302
/docs/troubleshooting/unexpected-token /docs/troubleshooting 302
/docs/troubleshooting/undefined-issue /docs/troubleshooting 302
/getting-started/troubleshooting /docs/troubleshooting 302
/docs/troubleshooting/gpu-not-used /docs/troubleshooting 302
/guides/integrations/openrouter /docs/remote-models/openrouter 302
/guides/integrations/continue /integrations/coding/continue-dev 302
/docs/extension-capabilities /docs/extensions 302
/guides/using-extensions /docs/extensions 302
/docs/extension-guides /docs/extensions 302
/features/extensions /docs/extensions 302
/integrations/tensorrt /docs/built-in/tensorrt-llm 302
/guides/using-models/integrate-with-remote-server /docs/remote-inference/generic-openai 302
/guides/using-models/customize-engine-settings /docs/built-in/llama-cpp 302
/developers/plugins/azure-openai /docs/remote-models/openai 302
/docs/api-reference/assistants /api-reference#tag/assistants 302
/docs/api-reference/models/list /api-reference#tag/models 302
/docs/api-reference/threads /api-reference#tag/chat 302
/docs/api-reference/messages /api-reference#tag/messages 302
/docs/api-reference/models /api-reference#tag/models 302
/chat /docs/threads 302
/guides/chatting/manage-history /docs/threads/ 302
/guides/chatting/start-thread /docs/threads/ 302
/guides/using-server /docs/local-api/ 302
/guides/using-server/server /docs/local-api#step-2-srt-and-use-the-built-in-api-server 302
/docs/get-started /docs 302
/guides/how-jan-works /about/how-we-work 302
/acknowledgements /about/acknowledgements 302
/community /about/community 302
/faq /about/faq 302
/how-we-work /about/how-we-work 302
/wall-of-love /about/wall-of-love 302
/guides/troubleshooting/failed-to-fetch /docs/troubleshooting 302
/docs/troubleshooting/gpu-not-used /docs/troubleshooting 302
/docs/troubleshooting/failed-to-fetch /docs/troubleshooting 302
/guides/ /docs 302
/guides/quickstart/ /docs/quickstart 302
/guides/models/ /docs/models 302
/guides/threads/ /docs/threads 302
/guides/local-api/ /docs/local-api 302
/guides/advanced/ /docs/settings 302
/guides/engines/llamacpp/ /docs/built-in/llama-cpp 302
/guides/engines/tensorrt-llm/ /docs/built-in/tensorrt-llm 302
/guides/engines/lmstudio/ /docs/local-models/lmstudio 302
/guides/engines/ollama/ /docs/built-in/llama-cpp 302
/guides/engines/groq/ /docs/remote-models/groq 302
/guides/engines/mistral/ /docs/remote-models/mistralai 302
/guides/engines/openai/ /docs/remote-models/openai 302
/guides/engines/remote-server/ /docs/remote-inference/generic-openai 302
/extensions/ /docs/extensions 302
/integrations/discord/ /integrations/messaging/llmcord 302
/discord https://discord.gg/FTk2MvZwJH 301
/integrations/interpreter/ /integrations/function-calling/interpreter 302
/integrations/raycast/ /integrations/workflow-automation/raycast 302
/docs/integrations/raycast /integrations/workflow-automation/raycast 302
/docs/integrations /integrations 302
/docs/engineering/files/ /docs 302
/integrations/openrouter/ /docs/remote-models/openrouter 302
/integrations/continue/ /integrations/coding/continue-dev 302
/troubleshooting/ /docs/troubleshooting 302
/changelog/changelog-v0.4.9/ /changelog 302
/changelog/changelog-v0.4.8/ /changelog 302
/changelog/changelog-v0.4.7/ /changelog 302
/changelog/changelog-v0.4.6/ /changelog 302
/changelog/changelog-v0.4.5/ /changelog 302
/changelog/changelog-v0.4.4/ /changelog 302
/changelog/changelog-v0.4.3/ /changelog 302
/changelog/changelog-v0.4.2/ /changelog 302
/changelog/changelog-v0.4.1/ /changelog 302
/changelog/changelog-v0.4.0/ /changelog 302
/changelog/changelog-v0.3.3/ /changelog 302
/changelog/changelog-v0.3.2/ /changelog 302
/changelog/changelog-v0.3.1/ /changelog 302
/changelog/changelog-v0.3.0/ /changelog 302
/changelog/changelog-v0.2.3/ /changelog 302
/changelog/changelog-v0.2.2/ /changelog 302
/changelog/changelog-v0.2.1/ /changelog 302
/changelog/changelog-v0.2.0/ /changelog 302
/team/ /about/team 302
/team/contributor-program/ /about/team 302
/team/join-us/ /about/team 302
/how-we-work/ /about/how-we-work 302
/how-we-work/strategy/ /about/how-we-work/strategy 302
/how-we-work/project-management/ /about/how-we-work/project-management 302
/engineering/ /about/how-we-work/engineering 302
/engineering/ci-cd/ /about/how-we-work/engineering/ci-cd 302
/engineering/qa/ /about/how-we-work/engineering/qa 302
/how-we-work/product-design/ /about 302
/about/how-we-work/product-design /about 302
/how-we-work/analytics/ /about/how-we-work/analytics 302
/how-we-work/website-docs/ /about/how-we-work/website-docs 302
/blog/postmortems/january-10-2024-bitdefender-false-positive-flag/ /post/bitdefender 302
/guides/error-codes/something-amiss/ /docs/troubleshooting#somethings-amiss 302
/guides/error-codes/how-to-get-error-logs/ /docs/troubleshooting#how-to-get-error-logs 302
/guides/chatting/ /docs/threads 302
/guides/integration/openinterpreter/ /integrations/function-calling/interpreter 302
/developer/build-assistant/ /docs/assistants 302
/guides/integrations/ /integrations 302
/specs/hub/ /docs 302
/install/windows/ /docs/desktop/windows 302
/install/linux/ /docs/desktop/linux 302
/install/nightly/ /docs/desktop/windows 302
/docs/engineering/fine-tuning/ /docs 302
/developer/assistant/ /docs/assistants 302
/guides/common-error/broken-build/ /docs/troubleshooting#broken-build 302
/guides/using-server/using-server/ /docs/local-api 302
/guides/integrations/azure-openai-service/ /docs/remote-models/openai 302
/specs/messages/ /docs/threads 302
/docs/engineering/models/ /docs/models 302
/docs/specs/assistants/ /docs/assistants 302
/docs/engineering/chats/ /docs/threads 302
/guides/using-extensions/extension-settings/ /docs/extensions 302
/guides/models/customize-engine/ /docs/models 302
/guides/integration/mistral/ /docs/remote-models/mistralai 302
/guides/common-error/ /docs/troubleshooting 302
/guides/integrations/ollama/ /docs/local-models/ollama 302
/server-suite/ /api-reference 302
/guides/integrations/lmstudio/ /docs/local-models/lmstudio 302
/guides/integrations/mistral-ai/ /docs/remote-models/mistralai 302
/guides/start-server/ /docs/local-api 302
/guides/changelog/ /changelog 302
/guides/models-list/ /docs/models 302
/guides/thread/ /docs/threads 302
/docs/engineering/messages/ /docs/threads 302
/guides/faqs/ /about/faq 302
/docs/integrations/openrouter/ /docs/remote-models/openrouter 302
/guides/windows /docs/desktop/windows 302
/docs/integrations/ollama/ /docs/local-models/ollama 302
/api/overview/ /api-reference 302
/docs/extension-guides/ /docs/extensions 302
/specs/settings/ /docs 302
/docs/UI/ /docs 302
/guides/using-models/import-models-using-absolute-filepath/ /docs/models 302
/install/docker/ /docs/desktop 302
/install/ /docs/desktop 302
/install/from-source/ /docs/desktop 302
/docs/installation/server /docs/desktop 302
/v1/models /docs/models 302
/guides/advanced-settings/ /docs/settings 302
/guides/using-models/install-from-hub/ /docs/models/manage-models#download-from-jan-hub 302
/guides/using-models/import-manually/ /docs/models 302
/docs/team/contributor-program/ /about/team 302
/docs/modules/models /docs/models 302
/getting-started/install/linux /docs/desktop/linux 302
/guides/chatting/start-thread/ /docs/threads 302
/api/files/ /docs 302
/specs/threads/ /docs/threads 302
/about/brand-assets /about 302
/guides/chatting/upload-images/ /docs/threads 302
/guides/using-models/customize-models/ /docs/models 302
/docs/modules/models/ /docs/models 302
/getting-started/install/linux/ /docs/desktop/linux 302
/specs/chats/ /docs/threads 302
/specs/engine/ /docs 302
/specs/data-structures /docs 302
/docs/extension-capabilities/ /docs/extensions 302
/docs/get-started/use-local-server/ /docs/local-api 302
/guides/how-jan-works/ /about/how-we-work 302
/guides/install/cloud-native /docs/desktop 302
/guides/windows/ /docs/desktop/windows 302
/specs/ /docs 302
/docs/get-started/build-extension/ /docs/extensions 302
/specs/files/ /docs 302
/guides/using-models/package-models/ /docs/models 302
/install/overview/ /docs/desktop/windows 302
/docs/get-started/extension-anatomy/ /docs/extensions 302
/docs/get-started/ /docs 302
/guides/mac/ /docs/desktop/mac 302
/intro/ /about 302
/specs/fine-tuning/ /docs 302
/guides/server/ /docs/desktop 302
/specs/file-based/ /docs 302
/docs/extension-guides/monitoring/ /docs/extensions 302
/api/ /api-reference 302
/getting-started/build-an-app /docs/quickstart 302
/features/ai-models/ /docs/models 302
/reference/store/ /api-reference 302
/tutorials/build-chat-app /docs/quickstart 302
/features/acceleration /docs/built-in/llama-cpp 302
/getting-started/install/mac /docs/desktop/mac 302
docs/guides/fine-tuning/what-models-can-be-fine-tuned /docs 302
/docs/specs/threads /docs/threads 302
/docs/api-reference/fine-tuning /api-reference 302
/docs/guides/speech-to-text/prompting /docs/quickstart 302
/docs/guides/legacy-fine-tuning/analyzing-your-fine-tuned-model /docs 302
/getting-started/install/windows /docs/desktop/windows 302
/docs/modules/assistants /docs/assistants 302
/docs/modules/chats /docs/threads 302
/docs/specs/chats /docs/threads 302
/docs/modules/files /docs 302
/tutorials/build-rag-app /docs/tools/retrieval 302
/docs/models/model-endpoint-compatibility /docs/models 302
/docs/guides/legacy-fine-tuning/creating-training-data /docs 302
/docs/specs/models /docs/models 302
/docs/guides/safety-best-practices/end-user-ids /docs/quickstart 302
/docs/modules/assistants/ /docs/assistants 302
/docs/models/overview /docs/models 302
/docs/api-reference/files /api-reference 302
/docs/models/tts /docs/models 302
/docs/guides/fine-tuning /docs 302
/docs/specs/files /docs 302
/docs/modules/threads /docs/threads 302
/guides/linux /docs/desktop/linux 302
/developer/build-engine/engine-anatomy/ /docs 302
/developer/engine/ /docs 302
/docs/product/system-monitor/ /docs 302
/docs/product/settings/ /docs 302
/developer/build-assistant/your-first-assistant/ /docs 302
/engineering/research/ /docs 302
/guides/troubleshooting/gpu-not-used/ /docs/troubleshooting#troubleshooting-nvidia-gpu 302
/troubleshooting/gpu-not-used/ /docs/troubleshooting#troubleshooting-nvidia-gpu 302
/docs/integrations/langchain/ /integrations 302
/onboarding/ /docs/quickstart 302
/cortex/docs https://cortex.so/ 301
/installation/hardware/ /docs/desktop/windows 302
/docs/features/load-unload /docs 302
/guides/chatting/upload-docs/ /docs/threads 302
/developer/build-extension/package-your-assistant/ /docs 302
/blog/hello-world /blog 302
/docs/get-started/build-on-mobile/ /docs/quickstart 302
/ai/anything-v4 /docs 302
/nitro /docs 302
/tokenizer /docs 302
/hardware/examples/3090x1-@dan-jan /docs 302
/guides/concepts/ /about 302
/platform/ /docs 302
/hardware/examples/AMAZON-LINK-HERE /docs 302
/guides/threads/?productId=openai&prompt=What /docs 302
/guides/threads/?productId=openjourney&prompt=realistic%20portrait%20of%20an%20gray%20dog,%20bright%20eyes,%20radiant%20and%20ethereal%20intricately%20detailed%20photography,%20cinematic%20lighting,%2050mm%20lens%20with%20bokeh /docs 302
/guides/threads/?productId=openjourney&prompt=old,%20female%20robot,%20metal,%20rust,%20wisible%20wires,%20destroyed,%20sad,%20dark,%20dirty,%20looking%20at%20viewer,%20portrait,%20photography,%20detailed%20skin,%20realistic,%20photo-realistic,%208k,%20highly%20detailed,%20full%20length%20frame,%20High%20detail%20RAW%20color%20art,%20piercing,%20diffused%20soft%20lighting,%20shallow%20depth%20of%20field,%20sharp%20focus,%20hyperrealism,%20cinematic%20lighting /docs 302
/guides/threads/?productId=openjourney&prompt=a%20young%20caucasian%20man%20holding%20his%20chin.pablo%20picasso%20style,%20acrylic%20painting,%20trending%20on%20pixiv%20fanbox,%20palette%20knife%20and%20brush.%20strokes /docs 302
/guides/threads/?productId=airoboros&prompt=Let%27s%20role%20play.%20You%20are%20a%20robot%20in%20a%20post-apocalyptic%20world. /docs 302
/chat?productId=pirsus-epic-realism /docs 302
/chat?productId=ether-blu-mix /docs 302
/chat?productId=deliberate /docs 302
/chat?productId=wizard_vicuna /docs 302
/chat?productId=disneypixar /docs 302
/chat?productId=meina-mix /docs 302
/chat?productId=anything-v4 /docs 302
/chat?productId=airoboros /docs 302
/chat?productId=ghost-mix /docs 302
/ai/toonyou /docs 302
/chat?productId=xrica-mix /docs 302
/ai/openai /docs 302
/chat?productId=been-you /docs 302
/chat?productId=toonyou /docs 302
/handbook/product-and-community/ /about/community 302
/handbook/contributing-to-jan/how-to-get-involved-and-faq/ /about 302
/handbook/engineering-exellence/one-the-tools-what-we-use-and-why/ /about 302
/handbook/from-spaghetti-flinging-to-strategy/how-we-gtm/ /about/how-we-work/strategy 302
/handbook/product-and-community/our-okrs/ /about 302
/products-and-innovations/philosophy-behind-product-development/ /about 302
/handbook/core-contributors/ /about/team 302
/handbook/contributing-to-jan/feedback-channels/ /about/how-we-work 302
/handbook/meet-jan/ /docs 302
/handbook/engineering-exellence/ /about 302
/blog/tags/hello/ /blog 302
/about/community/events/nvidia-llm-day-nov-23/ /about 302
/guides/gpus-and-vram /docs 302
/careers/ /about/team 302
/handbook/engineering/ /about/team 302
/handbook/products-and-innovations/ /about 302
/handbook/contributing-to-jan/ /about 302
/handbook/meet-jan/vision-and-mission/ /about 302
/handbook/products-and-innovations/roadmap-present-and-future-directions/ /about 302
/handbook/what-we-do/ /about/team 302
/handbook/onboarding/ /docs 302
/handbook/products-and-innovations/overview-of-jan-framework-and-its-applications/ /docs 302
/handbook/product/ /docs 302
/running /docs 302
/running?model=Open%20Journey%20SD /docs 302
/ai/been-you /about 302
/tokenizer?view=bpe /docs 302
/docs/engineering/ /docs 302
/developer/install-and-prerequisites#system-requirements /docs/desktop/windows 302
/guides/quickstart /docs/quickstart 302
/guides/models /docs/models 302
/guides/threads /docs/threads 302
/guides/local-api /docs/local-api 302
/guides/advanced /docs/settings 302
/guides/engines/llamacpp /docs/built-in/llama-cpp 302
/guides/engines/tensorrt-llm /docs/built-in/tensorrt-llm 302
/guides/engines/lmstudio /docs/local-models/lmstudio 302
/guides/engines/ollama /docs/local-models/ollama 302
/guides/engines/groq /docs/remote-models/groq 302
/guides/engines/mistral /docs/remote-models/mistralai 302
/guides/engines/openai /docs/remote-models/openai 302
/guides/engines/remote-server /docs/remote-inference/generic-openai 302
/extensions /docs/extensions 302
/integrations/discord /integrations/messaging/llmcord 302
/docs/integrations/discord /integrations/messaging/llmcord 302
/integrations/interpreter /integrations/function-calling/interpreter 302
/integrations/raycast /integrations/workflow-automation/raycast 302
/integrations/openrouter /docs/remote-models/openrouter 302
/integrations/continue /integrations/coding/continue-dev 302
/troubleshooting /docs/troubleshooting 302
/changelog/changelog-v0.4.9 /changelog 302
/changelog/changelog-v0.4.8 /changelog 302
/changelog/changelog-v0.4.7 /changelog 302
/changelog/changelog-v0.4.6 /changelog 302
/changelog/changelog-v0.4.5 /changelog 302
/changelog/changelog-v0.4.4 /changelog 302
/changelog/changelog-v0.4.3 /changelog 302
/changelog/changelog-v0.4.2 /changelog 302
/changelog/changelog-v0.4.1 /changelog 302
/changelog/changelog-v0.4.0 /changelog 302
/changelog/changelog-v0.3.3 /changelog 302
/changelog/changelog-v0.3.2 /changelog 302
/changelog/changelog-v0.3.1 /changelog 302
/changelog/changelog-v0.3.0 /changelog 302
/changelog/changelog-v0.2.3 /changelog 302
/changelog/changelog-v0.2.2 /changelog 302
/changelog/changelog-v0.2.1 /changelog 302
/changelog/changelog-v0.2.0 /changelog 302
/guides/troubleshooting/ /docs/troubleshooting 302
/docs/troubleshooting/failed-to-fetch/ /docs/troubleshooting 302
/docs/troubleshooting/stuck-on-broken-build/ /docs/troubleshooting 302
/docs/troubleshooting/somethings-amiss/ /docs/troubleshooting 302
/docs/troubleshooting/how-to-get-error-logs/ /docs/troubleshooting 302
/docs/troubleshooting/permission-denied/ /docs/troubleshooting 302
/docs/troubleshooting/unexpected-token/ /docs/troubleshooting 302
/docs/troubleshooting/undefined-issue/ /docs/troubleshooting 302
/getting-started/troubleshooting/ /docs/troubleshooting 302
/docs/troubleshooting/gpu-not-used/ /docs/troubleshooting#troubleshooting-nvidia-gpu 302
/guides/integrations/openrouter/ /docs/remote-models/openrouter 302
/guides/integrations/continue/ /integrations/coding/continue-dev 302
/guides/using-extensions/ /docs/extensions 302
/features/extensions/ /docs/extensions 302
/integrations/tensorrt /docs/built-in/tensorrt-llm 302
/integrations/tensorrt/ /docs/built-in/tensorrt-llm 302
/guides/using-models/integrate-with-remote-server/ /docs/remote-inference/generic-openai 302
/guides/using-models/customize-engine-settings/ /docs/built-in/llama-cpp 302
/developers/plugins/azure-openai/ /docs/remote-models/openai 302
/docs/api-reference/assistants/ /api-reference#tag/assistants 302
/docs/api-reference/models/list/ /api-reference#tag/models 302
/docs/api-reference/threads/ /api-reference#tag/chat 302
/docs/api-reference/messages/ /api-reference#tag/messages 302
/docs/api-reference/models/ /api-reference#tag/models 302
/chat/ /docs/threads 302
/guides/chatting/manage-history/ /docs/threads/ 302
/guides/using-server/ /docs/local-api 302
/guides/using-server/server /docs/local-api 302
/guides/server /docs/desktop 302
/acknowledgements/ /about/acknowledgements 302
/community/ /about/community 302
/faq/ /about/faq 302
/wall-of-love/ /about/wall-of-love 302
/guides/troubleshooting/failed-to-fetch/ /docs/troubleshooting 302
/docs/troubleshooting/gpu-not-used/ /docs/troubleshooting#troubleshooting-nvidia-gpu 302
/docs/troubleshooting/failed-to-fetch/ /docs/troubleshooting 302
/team/contributor-program /about/team 302
/team/join-us /about/team 302
/how-we-work/strategy /about/how-we-work/strategy 302
/how-we-work/strategy/ /about/how-we-work/strategy 302
/how-we-work/project-management /about/how-we-work/project-management 302
/engineering /about/how-we-work/engineering 302
/engineering/ci-cd /about/how-we-work/engineering/ci-cd 302
/engineering/qa /about/how-we-work/engineering/qa 302
/how-we-work/product-design /about 302
/how-we-work/analytics /about/how-we-work/analytics 302
/how-we-work/website-docs /about/how-we-work/website-docs 302
/blog/postmortems/january-10-2024-bitdefender-false-positive-flag /post/bitdefender 302
/guides/error-codes/something-amiss /docs/troubleshooting#somethings-amiss 302
/guides/error-codes/how-to-get-error-logs /docs/troubleshooting#how-to-get-error-logs 302
/guides/chatting /docs/threads 302
/guides/integration/openinterpreter /integrations/function-calling/interpreter 302
/developer/build-assistant /docs/assistants 302
/guides/integrations /integrations 302
/specs/hub /docs 302
/install/windows /docs/desktop/windows 302
/install/linux /docs/desktop/linux 302
/install/nightly /docs/desktop/windows 302
/docs/engineering/fine-tuning /docs 302
/developer/assistant /docs/assistants 302
/guides/common-error/broken-build /docs/troubleshooting#broken-build 302
/guides/using-server/using-server /docs/local-api 302
/guides/integrations/azure-openai-service /docs/remote-models/openai 302
/specs/messages /docs/threads 302
/docs/engineering/models /docs/models 302
/docs/specs/assistants /docs/assistants 302
/docs/engineering/chats /docs/threads 302
/guides/using-extensions/extension-settings /docs/extensions 302
/guides/models/customize-engine /docs/models 302
/guides/integration/mistral /docs/remote-models/mistralai 302
/guides/common-error /docs/troubleshooting 302
/guides/integrations/ollama /docs/local-models/ollama 302
/server-suite /api-reference 302
/guides/integrations/lmstudio /docs/local-models/lmstudio 302
/guides/integrations/mistral-ai /docs/remote-models/mistralai 302
/guides/start-server /docs/local-api 302
/guides/changelog /changelog 302
/guides/models-list /docs/models 302
/guides/thread /docs/threads 302
/docs/engineering/messages /docs/threads 302
/guides/faqs /about/faq 302
/docs/integrations/openrouter /docs/remote-models/openrouter 302
/docs/integrations/ollama/ /docs/local-models/ollama 302
/api/overview /api-reference 302
/docs/extension-guides /docs/extensions 302
/specs/settings /docs 302
/docs/UI /docs 302
/guides/using-models/import-models-using-absolute-filepath /docs/models 302
/install/docker /docs/desktop 302
/v1/models/ /docs/models 302
/guides/using-models/import-manually /docs/models 302
/docs/team/contributor-program /about/team 302
/guides/chatting/start-thread /docs/threads 302
/api/files /docs 302
/specs/threads /docs/threads 302
/about/brand-assets/ /about 302
/guides/chatting/upload-images /docs/threads 302
/guides/using-models/customize-models /docs/models 302
/specs/chats /docs/threads 302
/specs/engine /docs 302
/specs/data-structures/ /docs 302
/docs/extension-capabilities /docs/extensions 302
/docs/get-started/use-local-server /docs/local-api 302
/guides/install/cloud-native/ /docs/desktop 302
/guides/install/ /docs/desktop 302
/docs/installation/desktop /docs/desktop 302
/specs /docs 302
/docs/get-started/build-extension /docs/extensions 302
/specs/files /docs 302
/guides/using-models/package-models /docs/models 302
/guides/using-models/ /docs/models 302
/install/overview /docs/desktop/windows 302
/developer/prereq/ /docs 302
/docs/get-started/extension-anatomy /docs/extensions 302
/guides/mac /docs/desktop/mac 302
/intro /about 302
/specs/fine-tuning /docs 302
/specs/file-based /docs 302
/docs/extension-guides/monitoring /docs/extensions 302
/api /api-reference 302
/getting-started/build-an-app/ /docs/quickstart 302
/features/ai-models /docs/models 302
/reference/store /api-reference 302
/tutorials/build-chat-app/ /docs/quickstart 302
/features/acceleration/ /docs/built-in/llama-cpp 302
/getting-started/install/mac/ /docs/desktop/mac 302
docs/guides/fine-tuning/what-models-can-be-fine-tuned/ /docs 302
/docs/specs/threads/ /docs/threads 302
/docs/api-reference/fine-tuning/ /api-reference 302
/docs/guides/speech-to-text/prompting/ /docs/quickstart 302
/docs/guides/legacy-fine-tuning/analyzing-your-fine-tuned-model/ /docs 302
/getting-started/install/windows/ /docs/desktop/windows 302
/docs/modules/chats/ /docs/threads 302
/docs/specs/chats/ /docs/threads 302
/docs/modules/files/ /docs 302
/tutorials/build-rag-app/ /docs/tools/retrieval 302
/docs/models/model-endpoint-compatibility/ /docs/models 302
/docs/guides/legacy-fine-tuning/creating-training-data/ /docs 302
/docs/specs/models/ /docs/models 302
/docs/guides/safety-best-practices/end-user-ids/ /docs/quickstart 302
/docs/models/overview/ /docs/models 302
/docs/api-reference/files/ /api-reference 302
/docs/models/tts/ /docs/models 302
/docs/guides/fine-tuning/ /docs 302
/docs/specs/files/ /docs 302
/docs/modules/threads/ /docs/threads 302
/guides/linux/ /docs/desktop/linux 302
/developer/build-engine/engine-anatomy /docs 302
/developer/engine /docs 302
/docs/product/system-monitor /docs 302
/docs/product/settings /docs 302
/developer/build-assistant/your-first-assistant /docs 302
/engineering/research /docs 302
/docs/integrations/langchain /integrations 302
/onboarding /docs/quickstart 302
/installation/hardware /docs/desktop/windows 302
/docs/features/load-unload/ /docs 302
/guides/chatting/upload-docs /docs/threads 302
/developer/build-extension/package-your-assistant /docs 302
/blog/hello-world/ /blog 302
/docs/get-started/build-on-mobile /docs/quickstart 302
/ai/anything-v4/ /docs 302
/nitro/ /docs 302
/tokenizer/ /docs 302
/hardware/examples/3090x1-@dan-jan/ /docs 302
/guides/concepts /about 302
/platform /docs 302
/hardware/examples/AMAZON-LINK-HERE/ /docs 302
/guides/threads/?productId=openai&prompt=What/ /docs 302
/guides/threads/?productId=openjourney&prompt=realistic%20portrait%20of%20an%20gray%20dog,%20bright%20eyes,%20radiant%20and%20ethereal%20intricately%20detailed%20photography,%20cinematic%20lighting,%2050mm%20lens%20with%20bokeh/ /docs 302
/guides/threads/?productId=openjourney&prompt=old,%20female%20robot,%20metal,%20rust,%20wisible%20wires,%20destroyed,%20sad,%20dark,%20dirty,%20looking%20at%20viewer,%20portrait,%20photography,%20detailed%20skin,%20realistic,%20photo-realistic,%208k,%20highly%20detailed,%20full%20length%20frame,%20High%20detail%20RAW%20color%20art,%20piercing,%20diffused%20soft%20lighting,%20shallow%20depth%20of%20field,%20sharp%20focus,%20hyperrealism,%20cinematic%20lighting/ /docs 302
/guides/threads/?productId=openjourney&prompt=a%20young%20caucasian%20man%20holding%20his%20chin.pablo%20picasso%20style,%20acrylic%20painting,%20trending%20on%20pixiv%20fanbox,%20palette%20knife%20and%20brush.%20strokes/ /docs 302
/guides/threads/?productId=airoboros&prompt=Let%27s%20role%20play.%20You%20are%20a%20robot%20in%20a%20post-apocalyptic%20world./ /docs 302
/chat?productId=pirsus-epic-realism/ /docs 302
/chat?productId=ether-blu-mix/ /docs 302
/chat?productId=deliberate/ /docs 302
/chat?productId=wizard_vicuna/ /docs 302
/chat?productId=disneypixar/ /docs 302
/chat?productId=meina-mix/ /docs 302
/chat?productId=anything-v4/ /docs 302
/chat?productId=airoboros/ /docs 302
/chat?productId=ghost-mix/ /docs 302
/ai/toonyou/ /docs 302
/chat?productId=xrica-mix/ /docs 302
/ai/openai/ /docs 302
/chat?productId=been-you/ /docs 302
/chat?productId=toonyou/ /docs 302
/handbook/product-and-community /about/community 302
/handbook/contributing-to-jan/how-to-get-involved-and-faq /about 302
/handbook/engineering-exellence/one-the-tools-what-we-use-and-why /about 302
/handbook/from-spaghetti-flinging-to-strategy/how-we-gtm /about/how-we-work/strategy 302
/handbook/product-and-community/our-okrs /about 302
/products-and-innovations/philosophy-behind-product-development /about 302
/handbook/core-contributors /about/team 302
/handbook/contributing-to-jan/feedback-channels /about/how-we-work 302
/handbook/meet-jan /docs 302
/handbook/engineering-exellence /about 302
/blog/tags/hello /blog 302
/about/community/events/nvidia-llm-day-nov-23 /about 302
/guides/gpus-and-vram/ /docs 302
/careers /about/team 302
/handbook/engineering /about/team 302
/handbook/products-and-innovations /about 302
/handbook/contributing-to-jan /about 302
/handbook/meet-jan/vision-and-mission /about 302
/handbook/products-and-innovations/roadmap-present-and-future-directions /about 302
/handbook/what-we-do /about/team 302
/handbook/onboarding /docs 302
/handbook/products-and-innovations/overview-of-jan-framework-and-its-applications /docs 302
/handbook/product /docs 302
/running/ /docs 302
/running?model=Open%20Journey%20SD/ /docs 302
/ai/been-you/ /about 302
/tokenizer?view=bpe/ /docs 302
/docs/engineering /docs 302
/developer /docs 302
/developer/ /docs 302
/developer/architecture /docs/architecture 302
/developer/architecture/ /docs/architecture 302
/developer/file-based /docs 302
/developer/file-based/ /docs 302
/developer/framework /docs 302
/developer/framework/ /docs 302
/developer/framework/engineering /docs 302
/developer/framework/engineering/ /docs 302
/developer/framework/engineering/assistants /docs/assistants 302
/developer/framework/engineering/assistants/ /docs/assistants 302
/developer/framework/engineering/chats /docs/threads 302
/developer/framework/engineering/chats/ /docs/threads 302
/developer/framework/engineering/engine /docs 302
/developer/framework/engineering/engine/ /docs 302
/developer/framework/engineering/files /docs 302
/developer/framework/engineering/files/ /docs 302
/developer/framework/engineering/fine-tuning /docs 302
/developer/framework/engineering/fine-tuning/ /docs 302
/developer/framework/engineering/messages /docs/threads 302
/developer/framework/engineering/messages/ /docs/threads 302
/developer/framework/engineering/models /docs/models 302
/developer/framework/engineering/models/ /docs/models 302
/developer/framework/engineering/prompts /docs 302
/developer/framework/engineering/prompts/ /docs 302
/developer/framework/engineering/threads /docs/threads 302
/developer/framework/engineering/threads/ /docs/threads 302
/developer/framework/product /docs 302
/developer/framework/product/ /docs 302
/developer/framework/product/chat /docs/threads 302
/developer/framework/product/chat/ /docs/threads 302
/developer/framework/product/hub /docs 302
/developer/framework/product/hub/ /docs 302
/developer/framework/product/jan /about 302
/developer/framework/product/jan/ /about 302
/developer/framework/product/settings /docs/settings 302
/developer/framework/product/settings/ /docs/settings 302
/developer/framework/product/system-monitor /docs 302
/developer/framework/product/system-monitor/ /docs 302
/developer/user-interface /docs 302
/developer/user-interface/ /docs 302
/docs/desktop /docs/desktop/windows 302
/docs/desktop/ /docs/desktop/windows 302
/docs/inferences/groq /docs/remote-models/groq 302
/docs/inferences/groq/ /docs/remote-models/groq 302
/docs/inferences/llamacpp /docs/built-in/llama-cpp 302
/docs/inferences/llamacpp/ /docs/built-in/llama-cpp 302
/docs/inferences/lmstudio /docs/local-models/lmstudio 302
/docs/inferences/lmstudio/ /docs/local-models/lmstudio 302
/docs/inferences/mistralai /docs/remote-models/mistralai 302
/docs/inferences/mistralai/ /docs/remote-models/mistralai 302
/docs/inferences/ollama /docs/local-models/ollama 302
/docs/inferences/ollama/ /docs/local-models/ollama 302
/docs/inferences/openai /docs/remote-models/openai 302
/docs/inferences/openai/ /docs/remote-models/openai 302
/docs/inferences/remote-server-integration /docs/remote-inference/generic-openai 302
/docs/inferences/remote-server-integration/ /docs/remote-inference/generic-openai 302
/docs/inferences/tensorrtllm /docs/built-in/tensorrt-llm 302
/docs/inferences/tensorrtllm/ /docs/built-in/tensorrt-llm 302
/docs/integrations/router /docs/remote-models/openrouter 302
/docs/integrations/router/ /docs/remote-models/openrouter 302
/docs/server /docs/local-api 302
/docs/server/ /docs/local-api 302
/features/ /docs 302
/features /docs 302
/features/local/ /docs/local-api 302
/features/local /docs/local-api 302
/guides/providers/tensorrt-llm /docs/built-in/tensorrt-llm 302
/guides/providers/tensorrt-llm/ /docs/built-in/tensorrt-llm 302
/hardware/recommendations/by-model/ /docs 302
/hardware/recommendations/by-hardware/ /docs 302
/product /docs 302
/product/features /docs 302
/product/features/agents-framework /docs 302
/product/features/api-server /docs/local-api 302
/product/features/data-security /docs 302
/product/features/extensions-framework /docs/extensions 302
/product/features/local /docs 302
/product/features/remote /docs 302
/product/home-server /docs/local-api 302
/guides/providers/tensorrt-llm/ /docs/built-in/tensorrt-llm 302
/docs/tools /docs/tools/retrieval 302
/docs/local-inference/llamacpp /docs/built-in/llama-cpp 302
/docs/local-inference/tensorrtllm /docs/built-in/tensorrt-llm 302
/guides/using-server/server/ /docs/local-api 302
/integrations/coding/vscode /integrations/coding/continue-dev 302
/docs/integrations/interpreter /integrations/function-calling/interpreter 302
/cortex/built-in/llama-cpp /docs 302
/docs/desktop-installation/linux /docs/desktop/linux 302
/docs/desktop-installation/windows /docs/desktop/windows 302
/docs/desktop-installation/mac /docs/desktop/mac 302
/desktop/ /docs/desktop 302
/developer/ui/ /docs 302
/docs/local-inference/lmstudio /docs/local-models/lmstudio 302
/docs/local-inference/ollama /docs/local-models/ollama 302
/docs/remote-inference/openai /docs/remote-models/openai 302
/docs/remote-inference/groq /docs/remote-models/groq 302
/docs/remote-inference/mistralai /docs/remote-models/mistralai 302
/docs/remote-inference/openrouter /docs/remote-models/openrouter 302
/docs/remote-inference/generic-openai /docs/remote-models/generic-openai 302
/docs/desktop-installation /docs/desktop 302
/hardware/concepts/gpu-and-vram/ /docs 302
/hardware/recommendations/by-usecase/ /docs 302
/about/how-we-work/strategy /about 302
/docs/engineering/assistants/ /docs 302
/cortex https://cortex.so/docs/ 301
/cortex/quickstart https://cortex.so/docs/quickstart 301
/cortex/hardware https://cortex.so/docs/hardware 301
/cortex/installation https://cortex.so/docs/category/installation 301
/cortex/installation/mac https://cortex.so/docs/instalation/mac 301
/cortex/installation/windows https://cortex.so/docs/instalation/windows 301
/cortex/installation/linux https://cortex.so/docs/instalation/linux 301
/cortex/command-line https://cortex.so/docs/command-line 301
/cortex/ts-library https://cortex.so/docs/ts-library 301
/cortex/py-library https://cortex.so/docs/py-library 301
/cortex/server https://cortex.so/docs/server 301
/cortex/text-generation https://cortex.so/docs/text-generation 301
/cortex/cli https://cortex.so/docs/cli/ 301
/cortex/cli/init https://cortex.so/docs/cli/init 301
/cortex/cli/pull https://cortex.so/docs/cli/pull 301
/cortex/cli/run https://cortex.so/docs/cli/run 301
/cortex/cli/models https://cortex.so/docs/cli/models/ 301
/cortex/cli/models/download https://cortex.so/docs/cli/models/download 301
/cortex/cli/models/list https://cortex.so/docs/cli/models/list 301
/cortex/cli/models/get https://cortex.so/docs/cli/models/get 301
/cortex/cli/models/update https://cortex.so/docs/cli/models/update 301
/cortex/cli/models/start https://cortex.so/docs/cli/models/start 301
/cortex/cli/models/stop https://cortex.so/docs/cli/models/stop 301
/cortex/cli/models/remove https://cortex.so/docs/cli/models/remove 301
/cortex/cli/ps https://cortex.so/docs/cli/ps 301
/cortex/cli/chat https://cortex.so/docs/cli/chat 301
/cortex/cli/kill https://cortex.so/docs/cli/kill 301
/cortex/cli/serve https://cortex.so/docs/cli/serve 301
/cortex/architecture https://cortex.so/docs/architecture 301
/cortex/cortex-cpp https://cortex.so/docs/cortex-cpp 301
/cortex/cortex-llamacpp https://cortex.so/docs/cortex-llamacpp 301
/api-reference https://cortex.so/api-reference 301
/docs/assistants /docs 302
/docs/server-installation/ /docs/desktop 302
/docs/server-installation/onprem /docs/desktop 302
/docs/server-installation/aws /docs/desktop 302
/docs/server-installation/gcp /docs/desktop 302
/docs/server-installation/azure /docs/desktop 302
/about /docs 302
/api-server /docs/api-server 302
/cdn-cgi/l/email-protection 302
/docs/built-in/tensorrt-llm 302
/docs/desktop/beta /docs 302
/docs/docs/data-folder /docs/data-folder 302
/docs/docs/desktop/linux /docs/desktop/linux 302
/docs/docs/troubleshooting /docs/troubleshooting 302
/docs/local-engines/llama-cpp 302
/docs/models/model-parameters 302
/mcp /docs/mcp 302
/quickstart /docs/quickstart 302
/server-examples/continue-dev /docs/server-examples/continue-dev 302
/about /handbook 302
/about/ /handbook 302
/about/community /handbook 302
/about/handbook /handbook 302
/about/handbook/analytics /handbook 302
/about/handbook/project-management /handbook 302
/about/handbook/strategy /handbook 302
/about/handbook/website-docs /handbook 302
/about/how-we-work/product-design /handbook 302
/about/how-we-work/strategy /handbook 302
/about/investors /handbook 302
/about/roadmap /handbook 302
/about/team /handbook 302
/about/vision /handbook 302
/about/wall-of-love /handbook 302
/handbook/contributing-to-jan/ /handbook 302
/handbook/core-contributors/how-we-hire/ /handbook 302
/handbook/engineering-excellence/ /handbook 302
/handbook/engineering/ /handbook 302
/handbook/product-and-community/ /handbook 302
/handbook/products-and-innovations/ /handbook 302
/handbook/what-we-do/our-approach-to-design/ /handbook 302
/how-we-work/product-design /handbook 302
/handbook/product-and-community/approaches-to-beta-testing-and-user-engagement/ /handbook 302
/cortex/assistants /docs/ 302
/cortex/build-extension /docs/ 302
/cortex/built-in/tensorrt-llm /docs/ 302
/cortex/cli/kill /docs/ 302
/cortex/command-line /docs/ 302
/cortex/cortex-openvino /docs/ 302
/cortex/cortex-python /docs/ 302
/cortex/cortex-tensorrt-llm /docs/ 302
/cortex/desktop-installation/linux /docs/ 302
/cortex/embeddings /docs/ 302
/cortex/ext-architecture /docs/ 302
/cortex/fine-tuning /docs/ 302
/cortex/fine-tuning/overview /docs/ 302
/cortex/function-calling /docs/ 302
/cortex/installation/linux /docs/ 302
/cortex/installation/mac /docs/ 302
/cortex/model-operations /docs/ 302
/cortex/model-operations/overview /docs/ 302
/cortex/rag/overview /docs/ 302
/cortex/server /docs/ 302
/docs/tools/retrieval /docs/ 302
/developer/framework/engineering/chats /docs/ 302
/developer/framework/engineering/threads/ /docs/ 302
/developer/framework/product/chat /docs/ 302
/docs/extensions /docs/ 302
/docs/shortcuts /docs/ 302
/docs/models /docs/ 302
/integrations/function-calling/interpreter /docs/ 302
/docs/desktop/built-in/tensorrt-llm /docs 302
/docs/desktop/beta /docs/desktop 302
/platforms /docs/desktop 302
/docs/built-in/llama-cpp /docs/desktop/llama-cpp 302
/docs/install-engines /docs/desktop/llama-cpp 302
/docs/local-api /docs/desktop/api-server 302
/docs/local-engines/llama-cpp /docs/desktop/llama-cpp 302
/docs/api-server /docs/desktop/api-server 302
/docs/assistants /docs/desktop/assistants 302
/docs/models/manage-models /docs/desktop/manage-models 302
/docs/data-folder /docs/desktop/data-folder 302
/cortex/vision /handbook/open-superintelligence 302
/docs/models/model-parameters /docs/desktop/model-parameters 302
/docs/remote-models/generic-openai /docs/desktop/remote-models/openai 302
/docs/threads /changelog/2024-01-16-settings-options-right-panel 302
/docs/desktop/docs/data-folder /docs/desktop/data-folder 302
/docs/desktop/docs/desktop/install/linux /docs/desktop/install/linux 302
/docs/desktop/docs/desktop/troubleshooting /docs/desktop/troubleshooting 302
/docs/desktop/linux /docs/desktop/install/linux 302
/docs/desktop/local-engines/llama-cpp /docs/desktop/llama-cpp-server 302
/docs/desktop/models/model-parameters /docs/desktop/model-parameters 302
/docs/desktop/windows /docs/desktop/install/windows 302
/docs/docs/data-folder /docs/desktop/data-folder 302
/docs/docs/desktop/linux /docs/desktop/install/linux 302
/docs/docs/troubleshooting /docs/desktop/troubleshooting 302
/docs/jan-models/jan-nano-32 /docs/desktop/jan-models/jan-nano-32 302
/docs/jan-models/jan-v1 /docs/desktop/jan-models/jan-v1 302
/docs/jan-models/lucy /docs/desktop/jan-models/lucy 302
/docs/llama-cpp /docs/desktop/llama-cpp 302
/docs/manage-models /docs/desktop/manage-models 302
/docs/mcp /docs/desktop/mcp 302
/docs/mcp-examples/data-analysis/e2b /docs/desktop/mcp-examples/data-analysis/e2b 302
/docs/mcp-examples/deepresearch/octagon /docs/desktop/mcp-examples/deepresearch/octagon 302
/docs/mcp-examples/design/canva /docs/desktop/mcp-examples/design/canva 302
/docs/mcp-examples/productivity/linear /docs/desktop/mcp-examples/productivity/linear 302
/docs/mcp-examples/search/exa /docs/desktop/mcp-examples/search/exa 302
/docs/model-parameters /docs/desktop/model-parameters 302
/docs/remote-models/cohere /docs/desktop/remote-models/cohere 302
/docs/remote-models/google /docs/desktop/remote-models/google 302
/docs/remote-models/groq /docs/desktop/remote-models/groq 302
/docs/remote-models/huggingface /docs/desktop/remote-models/huggingface 302
/docs/remote-models/mistralai /docs/desktop/remote-models/mistralai 302
/docs/remote-models/openai /docs/desktop/remote-models/openai 302
/docs/server-examples/continue-dev /docs/desktop/server-examples/continue-dev 302
/docs/server-examples/n8n /docs/desktop/server-examples/n8n 302
/docs/server-troubleshooting /docs/desktop/troubleshooting 302
/docs/privacy-policy /privacy 302
/docs/server-settings /docs/desktop/server-settings 302
/docs/settings /docs/desktop/settings 302
/docs/llama-cpp-server /docs/desktop/llama-cpp-server 302
/docs/install/linux /docs/desktop/install/linux 302
/docs/install/macos /docs/desktop/install/mac 302
/docs/install/windows /docs/desktop/install/windows 302
/docs/mcp-examples/browser/browserbase /docs/desktop/mcp-examples/browser/browserbase 302
/docs/jan-models/jan-nano-128 /docs/desktop/jan-models/jan-nano-128 302
/docs/mcp-examples/search/serper /docs/desktop/mcp-examples/search/serper 302
/docs/mcp-examples/data-analysis/jupyter /docs/desktop/mcp-examples/data-analysis/jupyter 302
/docs/mcp-examples/productivity/todoist /docs/desktop/mcp-examples/productivity/todoist 302
/docs/remote-models/anthropic /docs/desktop/remote-models/anthropic 302
/docs/remote-models/openrouter /docs/desktop/remote-models/openrouter 302
/docs/server-examples/llmcord /docs/desktop/server-examples/llmcord 302
/docs/server-examples/tabby /docs/desktop/server-examples/tabby 302
/guides/integrations/continue/ /docs/desktop/server-examples/continue-dev 302
/continue-dev /docs/desktop/server-examples/continue-dev 302
/integrations /docs/desktop/server-examples/continue-dev 302
/integrations/coding/continue-dev /docs/desktop/server-examples/continue-dev 302
/integrations/continue/ /docs/desktop/server-examples/continue-dev 302
/integrations/coding/tabby /docs/desktop/server-examples/tabby 302
/integrations/messaging/llmcord /docs/desktop/server-examples/llmcord 302
/integrations/workflow-automation/n8n /docs/desktop/server-examples/n8n 302
/local-server/continue-dev /docs/desktop/server-examples/continue-dev 302
/local-server/data-folder /docs/desktop/desktop/data-folder 302
/local-server/llama-cpp /docs/desktop/desktop/llama-cpp 302
/local-server/n8n /docs/desktop/server-examples/n8n 302
/local-server/settings /docs/desktop/server-settings 302
/local-server/tabby /docs/desktop/server-examples/tabby 302
/local-server/troubleshooting /docs/desktop/troubleshooting 302
/mcp /docs/desktop/mcp 302
/quickstart /docs/desktop/quickstart 302
/server-examples/continue-dev /docs/desktop/server-examples/continue-dev 302
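
Each rule above follows the `_redirects` convention of `source destination status`. As a quick sanity check against a preview deployment, a small parser/prober along these lines could confirm every source answers with the expected redirect (a sketch only — the helper names and base URL are illustrative and not part of this change; Node 18+ is assumed for the built-in `fetch`):

```typescript
// Sketch (assumption): parse `source destination status` rules and probe a
// deployment to confirm each source answers with the expected redirect.
interface RedirectRule {
  source: string;
  destination: string; // empty when a rule only carries a status code
  status: number;
}

function parseRules(text: string): RedirectRule[] {
  const rules: RedirectRule[] = [];
  for (const raw of text.split('\n')) {
    const line = raw.trim();
    if (!line || line.startsWith('#')) continue;
    const parts = line.split(/\s+/);
    const last = parts[parts.length - 1];
    const hasStatus = /^\d{3}!?$/.test(last);
    const status = hasStatus ? parseInt(last, 10) : 301;
    const body = hasStatus ? parts.slice(0, -1) : parts;
    const [source, destination = ''] = body;
    rules.push({ source, destination, status });
  }
  return rules;
}

async function checkRedirect(base: string, rule: RedirectRule): Promise<boolean> {
  // redirect: 'manual' keeps the 301/302 response instead of following it.
  const res = await fetch(new URL(rule.source, base), { redirect: 'manual' });
  const location = res.headers.get('location') ?? '';
  return res.status === rule.status && location.startsWith(rule.destination);
}
```

Run over the rules above, a check like this would also surface entries such as `/cdn-cgi/l/email-protection 302`, which currently list no destination.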

View File

@@ -1,148 +1,125 @@
<?xml version="1.0" encoding="UTF-8"?>
<urlset xmlns="http://www.sitemaps.org/schemas/sitemap/0.9" xmlns:news="http://www.google.com/schemas/sitemap-news/0.9" xmlns:xhtml="http://www.w3.org/1999/xhtml" xmlns:mobile="http://www.google.com/schemas/sitemap-mobile/1.0" xmlns:image="http://www.google.com/schemas/sitemap-image/1.1" xmlns:video="http://www.google.com/schemas/sitemap-video/1.1">
<url><loc>https://jan.ai</loc><lastmod>2025-03-10T05:06:47.876Z</lastmod><changefreq>daily</changefreq><priority>1</priority></url>
<url><loc>https://jan.ai/about</loc><lastmod>2025-03-10T05:06:47.877Z</lastmod><changefreq>daily</changefreq><priority>1</priority></url>
<url><loc>https://jan.ai/about/handbook</loc><lastmod>2025-03-10T05:06:47.877Z</lastmod><changefreq>daily</changefreq><priority>1</priority></url>
<url><loc>https://jan.ai/about/handbook/analytics</loc><lastmod>2025-03-10T05:06:47.877Z</lastmod><changefreq>daily</changefreq><priority>1</priority></url>
<url><loc>https://jan.ai/about/handbook/engineering</loc><lastmod>2025-03-10T05:06:47.877Z</lastmod><changefreq>daily</changefreq><priority>1</priority></url>
<url><loc>https://jan.ai/about/handbook/engineering/ci-cd</loc><lastmod>2025-03-10T05:06:47.877Z</lastmod><changefreq>daily</changefreq><priority>1</priority></url>
<url><loc>https://jan.ai/about/handbook/engineering/qa</loc><lastmod>2025-03-10T05:06:47.877Z</lastmod><changefreq>daily</changefreq><priority>1</priority></url>
<url><loc>https://jan.ai/about/handbook/product-design</loc><lastmod>2025-03-10T05:06:47.877Z</lastmod><changefreq>daily</changefreq><priority>1</priority></url>
<url><loc>https://jan.ai/about/handbook/project-management</loc><lastmod>2025-03-10T05:06:47.877Z</lastmod><changefreq>daily</changefreq><priority>1</priority></url>
<url><loc>https://jan.ai/about/handbook/strategy</loc><lastmod>2025-03-10T05:06:47.877Z</lastmod><changefreq>daily</changefreq><priority>1</priority></url>
<url><loc>https://jan.ai/about/handbook/website-docs</loc><lastmod>2025-03-10T05:06:47.877Z</lastmod><changefreq>daily</changefreq><priority>1</priority></url>
<url><loc>https://jan.ai/about/investors</loc><lastmod>2025-03-10T05:06:47.877Z</lastmod><changefreq>daily</changefreq><priority>1</priority></url>
<url><loc>https://jan.ai/about/team</loc><lastmod>2025-03-10T05:06:47.877Z</lastmod><changefreq>daily</changefreq><priority>1</priority></url>
<url><loc>https://jan.ai/about/vision</loc><lastmod>2025-03-10T05:06:47.877Z</lastmod><changefreq>daily</changefreq><priority>1</priority></url>
<url><loc>https://jan.ai/about/wall-of-love</loc><lastmod>2025-03-10T05:06:47.877Z</lastmod><changefreq>daily</changefreq><priority>1</priority></url>
<url><loc>https://jan.ai/blog</loc><lastmod>2025-03-10T05:06:47.877Z</lastmod><changefreq>daily</changefreq><priority>1</priority></url>
<url><loc>https://jan.ai/changelog</loc><lastmod>2025-03-10T05:06:47.877Z</lastmod><changefreq>daily</changefreq><priority>1</priority></url>
<url><loc>https://jan.ai/changelog/2023-12-21-faster-inference-across-platform</loc><lastmod>2025-03-10T05:06:47.877Z</lastmod><changefreq>daily</changefreq><priority>1</priority></url>
<url><loc>https://jan.ai/changelog/2024-01-16-settings-options-right-panel</loc><lastmod>2025-03-10T05:06:47.877Z</lastmod><changefreq>daily</changefreq><priority>1</priority></url>
<url><loc>https://jan.ai/changelog/2024-01-29-local-api-server</loc><lastmod>2025-03-10T05:06:47.877Z</lastmod><changefreq>daily</changefreq><priority>1</priority></url>
<url><loc>https://jan.ai/changelog/2024-02-05-jan-data-folder</loc><lastmod>2025-03-10T05:06:47.877Z</lastmod><changefreq>daily</changefreq><priority>1</priority></url>
<url><loc>https://jan.ai/changelog/2024-02-10-jan-is-more-stable</loc><lastmod>2025-03-10T05:06:47.877Z</lastmod><changefreq>daily</changefreq><priority>1</priority></url>
<url><loc>https://jan.ai/changelog/2024-02-26-home-servers-with-helm</loc><lastmod>2025-03-10T05:06:47.877Z</lastmod><changefreq>daily</changefreq><priority>1</priority></url>
<url><loc>https://jan.ai/changelog/2024-03-06-ui-revamp-settings</loc><lastmod>2025-03-10T05:06:47.877Z</lastmod><changefreq>daily</changefreq><priority>1</priority></url>
<url><loc>https://jan.ai/changelog/2024-03-11-import-models</loc><lastmod>2025-03-10T05:06:47.877Z</lastmod><changefreq>daily</changefreq><priority>1</priority></url>
<url><loc>https://jan.ai/changelog/2024-03-19-nitro-tensorrt-llm-extension</loc><lastmod>2025-03-10T05:06:47.877Z</lastmod><changefreq>daily</changefreq><priority>1</priority></url>
<url><loc>https://jan.ai/changelog/2024-04-02-groq-api-integration</loc><lastmod>2025-03-10T05:06:47.877Z</lastmod><changefreq>daily</changefreq><priority>1</priority></url>
<url><loc>https://jan.ai/changelog/2024-04-15-new-mistral-extension</loc><lastmod>2025-03-10T05:06:47.877Z</lastmod><changefreq>daily</changefreq><priority>1</priority></url>
<url><loc>https://jan.ai/changelog/2024-04-25-llama3-command-r-hugginface</loc><lastmod>2025-03-10T05:06:47.877Z</lastmod><changefreq>daily</changefreq><priority>1</priority></url>
<url><loc>https://jan.ai/changelog/2024-05-20-llamacpp-upgrade-new-remote-models</loc><lastmod>2025-03-10T05:06:47.877Z</lastmod><changefreq>daily</changefreq><priority>1</priority></url>
<url><loc>https://jan.ai/changelog/2024-05-28-cohere-aya-23-8b-35b-phi-3-medium</loc><lastmod>2025-03-10T05:06:47.877Z</lastmod><changefreq>daily</changefreq><priority>1</priority></url>
<url><loc>https://jan.ai/changelog/2024-06-21-nvidia-nim-support</loc><lastmod>2025-03-10T05:06:47.877Z</lastmod><changefreq>daily</changefreq><priority>1</priority></url>
<url><loc>https://jan.ai/changelog/2024-07-15-claude-3-5-support</loc><lastmod>2025-03-10T05:06:47.877Z</lastmod><changefreq>daily</changefreq><priority>1</priority></url>
<url><loc>https://jan.ai/changelog/2024-09-01-llama3-1-gemma2-support</loc><lastmod>2025-03-10T05:06:47.877Z</lastmod><changefreq>daily</changefreq><priority>1</priority></url>
<url><loc>https://jan.ai/changelog/2024-09-17-improved-cpu-performance</loc><lastmod>2025-03-10T05:06:47.877Z</lastmod><changefreq>daily</changefreq><priority>1</priority></url>
<url><loc>https://jan.ai/changelog/2024-10-24-jan-stable</loc><lastmod>2025-03-10T05:06:47.877Z</lastmod><changefreq>daily</changefreq><priority>1</priority></url>
<url><loc>https://jan.ai/changelog/2024-11-22-jan-bugs</loc><lastmod>2025-03-10T05:06:47.877Z</lastmod><changefreq>daily</changefreq><priority>1</priority></url>
<url><loc>https://jan.ai/changelog/2024-11.14-jan-supports-qwen-coder</loc><lastmod>2025-03-10T05:06:47.877Z</lastmod><changefreq>daily</changefreq><priority>1</priority></url>
<url><loc>https://jan.ai/changelog/2024-12-03-jan-is-faster</loc><lastmod>2025-03-10T05:06:47.877Z</lastmod><changefreq>daily</changefreq><priority>1</priority></url>
<url><loc>https://jan.ai/changelog/2024-12-05-jan-hot-fix-mac</loc><lastmod>2025-03-10T05:06:47.877Z</lastmod><changefreq>daily</changefreq><priority>1</priority></url>
<url><loc>https://jan.ai/changelog/2024-12-30-jan-new-privacy</loc><lastmod>2025-03-10T05:06:47.877Z</lastmod><changefreq>daily</changefreq><priority>1</priority></url>
<url><loc>https://jan.ai/changelog/2025-01-06-key-issues-resolved</loc><lastmod>2025-03-10T05:06:47.877Z</lastmod><changefreq>daily</changefreq><priority>1</priority></url>
<url><loc>https://jan.ai/changelog/2025-01-23-deepseek-r1-jan</loc><lastmod>2025-03-10T05:06:47.877Z</lastmod><changefreq>daily</changefreq><priority>1</priority></url>
<url><loc>https://jan.ai/cortex</loc><lastmod>2025-03-10T05:06:47.877Z</lastmod><changefreq>daily</changefreq><priority>1</priority></url>
<url><loc>https://jan.ai/cortex/architecture</loc><lastmod>2025-03-10T05:06:47.877Z</lastmod><changefreq>daily</changefreq><priority>1</priority></url>
<url><loc>https://jan.ai/cortex/assistants</loc><lastmod>2025-03-10T05:06:47.877Z</lastmod><changefreq>daily</changefreq><priority>1</priority></url>
<url><loc>https://jan.ai/cortex/build-extension</loc><lastmod>2025-03-10T05:06:47.877Z</lastmod><changefreq>daily</changefreq><priority>1</priority></url>
<url><loc>https://jan.ai/cortex/cli</loc><lastmod>2025-03-10T05:06:47.877Z</lastmod><changefreq>daily</changefreq><priority>1</priority></url>
<url><loc>https://jan.ai/cortex/cli/chat</loc><lastmod>2025-03-10T05:06:47.877Z</lastmod><changefreq>daily</changefreq><priority>1</priority></url>
<url><loc>https://jan.ai/cortex/cli/init</loc><lastmod>2025-03-10T05:06:47.877Z</lastmod><changefreq>daily</changefreq><priority>1</priority></url>
<url><loc>https://jan.ai/cortex/cli/kill</loc><lastmod>2025-03-10T05:06:47.877Z</lastmod><changefreq>daily</changefreq><priority>1</priority></url>
<url><loc>https://jan.ai/cortex/cli/models</loc><lastmod>2025-03-10T05:06:47.877Z</lastmod><changefreq>daily</changefreq><priority>1</priority></url>
<url><loc>https://jan.ai/cortex/cli/models/download</loc><lastmod>2025-03-10T05:06:47.877Z</lastmod><changefreq>daily</changefreq><priority>1</priority></url>
<url><loc>https://jan.ai/cortex/cli/models/get</loc><lastmod>2025-03-10T05:06:47.877Z</lastmod><changefreq>daily</changefreq><priority>1</priority></url>
<url><loc>https://jan.ai/cortex/cli/models/list</loc><lastmod>2025-03-10T05:06:47.877Z</lastmod><changefreq>daily</changefreq><priority>1</priority></url>
<url><loc>https://jan.ai/cortex/cli/models/remove</loc><lastmod>2025-03-10T05:06:47.877Z</lastmod><changefreq>daily</changefreq><priority>1</priority></url>
<url><loc>https://jan.ai/cortex/cli/models/start</loc><lastmod>2025-03-10T05:06:47.877Z</lastmod><changefreq>daily</changefreq><priority>1</priority></url>
<url><loc>https://jan.ai/cortex/cli/models/stop</loc><lastmod>2025-03-10T05:06:47.877Z</lastmod><changefreq>daily</changefreq><priority>1</priority></url>
<url><loc>https://jan.ai/cortex/cli/models/update</loc><lastmod>2025-03-10T05:06:47.877Z</lastmod><changefreq>daily</changefreq><priority>1</priority></url>
<url><loc>https://jan.ai/cortex/cli/ps</loc><lastmod>2025-03-10T05:06:47.877Z</lastmod><changefreq>daily</changefreq><priority>1</priority></url>
<url><loc>https://jan.ai/cortex/cli/pull</loc><lastmod>2025-03-10T05:06:47.877Z</lastmod><changefreq>daily</changefreq><priority>1</priority></url>
<url><loc>https://jan.ai/cortex/cli/run</loc><lastmod>2025-03-10T05:06:47.877Z</lastmod><changefreq>daily</changefreq><priority>1</priority></url>
<url><loc>https://jan.ai/cortex/cli/serve</loc><lastmod>2025-03-10T05:06:47.877Z</lastmod><changefreq>daily</changefreq><priority>1</priority></url>
<url><loc>https://jan.ai/cortex/command-line</loc><lastmod>2025-03-10T05:06:47.877Z</lastmod><changefreq>daily</changefreq><priority>1</priority></url>
<url><loc>https://jan.ai/cortex/cortex-cpp</loc><lastmod>2025-03-10T05:06:47.877Z</lastmod><changefreq>daily</changefreq><priority>1</priority></url>
<url><loc>https://jan.ai/cortex/cortex-llamacpp</loc><lastmod>2025-03-10T05:06:47.877Z</lastmod><changefreq>daily</changefreq><priority>1</priority></url>
<url><loc>https://jan.ai/cortex/cortex-openvino</loc><lastmod>2025-03-10T05:06:47.877Z</lastmod><changefreq>daily</changefreq><priority>1</priority></url>
<url><loc>https://jan.ai/cortex/cortex-python</loc><lastmod>2025-03-10T05:06:47.877Z</lastmod><changefreq>daily</changefreq><priority>1</priority></url>
<url><loc>https://jan.ai/cortex/cortex-tensorrt-llm</loc><lastmod>2025-03-10T05:06:47.877Z</lastmod><changefreq>daily</changefreq><priority>1</priority></url>
<url><loc>https://jan.ai/cortex/embeddings</loc><lastmod>2025-03-10T05:06:47.877Z</lastmod><changefreq>daily</changefreq><priority>1</priority></url>
<url><loc>https://jan.ai/cortex/embeddings/overview</loc><lastmod>2025-03-10T05:06:47.877Z</lastmod><changefreq>daily</changefreq><priority>1</priority></url>
<url><loc>https://jan.ai/cortex/error-codes</loc><lastmod>2025-03-10T05:06:47.877Z</lastmod><changefreq>daily</changefreq><priority>1</priority></url>
<url><loc>https://jan.ai/cortex/ext-architecture</loc><lastmod>2025-03-10T05:06:47.877Z</lastmod><changefreq>daily</changefreq><priority>1</priority></url>
<url><loc>https://jan.ai/cortex/fine-tuning</loc><lastmod>2025-03-10T05:06:47.877Z</lastmod><changefreq>daily</changefreq><priority>1</priority></url>
<url><loc>https://jan.ai/cortex/fine-tuning/overview</loc><lastmod>2025-03-10T05:06:47.877Z</lastmod><changefreq>daily</changefreq><priority>1</priority></url>
<url><loc>https://jan.ai/cortex/function-calling</loc><lastmod>2025-03-10T05:06:47.877Z</lastmod><changefreq>daily</changefreq><priority>1</priority></url>
<url><loc>https://jan.ai/cortex/hardware</loc><lastmod>2025-03-10T05:06:47.877Z</lastmod><changefreq>daily</changefreq><priority>1</priority></url>
<url><loc>https://jan.ai/cortex/installation</loc><lastmod>2025-03-10T05:06:47.877Z</lastmod><changefreq>daily</changefreq><priority>1</priority></url>
<url><loc>https://jan.ai/cortex/installation/linux</loc><lastmod>2025-03-10T05:06:47.877Z</lastmod><changefreq>daily</changefreq><priority>1</priority></url>
<url><loc>https://jan.ai/cortex/installation/mac</loc><lastmod>2025-03-10T05:06:47.877Z</lastmod><changefreq>daily</changefreq><priority>1</priority></url>
<url><loc>https://jan.ai/cortex/installation/windows</loc><lastmod>2025-03-10T05:06:47.877Z</lastmod><changefreq>daily</changefreq><priority>1</priority></url>
<url><loc>https://jan.ai/cortex/model-operations</loc><lastmod>2025-03-10T05:06:47.877Z</lastmod><changefreq>daily</changefreq><priority>1</priority></url>
<url><loc>https://jan.ai/cortex/model-operations/overview</loc><lastmod>2025-03-10T05:06:47.877Z</lastmod><changefreq>daily</changefreq><priority>1</priority></url>
<url><loc>https://jan.ai/cortex/py-library</loc><lastmod>2025-03-10T05:06:47.877Z</lastmod><changefreq>daily</changefreq><priority>1</priority></url>
<url><loc>https://jan.ai/cortex/quickstart</loc><lastmod>2025-03-10T05:06:47.877Z</lastmod><changefreq>daily</changefreq><priority>1</priority></url>
<url><loc>https://jan.ai/cortex/rag</loc><lastmod>2025-03-10T05:06:47.877Z</lastmod><changefreq>daily</changefreq><priority>1</priority></url>
<url><loc>https://jan.ai/cortex/rag/overview</loc><lastmod>2025-03-10T05:06:47.877Z</lastmod><changefreq>daily</changefreq><priority>1</priority></url>
<url><loc>https://jan.ai/cortex/server</loc><lastmod>2025-03-10T05:06:47.877Z</lastmod><changefreq>daily</changefreq><priority>1</priority></url>
<url><loc>https://jan.ai/cortex/text-generation</loc><lastmod>2025-03-10T05:06:47.877Z</lastmod><changefreq>daily</changefreq><priority>1</priority></url>
<url><loc>https://jan.ai/cortex/ts-library</loc><lastmod>2025-03-10T05:06:47.877Z</lastmod><changefreq>daily</changefreq><priority>1</priority></url>
<url><loc>https://jan.ai/cortex/vision</loc><lastmod>2025-03-10T05:06:47.877Z</lastmod><changefreq>daily</changefreq><priority>1</priority></url>
<url><loc>https://jan.ai/cortex/vision/overview</loc><lastmod>2025-03-10T05:06:47.877Z</lastmod><changefreq>daily</changefreq><priority>1</priority></url>
<url><loc>https://jan.ai/docs</loc><lastmod>2025-03-10T05:06:47.877Z</lastmod><changefreq>daily</changefreq><priority>1</priority></url>
<url><loc>https://jan.ai/docs/api-server</loc><lastmod>2025-03-10T05:06:47.877Z</lastmod><changefreq>daily</changefreq><priority>1</priority></url>
<url><loc>https://jan.ai/docs/assistants</loc><lastmod>2025-03-10T05:06:47.877Z</lastmod><changefreq>daily</changefreq><priority>1</priority></url>
<url><loc>https://jan.ai/docs/configure-extensions</loc><lastmod>2025-03-10T05:06:47.877Z</lastmod><changefreq>daily</changefreq><priority>1</priority></url>
<url><loc>https://jan.ai/docs/data-folder</loc><lastmod>2025-03-10T05:06:47.877Z</lastmod><changefreq>daily</changefreq><priority>1</priority></url>
<url><loc>https://jan.ai/docs/desktop</loc><lastmod>2025-03-10T05:06:47.877Z</lastmod><changefreq>daily</changefreq><priority>1</priority></url>
<url><loc>https://jan.ai/docs/desktop/linux</loc><lastmod>2025-03-10T05:06:47.877Z</lastmod><changefreq>daily</changefreq><priority>1</priority></url>
<url><loc>https://jan.ai/docs/desktop/mac</loc><lastmod>2025-03-10T05:06:47.877Z</lastmod><changefreq>daily</changefreq><priority>1</priority></url>
<url><loc>https://jan.ai/docs/desktop/windows</loc><lastmod>2025-03-10T05:06:47.877Z</lastmod><changefreq>daily</changefreq><priority>1</priority></url>
<url><loc>https://jan.ai/docs/error-codes</loc><lastmod>2025-03-10T05:06:47.877Z</lastmod><changefreq>daily</changefreq><priority>1</priority></url>
<url><loc>https://jan.ai/docs/extensions</loc><lastmod>2025-03-10T05:06:47.877Z</lastmod><changefreq>daily</changefreq><priority>1</priority></url>
<url><loc>https://jan.ai/docs/extensions-settings/model-management</loc><lastmod>2025-03-10T05:06:47.877Z</lastmod><changefreq>daily</changefreq><priority>1</priority></url>
<url><loc>https://jan.ai/docs/extensions-settings/system-monitoring</loc><lastmod>2025-03-10T05:06:47.877Z</lastmod><changefreq>daily</changefreq><priority>1</priority></url>
<url><loc>https://jan.ai/docs/install-engines</loc><lastmod>2025-03-10T05:06:47.877Z</lastmod><changefreq>daily</changefreq><priority>1</priority></url>
<url><loc>https://jan.ai/docs/install-extensions</loc><lastmod>2025-03-10T05:06:47.877Z</lastmod><changefreq>daily</changefreq><priority>1</priority></url>
<url><loc>https://jan.ai/docs/local-engines/llama-cpp</loc><lastmod>2025-03-10T05:06:47.877Z</lastmod><changefreq>daily</changefreq><priority>1</priority></url>
<url><loc>https://jan.ai/docs/models</loc><lastmod>2025-03-10T05:06:47.877Z</lastmod><changefreq>daily</changefreq><priority>1</priority></url>
<url><loc>https://jan.ai/docs/models/manage-models</loc><lastmod>2025-03-10T05:06:47.877Z</lastmod><changefreq>daily</changefreq><priority>1</priority></url>
<url><loc>https://jan.ai/docs/models/model-parameters</loc><lastmod>2025-03-10T05:06:47.877Z</lastmod><changefreq>daily</changefreq><priority>1</priority></url>
<url><loc>https://jan.ai/docs/privacy</loc><lastmod>2025-03-10T05:06:47.877Z</lastmod><changefreq>daily</changefreq><priority>1</priority></url>
<url><loc>https://jan.ai/docs/privacy-policy</loc><lastmod>2025-03-10T05:06:47.877Z</lastmod><changefreq>daily</changefreq><priority>1</priority></url>
<url><loc>https://jan.ai/docs/quickstart</loc><lastmod>2025-03-10T05:06:47.877Z</lastmod><changefreq>daily</changefreq><priority>1</priority></url>
<url><loc>https://jan.ai/docs/remote-models/anthropic</loc><lastmod>2025-03-10T05:06:47.877Z</lastmod><changefreq>daily</changefreq><priority>1</priority></url>
<url><loc>https://jan.ai/docs/remote-models/cohere</loc><lastmod>2025-03-10T05:06:47.877Z</lastmod><changefreq>daily</changefreq><priority>1</priority></url>
<url><loc>https://jan.ai/docs/remote-models/deepseek</loc><lastmod>2025-03-10T05:06:47.877Z</lastmod><changefreq>daily</changefreq><priority>1</priority></url>
<url><loc>https://jan.ai/docs/remote-models/google</loc><lastmod>2025-03-10T05:06:47.877Z</lastmod><changefreq>daily</changefreq><priority>1</priority></url>
<url><loc>https://jan.ai/docs/remote-models/groq</loc><lastmod>2025-03-10T05:06:47.877Z</lastmod><changefreq>daily</changefreq><priority>1</priority></url>
<url><loc>https://jan.ai/docs/remote-models/martian</loc><lastmod>2025-03-10T05:06:47.877Z</lastmod><changefreq>daily</changefreq><priority>1</priority></url>
<url><loc>https://jan.ai/docs/remote-models/mistralai</loc><lastmod>2025-03-10T05:06:47.877Z</lastmod><changefreq>daily</changefreq><priority>1</priority></url>
<url><loc>https://jan.ai/docs/remote-models/nvidia-nim</loc><lastmod>2025-03-10T05:06:47.877Z</lastmod><changefreq>daily</changefreq><priority>1</priority></url>
<url><loc>https://jan.ai/docs/remote-models/openai</loc><lastmod>2025-03-10T05:06:47.877Z</lastmod><changefreq>daily</changefreq><priority>1</priority></url>
<url><loc>https://jan.ai/docs/remote-models/openrouter</loc><lastmod>2025-03-10T05:06:47.877Z</lastmod><changefreq>daily</changefreq><priority>1</priority></url>
<url><loc>https://jan.ai/docs/remote-models/triton</loc><lastmod>2025-03-10T05:06:47.877Z</lastmod><changefreq>daily</changefreq><priority>1</priority></url>
<url><loc>https://jan.ai/docs/settings</loc><lastmod>2025-03-10T05:06:47.877Z</lastmod><changefreq>daily</changefreq><priority>1</priority></url>
<url><loc>https://jan.ai/docs/threads</loc><lastmod>2025-03-10T05:06:47.877Z</lastmod><changefreq>daily</changefreq><priority>1</priority></url>
<url><loc>https://jan.ai/docs/tools/retrieval</loc><lastmod>2025-03-10T05:06:47.877Z</lastmod><changefreq>daily</changefreq><priority>1</priority></url>
<url><loc>https://jan.ai/docs/troubleshooting</loc><lastmod>2025-03-10T05:06:47.877Z</lastmod><changefreq>daily</changefreq><priority>1</priority></url>
<url><loc>https://jan.ai/download</loc><lastmod>2025-03-10T05:06:47.877Z</lastmod><changefreq>daily</changefreq><priority>1</priority></url>
<url><loc>https://jan.ai/integrations</loc><lastmod>2025-03-10T05:06:47.877Z</lastmod><changefreq>daily</changefreq><priority>1</priority></url>
<url><loc>https://jan.ai/integrations/coding/continue-dev</loc><lastmod>2025-03-10T05:06:47.877Z</lastmod><changefreq>daily</changefreq><priority>1</priority></url>
<url><loc>https://jan.ai/integrations/coding/tabby</loc><lastmod>2025-03-10T05:06:47.877Z</lastmod><changefreq>daily</changefreq><priority>1</priority></url>
<url><loc>https://jan.ai/integrations/function-calling/interpreter</loc><lastmod>2025-03-10T05:06:47.877Z</lastmod><changefreq>daily</changefreq><priority>1</priority></url>
<url><loc>https://jan.ai/integrations/messaging/llmcord</loc><lastmod>2025-03-10T05:06:47.877Z</lastmod><changefreq>daily</changefreq><priority>1</priority></url>
<url><loc>https://jan.ai/integrations/workflow-automation/n8n</loc><lastmod>2025-03-10T05:06:47.877Z</lastmod><changefreq>daily</changefreq><priority>1</priority></url>
<url><loc>https://jan.ai/post/benchmarking-nvidia-tensorrt-llm</loc><lastmod>2025-03-10T05:06:47.877Z</lastmod><changefreq>daily</changefreq><priority>1</priority></url>
<url><loc>https://jan.ai/post/bitdefender</loc><lastmod>2025-03-10T05:06:47.877Z</lastmod><changefreq>daily</changefreq><priority>1</priority></url>
<url><loc>https://jan.ai/post/data-is-moat</loc><lastmod>2025-03-10T05:06:47.877Z</lastmod><changefreq>daily</changefreq><priority>1</priority></url>
<url><loc>https://jan.ai/post/deepseek-r1-locally</loc><lastmod>2025-03-10T05:06:47.877Z</lastmod><changefreq>daily</changefreq><priority>1</priority></url>
<url><loc>https://jan.ai/post/offline-chatgpt-alternative</loc><lastmod>2025-03-10T05:06:47.877Z</lastmod><changefreq>daily</changefreq><priority>1</priority></url>
<url><loc>https://jan.ai/post/rag-is-not-enough</loc><lastmod>2025-03-10T05:06:47.877Z</lastmod><changefreq>daily</changefreq><priority>1</priority></url>
<url><loc>https://jan.ai/post/run-ai-models-locally</loc><lastmod>2025-03-10T05:06:47.877Z</lastmod><changefreq>daily</changefreq><priority>1</priority></url>
<url><loc>https://jan.ai/privacy</loc><lastmod>2025-03-10T05:06:47.877Z</lastmod><changefreq>daily</changefreq><priority>1</priority></url>
<url><loc>https://jan.ai/support</loc><lastmod>2025-03-10T05:06:47.877Z</lastmod><changefreq>daily</changefreq><priority>1</priority></url>
<url><loc>https://jan.ai</loc><lastmod>2025-09-24T03:40:05.491Z</lastmod><changefreq>daily</changefreq><priority>1</priority></url>
<url><loc>https://jan.ai/api-reference</loc><lastmod>2025-09-24T03:40:05.492Z</lastmod><changefreq>daily</changefreq><priority>1</priority></url>
<url><loc>https://jan.ai/api-reference/api-reference</loc><lastmod>2025-09-24T03:40:05.492Z</lastmod><changefreq>daily</changefreq><priority>1</priority></url>
<url><loc>https://jan.ai/api-reference/architecture</loc><lastmod>2025-09-24T03:40:05.492Z</lastmod><changefreq>daily</changefreq><priority>1</priority></url>
<url><loc>https://jan.ai/api-reference/configuration</loc><lastmod>2025-09-24T03:40:05.492Z</lastmod><changefreq>daily</changefreq><priority>1</priority></url>
<url><loc>https://jan.ai/api-reference/development</loc><lastmod>2025-09-24T03:40:05.492Z</lastmod><changefreq>daily</changefreq><priority>1</priority></url>
<url><loc>https://jan.ai/api-reference/installation</loc><lastmod>2025-09-24T03:40:05.492Z</lastmod><changefreq>daily</changefreq><priority>1</priority></url>
<url><loc>https://jan.ai/blog</loc><lastmod>2025-09-24T03:40:05.492Z</lastmod><changefreq>daily</changefreq><priority>1</priority></url>
<url><loc>https://jan.ai/changelog</loc><lastmod>2025-09-24T03:40:05.492Z</lastmod><changefreq>daily</changefreq><priority>1</priority></url>
<url><loc>https://jan.ai/changelog/2023-12-21-faster-inference-across-platform</loc><lastmod>2025-09-24T03:40:05.492Z</lastmod><changefreq>daily</changefreq><priority>1</priority></url>
<url><loc>https://jan.ai/changelog/2024-01-16-settings-options-right-panel</loc><lastmod>2025-09-24T03:40:05.492Z</lastmod><changefreq>daily</changefreq><priority>1</priority></url>
<url><loc>https://jan.ai/changelog/2024-01-29-local-api-server</loc><lastmod>2025-09-24T03:40:05.492Z</lastmod><changefreq>daily</changefreq><priority>1</priority></url>
<url><loc>https://jan.ai/changelog/2024-02-05-jan-data-folder</loc><lastmod>2025-09-24T03:40:05.492Z</lastmod><changefreq>daily</changefreq><priority>1</priority></url>
<url><loc>https://jan.ai/changelog/2024-02-10-jan-is-more-stable</loc><lastmod>2025-09-24T03:40:05.492Z</lastmod><changefreq>daily</changefreq><priority>1</priority></url>
<url><loc>https://jan.ai/changelog/2024-02-26-home-servers-with-helm</loc><lastmod>2025-09-24T03:40:05.492Z</lastmod><changefreq>daily</changefreq><priority>1</priority></url>
<url><loc>https://jan.ai/changelog/2024-03-06-ui-revamp-settings</loc><lastmod>2025-09-24T03:40:05.492Z</lastmod><changefreq>daily</changefreq><priority>1</priority></url>
<url><loc>https://jan.ai/changelog/2024-03-11-import-models</loc><lastmod>2025-09-24T03:40:05.492Z</lastmod><changefreq>daily</changefreq><priority>1</priority></url>
<url><loc>https://jan.ai/changelog/2024-03-19-nitro-tensorrt-llm-extension</loc><lastmod>2025-09-24T03:40:05.492Z</lastmod><changefreq>daily</changefreq><priority>1</priority></url>
<url><loc>https://jan.ai/changelog/2024-04-02-groq-api-integration</loc><lastmod>2025-09-24T03:40:05.492Z</lastmod><changefreq>daily</changefreq><priority>1</priority></url>
<url><loc>https://jan.ai/changelog/2024-04-15-new-mistral-extension</loc><lastmod>2025-09-24T03:40:05.492Z</lastmod><changefreq>daily</changefreq><priority>1</priority></url>
<url><loc>https://jan.ai/changelog/2024-04-25-llama3-command-r-hugginface</loc><lastmod>2025-09-24T03:40:05.492Z</lastmod><changefreq>daily</changefreq><priority>1</priority></url>
<url><loc>https://jan.ai/changelog/2024-05-20-llamacpp-upgrade-new-remote-models</loc><lastmod>2025-09-24T03:40:05.492Z</lastmod><changefreq>daily</changefreq><priority>1</priority></url>
<url><loc>https://jan.ai/changelog/2024-05-28-cohere-aya-23-8b-35b-phi-3-medium</loc><lastmod>2025-09-24T03:40:05.492Z</lastmod><changefreq>daily</changefreq><priority>1</priority></url>
<url><loc>https://jan.ai/changelog/2024-06-21-nvidia-nim-support</loc><lastmod>2025-09-24T03:40:05.492Z</lastmod><changefreq>daily</changefreq><priority>1</priority></url>
<url><loc>https://jan.ai/changelog/2024-07-15-claude-3-5-support</loc><lastmod>2025-09-24T03:40:05.492Z</lastmod><changefreq>daily</changefreq><priority>1</priority></url>
<url><loc>https://jan.ai/changelog/2024-09-01-llama3-1-gemma2-support</loc><lastmod>2025-09-24T03:40:05.492Z</lastmod><changefreq>daily</changefreq><priority>1</priority></url>
<url><loc>https://jan.ai/changelog/2024-09-17-improved-cpu-performance</loc><lastmod>2025-09-24T03:40:05.492Z</lastmod><changefreq>daily</changefreq><priority>1</priority></url>
<url><loc>https://jan.ai/changelog/2024-10-24-jan-stable</loc><lastmod>2025-09-24T03:40:05.492Z</lastmod><changefreq>daily</changefreq><priority>1</priority></url>
<url><loc>https://jan.ai/changelog/2024-11-22-jan-bugs</loc><lastmod>2025-09-24T03:40:05.492Z</lastmod><changefreq>daily</changefreq><priority>1</priority></url>
<url><loc>https://jan.ai/changelog/2024-11.14-jan-supports-qwen-coder</loc><lastmod>2025-09-24T03:40:05.492Z</lastmod><changefreq>daily</changefreq><priority>1</priority></url>
<url><loc>https://jan.ai/changelog/2024-12-03-jan-is-faster</loc><lastmod>2025-09-24T03:40:05.492Z</lastmod><changefreq>daily</changefreq><priority>1</priority></url>
<url><loc>https://jan.ai/changelog/2024-12-05-jan-hot-fix-mac</loc><lastmod>2025-09-24T03:40:05.492Z</lastmod><changefreq>daily</changefreq><priority>1</priority></url>
<url><loc>https://jan.ai/changelog/2024-12-30-jan-new-privacy</loc><lastmod>2025-09-24T03:40:05.492Z</lastmod><changefreq>daily</changefreq><priority>1</priority></url>
<url><loc>https://jan.ai/changelog/2025-01-06-key-issues-resolved</loc><lastmod>2025-09-24T03:40:05.492Z</lastmod><changefreq>daily</changefreq><priority>1</priority></url>
<url><loc>https://jan.ai/changelog/2025-01-23-deepseek-r1-jan</loc><lastmod>2025-09-24T03:40:05.492Z</lastmod><changefreq>daily</changefreq><priority>1</priority></url>
<url><loc>https://jan.ai/changelog/2025-02-18-advanced-llama.cpp-settings</loc><lastmod>2025-09-24T03:40:05.492Z</lastmod><changefreq>daily</changefreq><priority>1</priority></url>
<url><loc>https://jan.ai/changelog/2025-03-14-jan-security-patch</loc><lastmod>2025-09-24T03:40:05.492Z</lastmod><changefreq>daily</changefreq><priority>1</priority></url>
<url><loc>https://jan.ai/changelog/2025-05-14-jan-qwen3-patch</loc><lastmod>2025-09-24T03:40:05.492Z</lastmod><changefreq>daily</changefreq><priority>1</priority></url>
<url><loc>https://jan.ai/changelog/2025-06-19-jan-ui-revamp</loc><lastmod>2025-09-24T03:40:05.492Z</lastmod><changefreq>daily</changefreq><priority>1</priority></url>
<url><loc>https://jan.ai/changelog/2025-06-26-jan-nano-mcp</loc><lastmod>2025-09-24T03:40:05.492Z</lastmod><changefreq>daily</changefreq><priority>1</priority></url>
<url><loc>https://jan.ai/changelog/2025-07-17-responsive-ui</loc><lastmod>2025-09-24T03:40:05.492Z</lastmod><changefreq>daily</changefreq><priority>1</priority></url>
<url><loc>https://jan.ai/changelog/2025-07-31-llamacpp-tutorials</loc><lastmod>2025-09-24T03:40:05.492Z</lastmod><changefreq>daily</changefreq><priority>1</priority></url>
<url><loc>https://jan.ai/changelog/2025-08-07-gpt-oss</loc><lastmod>2025-09-24T03:40:05.492Z</lastmod><changefreq>daily</changefreq><priority>1</priority></url>
<url><loc>https://jan.ai/changelog/2025-08-14-general-improvs</loc><lastmod>2025-09-24T03:40:05.492Z</lastmod><changefreq>daily</changefreq><priority>1</priority></url>
<url><loc>https://jan.ai/changelog/2025-08-28-image-support</loc><lastmod>2025-09-24T03:40:05.492Z</lastmod><changefreq>daily</changefreq><priority>1</priority></url>
<url><loc>https://jan.ai/changelog/2025-09-18-auto-optimize-vision-imports</loc><lastmod>2025-09-24T03:40:05.492Z</lastmod><changefreq>daily</changefreq><priority>1</priority></url>
<url><loc>https://jan.ai/docs</loc><lastmod>2025-09-24T03:40:05.492Z</lastmod><changefreq>daily</changefreq><priority>1</priority></url>
<url><loc>https://jan.ai/docs/desktop</loc><lastmod>2025-09-24T03:40:05.492Z</lastmod><changefreq>daily</changefreq><priority>1</priority></url>
<url><loc>https://jan.ai/docs/desktop/api-server</loc><lastmod>2025-09-24T03:40:05.492Z</lastmod><changefreq>daily</changefreq><priority>1</priority></url>
<url><loc>https://jan.ai/docs/desktop/assistants</loc><lastmod>2025-09-24T03:40:05.492Z</lastmod><changefreq>daily</changefreq><priority>1</priority></url>
<url><loc>https://jan.ai/docs/desktop/data-folder</loc><lastmod>2025-09-24T03:40:05.492Z</lastmod><changefreq>daily</changefreq><priority>1</priority></url>
<url><loc>https://jan.ai/docs/desktop/install/linux</loc><lastmod>2025-09-24T03:40:05.492Z</lastmod><changefreq>daily</changefreq><priority>1</priority></url>
<url><loc>https://jan.ai/docs/desktop/install/mac</loc><lastmod>2025-09-24T03:40:05.492Z</lastmod><changefreq>daily</changefreq><priority>1</priority></url>
<url><loc>https://jan.ai/docs/desktop/install/windows</loc><lastmod>2025-09-24T03:40:05.492Z</lastmod><changefreq>daily</changefreq><priority>1</priority></url>
<url><loc>https://jan.ai/docs/desktop/jan-models/jan-nano-128</loc><lastmod>2025-09-24T03:40:05.492Z</lastmod><changefreq>daily</changefreq><priority>1</priority></url>
<url><loc>https://jan.ai/docs/desktop/jan-models/jan-nano-32</loc><lastmod>2025-09-24T03:40:05.492Z</lastmod><changefreq>daily</changefreq><priority>1</priority></url>
<url><loc>https://jan.ai/docs/desktop/jan-models/jan-v1</loc><lastmod>2025-09-24T03:40:05.492Z</lastmod><changefreq>daily</changefreq><priority>1</priority></url>
<url><loc>https://jan.ai/docs/desktop/jan-models/lucy</loc><lastmod>2025-09-24T03:40:05.492Z</lastmod><changefreq>daily</changefreq><priority>1</priority></url>
<url><loc>https://jan.ai/docs/desktop/llama-cpp</loc><lastmod>2025-09-24T03:40:05.492Z</lastmod><changefreq>daily</changefreq><priority>1</priority></url>
<url><loc>https://jan.ai/docs/desktop/llama-cpp-server</loc><lastmod>2025-09-24T03:40:05.492Z</lastmod><changefreq>daily</changefreq><priority>1</priority></url>
<url><loc>https://jan.ai/docs/desktop/manage-models</loc><lastmod>2025-09-24T03:40:05.492Z</lastmod><changefreq>daily</changefreq><priority>1</priority></url>
<url><loc>https://jan.ai/docs/desktop/mcp</loc><lastmod>2025-09-24T03:40:05.492Z</lastmod><changefreq>daily</changefreq><priority>1</priority></url>
<url><loc>https://jan.ai/docs/desktop/mcp-examples/browser/browserbase</loc><lastmod>2025-09-24T03:40:05.492Z</lastmod><changefreq>daily</changefreq><priority>1</priority></url>
<url><loc>https://jan.ai/docs/desktop/mcp-examples/data-analysis/e2b</loc><lastmod>2025-09-24T03:40:05.492Z</lastmod><changefreq>daily</changefreq><priority>1</priority></url>
<url><loc>https://jan.ai/docs/desktop/mcp-examples/data-analysis/jupyter</loc><lastmod>2025-09-24T03:40:05.492Z</lastmod><changefreq>daily</changefreq><priority>1</priority></url>
<url><loc>https://jan.ai/docs/desktop/mcp-examples/deepresearch/octagon</loc><lastmod>2025-09-24T03:40:05.492Z</lastmod><changefreq>daily</changefreq><priority>1</priority></url>
<url><loc>https://jan.ai/docs/desktop/mcp-examples/design/canva</loc><lastmod>2025-09-24T03:40:05.492Z</lastmod><changefreq>daily</changefreq><priority>1</priority></url>
<url><loc>https://jan.ai/docs/desktop/mcp-examples/productivity/linear</loc><lastmod>2025-09-24T03:40:05.492Z</lastmod><changefreq>daily</changefreq><priority>1</priority></url>
<url><loc>https://jan.ai/docs/desktop/mcp-examples/productivity/todoist</loc><lastmod>2025-09-24T03:40:05.492Z</lastmod><changefreq>daily</changefreq><priority>1</priority></url>
<url><loc>https://jan.ai/docs/desktop/mcp-examples/search/exa</loc><lastmod>2025-09-24T03:40:05.492Z</lastmod><changefreq>daily</changefreq><priority>1</priority></url>
<url><loc>https://jan.ai/docs/desktop/mcp-examples/search/serper</loc><lastmod>2025-09-24T03:40:05.492Z</lastmod><changefreq>daily</changefreq><priority>1</priority></url>
<url><loc>https://jan.ai/docs/desktop/model-parameters</loc><lastmod>2025-09-24T03:40:05.492Z</lastmod><changefreq>daily</changefreq><priority>1</priority></url>
<url><loc>https://jan.ai/docs/desktop/privacy</loc><lastmod>2025-09-24T03:40:05.492Z</lastmod><changefreq>daily</changefreq><priority>1</priority></url>
<url><loc>https://jan.ai/docs/desktop/privacy-policy</loc><lastmod>2025-09-24T03:40:05.492Z</lastmod><changefreq>daily</changefreq><priority>1</priority></url>
<url><loc>https://jan.ai/docs/desktop/quickstart</loc><lastmod>2025-09-24T03:40:05.492Z</lastmod><changefreq>daily</changefreq><priority>1</priority></url>
<url><loc>https://jan.ai/docs/desktop/remote-models/anthropic</loc><lastmod>2025-09-24T03:40:05.492Z</lastmod><changefreq>daily</changefreq><priority>1</priority></url>
<url><loc>https://jan.ai/docs/desktop/remote-models/cohere</loc><lastmod>2025-09-24T03:40:05.492Z</lastmod><changefreq>daily</changefreq><priority>1</priority></url>
<url><loc>https://jan.ai/docs/desktop/remote-models/google</loc><lastmod>2025-09-24T03:40:05.492Z</lastmod><changefreq>daily</changefreq><priority>1</priority></url>
<url><loc>https://jan.ai/docs/desktop/remote-models/groq</loc><lastmod>2025-09-24T03:40:05.492Z</lastmod><changefreq>daily</changefreq><priority>1</priority></url>
<url><loc>https://jan.ai/docs/desktop/remote-models/huggingface</loc><lastmod>2025-09-24T03:40:05.492Z</lastmod><changefreq>daily</changefreq><priority>1</priority></url>
<url><loc>https://jan.ai/docs/desktop/remote-models/mistralai</loc><lastmod>2025-09-24T03:40:05.492Z</lastmod><changefreq>daily</changefreq><priority>1</priority></url>
<url><loc>https://jan.ai/docs/desktop/remote-models/openai</loc><lastmod>2025-09-24T03:40:05.492Z</lastmod><changefreq>daily</changefreq><priority>1</priority></url>
<url><loc>https://jan.ai/docs/desktop/remote-models/openrouter</loc><lastmod>2025-09-24T03:40:05.492Z</lastmod><changefreq>daily</changefreq><priority>1</priority></url>
<url><loc>https://jan.ai/docs/desktop/server-examples/continue-dev</loc><lastmod>2025-09-24T03:40:05.492Z</lastmod><changefreq>daily</changefreq><priority>1</priority></url>
<url><loc>https://jan.ai/docs/desktop/server-examples/llmcord</loc><lastmod>2025-09-24T03:40:05.492Z</lastmod><changefreq>daily</changefreq><priority>1</priority></url>
<url><loc>https://jan.ai/docs/desktop/server-examples/n8n</loc><lastmod>2025-09-24T03:40:05.492Z</lastmod><changefreq>daily</changefreq><priority>1</priority></url>
<url><loc>https://jan.ai/docs/desktop/server-examples/tabby</loc><lastmod>2025-09-24T03:40:05.492Z</lastmod><changefreq>daily</changefreq><priority>1</priority></url>
<url><loc>https://jan.ai/docs/desktop/server-settings</loc><lastmod>2025-09-24T03:40:05.492Z</lastmod><changefreq>daily</changefreq><priority>1</priority></url>
<url><loc>https://jan.ai/docs/desktop/server-troubleshooting</loc><lastmod>2025-09-24T03:40:05.492Z</lastmod><changefreq>daily</changefreq><priority>1</priority></url>
<url><loc>https://jan.ai/docs/desktop/settings</loc><lastmod>2025-09-24T03:40:05.492Z</lastmod><changefreq>daily</changefreq><priority>1</priority></url>
<url><loc>https://jan.ai/docs/desktop/troubleshooting</loc><lastmod>2025-09-24T03:40:05.492Z</lastmod><changefreq>daily</changefreq><priority>1</priority></url>
<url><loc>https://jan.ai/docs/server</loc><lastmod>2025-09-24T03:40:05.492Z</lastmod><changefreq>daily</changefreq><priority>1</priority></url>
<url><loc>https://jan.ai/docs/server/api-reference</loc><lastmod>2025-09-24T03:40:05.492Z</lastmod><changefreq>daily</changefreq><priority>1</priority></url>
<url><loc>https://jan.ai/docs/server/api-reference-administration</loc><lastmod>2025-09-24T03:40:05.492Z</lastmod><changefreq>daily</changefreq><priority>1</priority></url>
<url><loc>https://jan.ai/docs/server/api-reference-authentication</loc><lastmod>2025-09-24T03:40:05.492Z</lastmod><changefreq>daily</changefreq><priority>1</priority></url>
<url><loc>https://jan.ai/docs/server/api-reference-chat</loc><lastmod>2025-09-24T03:40:05.492Z</lastmod><changefreq>daily</changefreq><priority>1</priority></url>
<url><loc>https://jan.ai/docs/server/api-reference-chat-conversations</loc><lastmod>2025-09-24T03:40:05.492Z</lastmod><changefreq>daily</changefreq><priority>1</priority></url>
<url><loc>https://jan.ai/docs/server/api-reference-conversations</loc><lastmod>2025-09-24T03:40:05.492Z</lastmod><changefreq>daily</changefreq><priority>1</priority></url>
<url><loc>https://jan.ai/docs/server/api-reference-jan-responses</loc><lastmod>2025-09-24T03:40:05.492Z</lastmod><changefreq>daily</changefreq><priority>1</priority></url>
<url><loc>https://jan.ai/docs/server/api-reference-jan-server</loc><lastmod>2025-09-24T03:40:05.492Z</lastmod><changefreq>daily</changefreq><priority>1</priority></url>
<url><loc>https://jan.ai/docs/server/architecture</loc><lastmod>2025-09-24T03:40:05.492Z</lastmod><changefreq>daily</changefreq><priority>1</priority></url>
<url><loc>https://jan.ai/docs/server/configuration</loc><lastmod>2025-09-24T03:40:05.492Z</lastmod><changefreq>daily</changefreq><priority>1</priority></url>
<url><loc>https://jan.ai/docs/server/development</loc><lastmod>2025-09-24T03:40:05.492Z</lastmod><changefreq>daily</changefreq><priority>1</priority></url>
<url><loc>https://jan.ai/docs/server/installation</loc><lastmod>2025-09-24T03:40:05.492Z</lastmod><changefreq>daily</changefreq><priority>1</priority></url>
<url><loc>https://jan.ai/docs/server/overview</loc><lastmod>2025-09-24T03:40:05.492Z</lastmod><changefreq>daily</changefreq><priority>1</priority></url>
<url><loc>https://jan.ai/download</loc><lastmod>2025-09-24T03:40:05.492Z</lastmod><changefreq>daily</changefreq><priority>1</priority></url>
<url><loc>https://jan.ai/handbook</loc><lastmod>2025-09-24T03:40:05.492Z</lastmod><changefreq>daily</changefreq><priority>1</priority></url>
<url><loc>https://jan.ai/handbook/betting-on-open-source</loc><lastmod>2025-09-24T03:40:05.492Z</lastmod><changefreq>daily</changefreq><priority>1</priority></url>
<url><loc>https://jan.ai/handbook/open-superintelligence</loc><lastmod>2025-09-24T03:40:05.492Z</lastmod><changefreq>daily</changefreq><priority>1</priority></url>
<url><loc>https://jan.ai/post/benchmarking-nvidia-tensorrt-llm</loc><lastmod>2025-09-24T03:40:05.492Z</lastmod><changefreq>daily</changefreq><priority>1</priority></url>
<url><loc>https://jan.ai/post/bitdefender</loc><lastmod>2025-09-24T03:40:05.492Z</lastmod><changefreq>daily</changefreq><priority>1</priority></url>
<url><loc>https://jan.ai/post/data-is-moat</loc><lastmod>2025-09-24T03:40:05.492Z</lastmod><changefreq>daily</changefreq><priority>1</priority></url>
<url><loc>https://jan.ai/post/deepresearch</loc><lastmod>2025-09-24T03:40:05.492Z</lastmod><changefreq>daily</changefreq><priority>1</priority></url>
<url><loc>https://jan.ai/post/deepseek-r1-locally</loc><lastmod>2025-09-24T03:40:05.492Z</lastmod><changefreq>daily</changefreq><priority>1</priority></url>
<url><loc>https://jan.ai/post/jan-v1-for-research</loc><lastmod>2025-09-24T03:40:05.492Z</lastmod><changefreq>daily</changefreq><priority>1</priority></url>
<url><loc>https://jan.ai/post/offline-chatgpt-alternative</loc><lastmod>2025-09-24T03:40:05.492Z</lastmod><changefreq>daily</changefreq><priority>1</priority></url>
<url><loc>https://jan.ai/post/qwen3-settings</loc><lastmod>2025-09-24T03:40:05.492Z</lastmod><changefreq>daily</changefreq><priority>1</priority></url>
<url><loc>https://jan.ai/post/rag-is-not-enough</loc><lastmod>2025-09-24T03:40:05.492Z</lastmod><changefreq>daily</changefreq><priority>1</priority></url>
<url><loc>https://jan.ai/post/run-ai-models-locally</loc><lastmod>2025-09-24T03:40:05.492Z</lastmod><changefreq>daily</changefreq><priority>1</priority></url>
<url><loc>https://jan.ai/post/run-gpt-oss-locally</loc><lastmod>2025-09-24T03:40:05.492Z</lastmod><changefreq>daily</changefreq><priority>1</priority></url>
<url><loc>https://jan.ai/privacy</loc><lastmod>2025-09-24T03:40:05.492Z</lastmod><changefreq>daily</changefreq><priority>1</priority></url>
<url><loc>https://jan.ai/support</loc><lastmod>2025-09-24T03:40:05.492Z</lastmod><changefreq>daily</changefreq><priority>1</priority></url>
</urlset>
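
For reference, each `<url>` entry above is plain sitemaps.org markup; a generator for it can be as small as the sketch below (the `SitemapEntry` shape and `buildSitemap` helper are illustrative assumptions, not the docs site's actual generator):

```typescript
// Sketch (assumption): emit <url> entries in the same shape as the file above.
interface SitemapEntry {
  loc: string;
  lastmod: string;   // ISO 8601 timestamp
  changefreq?: 'daily' | 'weekly' | 'monthly';
  priority?: number; // 0.0 – 1.0
}

function buildSitemap(entries: SitemapEntry[]): string {
  const urls = entries
    .map(
      (e) =>
        `<url><loc>${e.loc}</loc><lastmod>${e.lastmod}</lastmod>` +
        `<changefreq>${e.changefreq ?? 'daily'}</changefreq>` +
        `<priority>${e.priority ?? 1}</priority></url>`
    )
    .join('\n');
  return (
    '<?xml version="1.0" encoding="UTF-8"?>\n' +
    '<urlset xmlns="http://www.sitemaps.org/schemas/sitemap/0.9">\n' +
    urls +
    '\n</urlset>'
  );
}
```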

View File

@@ -77,9 +77,9 @@ export default function Footer() {
return (
<footer className="py-4 w-full">
<div className="mx-auto">
<div className="grid grid-cols-1 md:grid-cols-6 gap-8">
<div className="grid grid-cols-1 lg:grid-cols-6 gap-8">
{/* Jan Logo and Newsletter */}
<div className="md:col-span-2">
<div className="md:col-span-1 lg:col-span-2">
<h2 className="text-[52px] font-bold mb-6">Jan</h2>
<div>
<div className="flex items-center gap-2 mb-3">
@@ -138,7 +138,7 @@ export default function Footer() {
{/* Menu Columns */}
{FOOTER_MENUS.map((menu) => (
<div key={menu.title} className="">
<h3 className="text-lg mb-4 font-bold">{menu.title}</h3>
<h3 className="text-base mb-4 font-bold">{menu.title}</h3>
<ul className="space-y-2">
{menu.links.map((link) => (
<li key={link.name}>

View File

@@ -95,7 +95,7 @@ const Home = () => {
<div className="container mx-auto relative z-10">
<div className="flex justify-center items-center mt-14 lg:mt-20 px-4">
<a
href=""
href={`https://github.com/menloresearch/jan/releases/tag/${lastVersion}`}
target="_blank"
rel="noopener noreferrer"
className="bg-black/40 px-3 lg:px-4 rounded-full h-10 inline-flex items-center max-w-full animate-fade-in delay-100"
@@ -109,7 +109,7 @@ const Home = () => {
</span>
</a>
</div>
<div className="mt-10">
<div className="mt-4">
<div className="text-center relative lg:w-1/2 mx-auto">
<div className="flex flex-col lg:flex-row items-center justify-center gap-4 animate-fade-in-up delay-300">
<span>
@@ -124,15 +124,20 @@
</h1>
</div>
<p className="px-4 lg:px-0 mt-2 text-lg lg:text-2xl font-medium leading-relaxed text-white animate-fade-in-up delay-500 -tracking-[0.6px]">
Jan is the open-source ChatGPT replacement.
The best of open-source AI in an easy-to-use product.
</p>
</div>
<div className="flex px-4 flex-col lg:flex-row items-center gap-4 w-full justify-center text-center animate-fade-in-up delay-600 mt-8 lg:mt-10">
<DropdownButton
size="xxl"
className="w-full !rounded-[20px] lg:w-auto"
lastRelease={lastRelease}
/>
<div className="flex px-4 flex-col lg:flex-row items-start gap-4 w-full justify-center text-center animate-fade-in-up delay-600 mt-8 lg:mt-10">
<div>
<DropdownButton
size="xxl"
className="w-full !rounded-[20px] lg:w-auto"
lastRelease={lastRelease}
/>
<div className="font-medium text-center mt-2 text-white">
+{totalDownload(release)} downloads
</div>
</div>
<a
href="https://discord.com/invite/FTk2MvZwJH"
target="_blank"
@@ -189,7 +194,8 @@ const Home = () => {
</defs>
</svg>
<span className="text-sm">
{formatCompactNumber(discordWidget.presence_count)}
15k+
{/* {formatCompactNumber(discordWidget.presence_count)} */}
</span>
</div>
</Button>
@@ -198,7 +204,7 @@ const Home = () => {
</div>
</div>
<div className="absolute w-full bottom-0 left-0 flex justify-center">
<div className="absolute w-full -bottom-10 left-0 flex justify-center">
<img
className="abs animate-float scale-[175%] md:scale-100"
src={CuteRobotFlyingPNG.src}
@@ -448,9 +454,10 @@ const Home = () => {
<div className="flex items-center gap-1 ml-3">
<IoMdPeople className="size-5" />
<span className="text-sm">
{formatCompactNumber(
15k+
{/* {formatCompactNumber(
discordWidget.presence_count
)}
)} */}
</span>
</div>
</Button>
@@ -483,9 +490,10 @@ const Home = () => {
<div className="flex items-center gap-1 ml-3">
<IoMdPeople className="size-5" />
<span className="text-sm">
{formatCompactNumber(
15k+
{/* {formatCompactNumber(
discordWidget.presence_count
)}
)} */}
</span>
</div>
</Button>
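
The `15k+` literal stands in for the live Discord presence count that `formatCompactNumber(discordWidget.presence_count)` used to render. For context, a compact formatter of that kind typically wraps `Intl.NumberFormat` (a sketch under that assumption; the repo's actual helper may differ):

```typescript
// Sketch (assumption): compact-number formatting like the commented-out
// formatCompactNumber(discordWidget.presence_count) call above.
function formatCompactNumber(value: number): string {
  return new Intl.NumberFormat('en', {
    notation: 'compact',
    maximumFractionDigits: 1,
  }).format(value);
}

console.log(formatCompactNumber(15234)); // "15.2K"
```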

View File

@@ -155,7 +155,7 @@ Debugging headquarters (`/logs/app.txt`):
The silicon brain collection. Each model has its own `model.json`.
<Callout type="info">
Full parameters: [here](/docs/model-parameters)
Full parameters: [here](/docs/desktop/model-parameters)
</Callout>
### `threads/`
@ -216,5 +216,5 @@ Chat archive. Each thread (`/threads/jan_unixstamp/`) contains:
## Delete Jan Data
Uninstall guides: [Mac](/docs/desktop/mac#step-2-clean-up-data-optional),
[Windows](/docs/desktop/windows#step-2-handle-jan-data), or [Linux](docs/desktop/linux#uninstall-jan).
Uninstall guides: [Mac](/docs/desktop/install/mac#step-2-clean-up-data-optional),
[Windows](/docs/desktop/install/windows#step-2-handle-jan-data), or [Linux](docs/desktop/install/linux#uninstall-jan).

View File

@ -184,9 +184,9 @@ Jan is built on the shoulders of giants:
<FAQBox title="Is Jan compatible with my system?">
**Supported OS**:
- [Windows 10+](/docs/desktop/windows#compatibility)
- [macOS 12+](/docs/desktop/mac#compatibility)
- [Linux (Ubuntu 20.04+)](/docs/desktop/linux)
- [Windows 10+](/docs/desktop/install/windows#compatibility)
- [macOS 12+](/docs/desktop/install/mac#compatibility)
- [Linux (Ubuntu 20.04+)](/docs/desktop/install/linux)
**Hardware**:
- Minimum: 8GB RAM, 10GB storage
@ -216,7 +216,7 @@ Jan is built on the shoulders of giants:
<FAQBox title="How does Jan protect privacy?">
- Runs 100% offline once models are downloaded
- All data stored locally in [Jan Data Folder](/docs/data-folder)
- All data stored locally in [Jan Data Folder](/docs/desktop/data-folder)
- No telemetry without explicit consent
- Open source code you can audit

View File

@ -193,7 +193,7 @@ $XDG_CONFIG_HOME = /home/username/custom_config
~/.config/Jan/data
```
See [Jan Data Folder](/docs/data-folder) for details.
See [Jan Data Folder](/docs/desktop/data-folder) for details.
## GPU Acceleration
@ -244,7 +244,7 @@ export LD_LIBRARY_PATH=$LD_LIBRARY_PATH:/usr/local/cuda/lib64
### Step 2: Enable GPU Acceleration
1. Navigate to **Settings** (<Settings width={16} height={16} style={{display:"inline"}}/>) > **Local Engine** > **Llama.cpp**
2. Select appropriate backend in **llama-cpp Backend**. Details in our [guide](/docs/local-engines/llama-cpp).
2. Select appropriate backend in **llama-cpp Backend**. Details in our [guide](/docs/desktop/local-engines/llama-cpp).
<Callout type="info">
CUDA offers better performance than Vulkan.
@ -258,7 +258,7 @@ CUDA offers better performance than Vulkan.
Requires Vulkan support.
1. Navigate to **Settings** (<Settings width={16} height={16} style={{display:"inline"}}/>) > **Hardware** > **GPUs**
2. Select appropriate backend in **llama-cpp Backend**. Details in our [guide](/docs/local-engines/llama-cpp).
2. Select appropriate backend in **llama-cpp Backend**. Details in our [guide](/docs/desktop/local-engines/llama-cpp).
</Tabs.Tab>
@ -266,7 +266,7 @@ Requires Vulkan support.
Requires Vulkan support.
1. Navigate to **Settings** (<Settings width={16} height={16} style={{display:"inline"}}/>) > **Hardware** > **GPUs**
2. Select appropriate backend in **llama-cpp Backend**. Details in our [guide](/docs/local-engines/llama-cpp).
2. Select appropriate backend in **llama-cpp Backend**. Details in our [guide](/docs/desktop/local-engines/llama-cpp).
</Tabs.Tab>
</Tabs>

View File

@ -111,7 +111,7 @@ Default location:
# Default installation directory
~/Library/Application\ Support/Jan/data
```
See [Jan Data Folder](/docs/data-folder) for details.
See [Jan Data Folder](/docs/desktop/data-folder) for details.
## Uninstall Jan
@ -158,7 +158,7 @@ No, it cannot be restored once you delete the Jan data folder during uninstallat
</FAQBox>
<Callout type="info">
💡 Warning: If you have any trouble during installation, please see our [Troubleshooting](/docs/troubleshooting)
💡 Warning: If you have any trouble during installation, please see our [Troubleshooting](/docs/desktop/troubleshooting)
guide to resolve your problem.
</Callout>

View File

@ -119,7 +119,7 @@ Default installation path:
~\Users\<YourUsername>\AppData\Roaming\Jan\data
```
See [Jan Data Folder](/docs/data-folder) for complete folder structure details.
See [Jan Data Folder](/docs/desktop/data-folder) for complete folder structure details.
## GPU Acceleration

View File

@ -24,7 +24,7 @@ import { Settings } from 'lucide-react'
`llama.cpp` is the core **inference engine** Jan uses to run AI models locally on your computer. This section covers the settings for the engine itself, which control *how* a model processes information on your hardware.
<Callout>
Looking for API server settings (like port, host, CORS)? They have been moved to the dedicated [**Local API Server**](/docs/api-server) page.
Looking for API server settings (like port, host, CORS)? They have been moved to the dedicated [**Local API Server**](/docs/desktop/api-server) page.
</Callout>
## Accessing Engine Settings

View File

@ -30,9 +30,9 @@ This guide shows you how to add, customize, and delete models within Jan.
Local models are managed through [Llama.cpp](https://github.com/ggerganov/llama.cpp), and these models are in a
format called GGUF. When you run them locally, they will use your computer's memory (RAM) and processing power, so
please make sure that you download models that match the hardware specifications for your operating system:
- [Mac](/docs/desktop/mac#compatibility)
- [Windows](/docs/desktop/windows#compatibility)
- [Linux](/docs/desktop/linux#compatibility).
- [Mac](/docs/desktop/install/mac#compatibility)
- [Windows](/docs/desktop/install/windows#compatibility)
- [Linux](/docs/desktop/install/linux#compatibility).
### Adding Models
@ -156,7 +156,7 @@ For advanced users who want to add a specific model that is not available within
Key fields to configure:
1. The **Settings** array is where you can set the path or location of your model in your computer, the context
length allowed, and the chat template expected by your model.
2. The [**Parameters**](/docs/model-parameters) are the adjustable settings that affect how your model operates or
2. The [**Parameters**](/docs/desktop/model-parameters) are the adjustable settings that affect how your model operates or
processes the data. The fields in the parameters array are typically general and can be used across different
models. Here is an example of model parameters:
@ -186,7 +186,7 @@ models. Here is an example of model parameters:
<Callout type="info">
When using cloud models, be aware of any associated costs and rate limits from the providers. See detailed guide for
each cloud model provider [here](/docs/remote-models/anthropic).
each cloud model provider [here](/docs/desktop/remote-models/anthropic).
</Callout>
Jan supports connecting to various AI cloud providers that are OpenAI API-compatible, including: OpenAI (GPT-4o, o3,...),

View File

@ -100,7 +100,7 @@ making your workflows more modular and adaptable over time.
<Callout type="info">
To use MCP effectively, ensure your AI model supports tool calling capabilities:
- For cloud models (like Claude or GPT-4): Verify tool calling is enabled in your API settings
- For local models: Enable tool calling in the model parameters [click the edit button in Model Capabilities](/docs/model-parameters#model-capabilities-edit-button)
- For local models: Enable tool calling in the model parameters [click the edit button in Model Capabilities](/docs/desktop/model-parameters#model-capabilities-edit-button)
- Check the model's documentation to confirm MCP compatibility
</Callout>

View File

@ -26,7 +26,7 @@ import { Callout } from 'nextra/components'
Jan is your AI. Period. Here's what we do with data.
<Callout>
Full privacy policy lives [here](/docs/privacy-policy), if you're into that sort of thing.
Full privacy policy lives [here](/docs/desktop/privacy-policy), if you're into that sort of thing.
</Callout>
<Callout type="info">

View File

@ -27,7 +27,7 @@ Get up and running with Jan in minutes. This guide will help you install Jan, do
### Step 1: Install Jan
1. [Download Jan](/download)
2. Install the app ([Mac](/docs/desktop/mac), [Windows](/docs/desktop/windows), [Linux](/docs/desktop/linux))
2. Install the app ([Mac](/docs/desktop/install/mac), [Windows](/docs/desktop/install/windows), [Linux](/docs/desktop/install/linux))
3. Launch Jan
### Step 2: Download Jan v1
@ -61,7 +61,7 @@ Try asking Jan v1 questions like:
- "What are the pros and cons of electric vehicles?"
<Callout type="tip">
**Want to give Jan v1 access to current web information?** Check out our [Serper MCP tutorial](/docs/mcp-examples/search/serper) to enable real-time web search with 2,500 free searches!
**Want to give Jan v1 access to current web information?** Check out our [Serper MCP tutorial](/docs/desktop/mcp-examples/search/serper) to enable real-time web search with 2,500 free searches!
</Callout>
</Steps>
@ -138,4 +138,4 @@ Connect to OpenAI, Anthropic, Groq, Mistral, and others:
![Connect Remote APIs](./_assets/quick-start-03.png)
For detailed setup, see [Remote APIs](/docs/remote-models/openai).
For detailed setup, see [Remote APIs](/docs/desktop/remote-models/openai).

View File

@ -56,7 +56,7 @@ Ensure your API key has sufficient credits
## Available Anthropic Models
Jan automatically includes Anthropic's available models. In case you want to use a specific Anthropic model
that you cannot find in **Jan**, follow instructions in [Add Cloud Models](/docs/manage-models#add-models-1):
that you cannot find in **Jan**, follow instructions in [Add Cloud Models](/docs/desktop/manage-models#add-models-1):
- See list of available models in [Anthropic Models](https://docs.anthropic.com/claude/docs/models-overview).
- The `id` property must match the model name in the list. For example, `claude-opus-4@20250514`, `claude-sonnet-4@20250514`, or `claude-3-5-haiku@20241022`.
@ -72,7 +72,7 @@ Common issues and solutions:
**2. Connection Problems**
- Check your internet connection
- Verify Anthropic's system status
- Look for error messages in [Jan's logs](/docs/troubleshooting#how-to-get-error-logs)
- Look for error messages in [Jan's logs](/docs/desktop/troubleshooting#how-to-get-error-logs)
**3. Model Unavailable**
- Confirm your API key has access to the model

View File

@ -55,7 +55,7 @@ Ensure your API key has sufficient credits.
## Available Cohere Models
Jan automatically includes Cohere's available models. In case you want to use a specific
Cohere model that you cannot find in **Jan**, follow instructions in [Add Cloud Models](/docs/manage-models):
Cohere model that you cannot find in **Jan**, follow instructions in [Add Cloud Models](/docs/desktop/manage-models):
- See list of available models in [Cohere Documentation](https://docs.cohere.com/v2/docs/models).
- The `id` property must match the model name in the list. For example, `command-nightly` or `command-light`.
@ -71,7 +71,7 @@ Common issues and solutions:
**2. Connection Problems**
- Check your internet connection
- Verify Cohere's [system status](https://status.cohere.com/)
- Look for error messages in [Jan's logs](/docs/troubleshooting#how-to-get-error-logs)
- Look for error messages in [Jan's logs](/docs/desktop/troubleshooting#how-to-get-error-logs)
**3. Model Unavailable**
- Confirm your API key has access to the model

View File

@ -53,7 +53,7 @@ Ensure your API key has sufficient credits
## Available Google Models
Jan automatically includes Google's available models like Gemini series. In case you want to use a specific
Gemini model that you cannot find in **Jan**, follow instructions in [Add Cloud Models](/docs/manage-models#add-models-1):
Gemini model that you cannot find in **Jan**, follow instructions in [Add Cloud Models](/docs/desktop/manage-models#add-models-1):
- See list of available models in [Google Models](https://ai.google.dev/gemini-api/docs/models/gemini).
- The `id` property must match the model name in the list. For example, `gemini-1.5-pro` or `gemini-2.0-flash-lite-preview`.
@ -69,7 +69,7 @@ Common issues and solutions:
**2. Connection Problems**
- Check your internet connection
- Verify [Gemini's system status](https://www.google.com/appsstatus/dashboard/)
- Look for error messages in [Jan's logs](/docs/troubleshooting#how-to-get-error-logs)
- Look for error messages in [Jan's logs](/docs/desktop/troubleshooting#how-to-get-error-logs)
**3. Model Unavailable**
- Confirm your API key has access to the model

View File

@ -54,7 +54,7 @@ Ensure your API key has sufficient credits
## Available Models Through Groq
Jan automatically includes Groq's available models. In case you want to use a specific Groq model that
you cannot find in **Jan**, follow the instructions in the [Add Cloud Models](/docs/manage-models#add-models-1):
you cannot find in **Jan**, follow the instructions in the [Add Cloud Models](/docs/desktop/manage-models#add-models-1):
- See list of available models in [Groq Documentation](https://console.groq.com/docs/models).
- The `id` property must match the model name in the list. For example, if you want to use Llama3.3 70B, you must set the `id` property to `llama-3.3-70b-versatile`.
@ -70,7 +70,7 @@ Common issues and solutions:
**2. Connection Problems**
- Check your internet connection
- Verify Groq's system status
- Look for error messages in [Jan's logs](/docs/troubleshooting#how-to-get-error-logs)
- Look for error messages in [Jan's logs](/docs/desktop/troubleshooting#how-to-get-error-logs)
**3. Model Unavailable**
- Confirm your API key has access to the model

View File

@ -141,7 +141,7 @@ Common issues and solutions:
**2. Connection Problems**
- Check your internet connection
- Verify Hugging Face's system status
- Look for error messages in [Jan's logs](/docs/troubleshooting#how-to-get-error-logs)
- Look for error messages in [Jan's logs](/docs/desktop/troubleshooting#how-to-get-error-logs)
**3. Model Unavailable**
- Confirm your API key has access to the model

View File

@ -56,7 +56,7 @@ Ensure your API key has sufficient credits
## Available Mistral Models
Jan automatically includes Mistral's available models. In case you want to use a specific Mistral model
that you cannot find in **Jan**, follow the instructions in [Add Cloud Models](/docs/manage-models#add-models-1):
that you cannot find in **Jan**, follow the instructions in [Add Cloud Models](/docs/desktop/manage-models#add-models-1):
- See list of available models in [Mistral AI Documentation](https://docs.mistral.ai/platform/endpoints).
- The `id` property must match the model name in the list. For example, if you want to use
Mistral Large, you must set the `id` property to `mistral-large-latest`
@ -73,7 +73,7 @@ Common issues and solutions:
**2. Connection Problems**
- Check your internet connection
- Verify Mistral AI's system status
- Look for error messages in [Jan's logs](/docs/troubleshooting#how-to-get-error-logs)
- Look for error messages in [Jan's logs](/docs/desktop/troubleshooting#how-to-get-error-logs)
**3. Model Unavailable**
- Confirm your API key has access to the model

View File

@ -58,7 +58,7 @@ Start chatting
## Available OpenAI Models
Jan automatically includes popular OpenAI models. In case you want to use a specific model that you
cannot find in Jan, follow instructions in [Add Cloud Models](/docs/manage-models#add-models-1):
cannot find in Jan, follow instructions in [Add Cloud Models](/docs/desktop/manage-models#add-models-1):
- See list of available models in [OpenAI Platform](https://platform.openai.com/docs/models/overview).
- The id property must match the model name in the list. For example, if you want to use the
[GPT-4.5](https://platform.openai.com/docs/models/), you must set the id property
@ -76,7 +76,7 @@ Common issues and solutions:
2. Connection Problems
- Check your internet connection
- Verify OpenAI's [system status](https://status.openai.com)
- Look for error messages in [Jan's logs](/docs/troubleshooting#how-to-get-error-logs)
- Look for error messages in [Jan's logs](/docs/desktop/troubleshooting#how-to-get-error-logs)
3. Model Unavailable
- Confirm your API key has access to the model

View File

@ -88,7 +88,7 @@ Common issues and solutions:
**2. Connection Problems**
- Check your internet connection
- Verify OpenRouter's [system status](https://status.openrouter.ai)
- Look for error messages in [Jan's logs](/docs/troubleshooting#how-to-get-error-logs)
- Look for error messages in [Jan's logs](/docs/desktop/troubleshooting#how-to-get-error-logs)
**3. Model Unavailable**
- Confirm the model is currently available on OpenRouter

View File

@ -69,7 +69,7 @@ Click the gear icon next to any model to adjust how it behaves:
- **Presence Penalty**: Encourages the model to use varied vocabulary
<Callout type="info">
For detailed explanations of these parameters, see our [Model Parameters Guide](/docs/model-parameters).
For detailed explanations of these parameters, see our [Model Parameters Guide](/docs/desktop/model-parameters).
</Callout>
## Hardware Monitoring
@ -117,7 +117,7 @@ Access privacy settings at **Settings** > **Privacy**:
- Change this setting anytime
<Callout type="info">
See exactly what we collect (with your permission) in our [Privacy Policy](/docs/privacy).
See exactly what we collect (with your permission) in our [Privacy Policy](/docs/desktop/privacy).
</Callout>
![Analytics](./_assets/settings-07.png)
@ -174,7 +174,7 @@ This includes configuration for:
- CORS (Cross-Origin Resource Sharing)
- Verbose Logging
[**Go to Local API Server Settings &rarr;**](/docs/api-server)
[**Go to Local API Server Settings &rarr;**](/docs/desktop/api-server)
## Emergency Options

View File

@ -226,7 +226,7 @@ When models won't respond or show these errors:
- **RAM:** Use models under 80% of available memory
- 8GB system: Use models under 6GB
- 16GB system: Use models under 13GB
- **Hardware:** Verify your system meets [minimum requirements](/docs/troubleshooting#step-1-verify-hardware-and-system-requirements)
- **Hardware:** Verify your system meets [minimum requirements](/docs/desktop/troubleshooting#step-1-verify-hardware-and-system-requirements)
**2. Adjust Model Settings**
- Open model settings in the chat sidebar
@ -318,8 +318,8 @@ If these solutions don't work:
- Include your logs and system info
**3. Check Resources:**
- [System requirements](/docs/troubleshooting#step-1-verify-hardware-and-system-requirements)
- [Model compatibility guides](/docs/manage-models)
- [System requirements](/docs/desktop/troubleshooting#step-1-verify-hardware-and-system-requirements)
- [Model compatibility guides](/docs/desktop/manage-models)
- [Hardware setup guides](/docs/desktop/)
<Callout type="info">

View File

@ -68,7 +68,7 @@ Click the gear icon next to a model to configure advanced settings:
- **Repeat Penalty**: Controls how strongly the model avoids repeating phrases (higher values reduce repetition)
- **Presence Penalty**: Discourages reusing words that already appeared in the text (helps with variety)
_See [Model Parameters](/docs/model-parameters) for a more detailed explanation._
_See [Model Parameters](/docs/desktop/model-parameters) for a more detailed explanation._
## Hardware
@ -108,7 +108,7 @@ You can help improve Jan by sharing anonymous usage data:
2. You can change this setting at any time
<Callout type="info">
Read more about that we collect with opt-in users at [Privacy](/docs/privacy).
Read more about that we collect with opt-in users at [Privacy](/docs/desktop/privacy).
</Callout>
<br/>

View File

@ -328,19 +328,19 @@ This command ensures that the necessary permissions are granted for Jan's instal
When you start a chat with a model and encounter a **Failed to Fetch** or **Something's Amiss** error, here are some possible solutions to resolve it:
**1. Check System & Hardware Requirements**
- Hardware dependencies: Ensure your device meets all [hardware requirements](docs/troubleshooting#step-1-verify-hardware-and-system-requirements)
- OS: Ensure your operating system meets the minimum requirements ([Mac](/docs/desktop/mac#minimum-requirements), [Windows](/docs/desktop/windows#compatibility), [Linux](docs/desktop/linux#compatibility))
- Hardware dependencies: Ensure your device meets all [hardware requirements](docs/desktop/troubleshooting#step-1-verify-hardware-and-system-requirements)
- OS: Ensure your operating system meets the minimum requirements ([Mac](/docs/desktop/install/mac#minimum-requirements), [Windows](/docs/desktop/install/windows#compatibility), [Linux](/docs/desktop/install/linux#compatibility))
- RAM: Choose models that use less than 80% of your available RAM
- For 8GB systems: Use models under 6GB
- For 16GB systems: Use models under 13GB
**2. Check Model Parameters**
- In **Engine Settings** in right sidebar, check your `ngl` ([number of GPU layers](/docs/models/model-parameters#engine-parameters)) setting to see if it's too high
- In **Engine Settings** in right sidebar, check your `ngl` ([number of GPU layers](/docs/desktop/models/model-parameters#engine-parameters)) setting to see if it's too high
- Start with a lower NGL value and increase gradually based on your GPU memory
**3. Port Conflicts**
If you check your [app logs](/docs/troubleshooting#how-to-get-error-logs) & see "Bind address failed at 127.0.0.1:39291", check port availability:
If you check your [app logs](/docs/desktop/troubleshooting#how-to-get-error-logs) & see "Bind address failed at 127.0.0.1:39291", check port availability:
```
# Mac
netstat -an | grep 39291
@ -371,7 +371,7 @@ This will delete all chat history, models, and settings.
</Callout>
**5. Try a clean installation**
- Uninstall Jan & clean Jan data folders ([Mac](/docs/desktop/mac#uninstall-jan), [Windows](/docs/desktop/windows#uninstall-jan), [Linux](docs/desktop/linux#uninstall-jan))
- Uninstall Jan & clean Jan data folders ([Mac](/docs/desktop/install/mac#uninstall-jan), [Windows](/docs/desktop/install/windows#uninstall-jan), [Linux](/docs/desktop/install/linux#uninstall-jan))
- Install the latest [stable release](/download)
<Callout type="warning">
@ -392,7 +392,7 @@ The "Unexpected token" error usually relates to OpenAI API authentication or reg
## Need Further Support?
If you can't find what you need in our troubleshooting guide, feel free reach out to us for extra help:
- **Copy** your [app logs](/docs/troubleshooting#how-to-get-error-logs)
- **Copy** your [app logs](/docs/desktop/troubleshooting#how-to-get-error-logs)
- Go to our [Discord](https://discord.com/invite/FTk2MvZwJH) & send it to **#🆘|jan-help** channel for further support.

Binary file not shown.

After

Width:  |  Height:  |  Size: 288 KiB

Binary file not shown.

After

Width:  |  Height:  |  Size: 634 KiB

Binary file not shown.

After

Width:  |  Height:  |  Size: 30 KiB

Binary file not shown.

After

Width:  |  Height:  |  Size: 30 KiB

Binary file not shown.

After

Width:  |  Height:  |  Size: 154 KiB

Binary file not shown.

After

Width:  |  Height:  |  Size: 134 KiB

Binary file not shown.

After

Width:  |  Height:  |  Size: 116 KiB

File diff suppressed because one or more lines are too long

After

Width:  |  Height:  |  Size: 30 KiB

Binary file not shown.

After

Width:  |  Height:  |  Size: 38 KiB

View File

@ -17,7 +17,7 @@ Jan now supports [NVIDIA TensorRT-LLM](https://github.com/NVIDIA/TensorRT-LLM) i
We've been excited for TensorRT-LLM for a while, and [had a lot of fun implementing it](https://github.com/menloresearch/nitro-tensorrt-llm). As part of the process, we've run some benchmarks, to see how TensorRT-LLM fares on consumer hardware (e.g. [4090s](https://www.nvidia.com/en-us/geforce/graphics-cards/40-series/), [3090s](https://www.nvidia.com/en-us/geforce/graphics-cards/30-series/)) we commonly see in the [Jan's hardware community](https://discord.com/channels/1107178041848909847/1201834752206974996).
<Callout type="info" >
**Give it a try!** Jan's [TensorRT-LLM extension](/docs/built-in/tensorrt-llm) is available in Jan v0.4.9 and up ([see more](/docs/built-in/tensorrt-llm)). We precompiled some TensorRT-LLM models for you to try: `Mistral 7b`, `TinyLlama-1.1b`, `TinyJensen-1.1b` 😂
**Give it a try!** Jan's [TensorRT-LLM extension](/docs/desktop/built-in/tensorrt-llm) is available in Jan v0.4.9 and up ([see more](/docs/desktop/built-in/tensorrt-llm)). We precompiled some TensorRT-LLM models for you to try: `Mistral 7b`, `TinyLlama-1.1b`, `TinyJensen-1.1b` 😂
Bugs or feedback? Let us know on [GitHub](https://github.com/menloresearch/jan) or via [Discord](https://discord.com/channels/1107178041848909847/1201832734704795688).
</Callout>

View File

@ -126,7 +126,7 @@ any version with Model Context Protocol in it (>`v0.6.3`).
**The Key: Assistants + Tools**
Running deep research in Jan can be accomplished by combining [custom assistants](https://jan.ai/docs/assistants)
with [MCP search tools](https://jan.ai/docs/mcp-examples/search/exa). This pairing allows any model—local or
with [MCP search tools](https://jan.ai/docs/desktop/mcp-examples/search/exa). This pairing allows any model—local or
cloud—to follow a systematic research workflow, to create a report similar to that of other providers, with some
visible limitations (for now).

View File

@ -0,0 +1,268 @@
---
title: "How we (try to) benchmark GPU kernels accurately"
description: "We present the process behind how we decided to benchmark GPU kernels and iteratively improved our benchmarking pipeline"
tags: ""
categories: research
ogImage: "./_assets/cover-kernel-benchmarking.png"
date: 2025-09-17
---
# How we (try to) benchmark GPU kernels accurately
If you've read other kernel benchmarking articles before, then a lot of the information in this blog post might be second nature to you already. In fact, we would like to start off by thanking the blood, sweat, and tears poured into the many kernel benchmarking guides written before ours, which helped us write better benchmarking code as well as this blog post.
Over here at Menlo, we recently acquired some [RTX PRO 6000 Blackwell Workstation Edition](https://www.nvidia.com/en-sg/products/workstations/professional-desktop-gpus/rtx-pro-6000/), and we are trying to make LLM inference engines like [vLLM](https://github.com/vllm-project/vllm) run faster on it. We've been writing our own kernels specifically for the RTX PRO 6000, and seeing if we can improve inference times on our hardware.
This blog details how our ML Efficiency team identified problems in our benchmarking code and how we iterated on it, following the various cool benchmarking guides out there! Without further ado, let's start benchmarking, from simple programs to GPU kernels.
## Introduction to kernels and benchmarking
For those new to GPU programming, a [kernel](https://modal.com/gpu-glossary/device-software/kernel) is a piece of CUDA code that programmers write to execute a desired sequence of operations on the GPU. A kernel is launched once and executed by many threads running concurrently; we typically launch kernels as a [thread block grid](https://modal.com/gpu-glossary/device-software/thread-block-grid), which spreads the work across multiple [Streaming Multiprocessors (SMs)](https://modal.com/gpu-glossary/device-hardware/streaming-multiprocessor) on the GPU.
Benchmarking is a fundamental aspect of high-performance computing. It enables us to quantitatively compare kernel performance across different problem sizes and understand how various hyperparameters impact execution speed. For GPU kernel development, benchmarking lets us iteratively optimize our kernels so they utilize the GPU better.
That being said, **accurate kernel benchmarking** matters even more: benchmarking kernels that run on the GPU can become very complex, and there are many traps to fall into if not enough care is taken when writing benchmarking scripts. A great alternative is to use the tools NVIDIA offers via the [CUDA Toolkit](https://developer.nvidia.com/cuda-toolkit), such as the [Nsight CUDA Profiling Tools Interface](https://developer.nvidia.com/cupti) (CUPTI) or the [Nsight Compute CLI](https://docs.nvidia.com/nsight-compute/NsightComputeCli/index.html) (`ncu`), which provide accurate measurements of various kernel characteristics. We wanted to use Python because it made it convenient to sweep across different problem shapes and kernels quickly, but that meant we also had to learn how to benchmark kernels properly from scratch.
We will showcase some examples of how to benchmark kernels on the GPU. We chose Python for most of our benchmarking code, as most of our own codebase is in Python, which makes the benchmarks simple to integrate.
## Benchmarking CUDA programs
PyTorch provides a basic API to help time `torch` programs, demonstrated in this [tutorial](https://docs.pytorch.org/tutorials/recipes/recipes/benchmark.html).
A basic implementation can be as simple as:
```python
import torch
import torch.utils.benchmark as benchmark

# Example input tensor (shape chosen for illustration)
x = torch.randn(10000, 64)

def batched_dot_mul_sum(a, b):
    '''Computes batched dot by multiplying and summing'''
    return a.mul(b).sum(-1)

num_threads = torch.get_num_threads()
print(f'Benchmarking on {num_threads} threads')

t0 = benchmark.Timer(
    stmt='batched_dot_mul_sum(x, x)',
    setup='from __main__ import batched_dot_mul_sum',
    globals={'x': x},
    num_threads=num_threads,
    label='Multithreaded batch dot',
    sub_label='Implemented using mul and sum')

print(t0.timeit(100))
```
When benchmarking kernels, there are a few practices we should follow to ensure our measurements are accurate.
### 1. Always benchmark the code not with settings from your machine, but with **settings the user will see**.
Benchmarking how fast your kernels run on a 3090 is meaningless if you are serving your models on an H100 DGX node. It is always a good idea to benchmark your kernels on the hardware you plan to serve on.
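As a small guard, here is a minimal sketch (the target GPU name and the check itself are just illustrative assumptions) that fails fast when a benchmark is accidentally launched on the wrong machine:
```python
import torch

# Hypothetical deployment target; replace with the GPU you actually serve on.
TARGET_GPU_SUBSTRING = "H100"

def assert_target_gpu() -> None:
    name = torch.cuda.get_device_name(0)
    if TARGET_GPU_SUBSTRING not in name:
        raise RuntimeError(
            f"Benchmarking on '{name}', but results are only meaningful on {TARGET_GPU_SUBSTRING} hardware."
        )

assert_target_gpu()
```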
### 2. **Warmup your kernels**
Take a look at this snippet from the tutorial.
```
mul_sum(x, x): 27.6 μs
mul_sum(x, x): 25.3 μs
bmm(x, x): 2775.5 μs
bmm(x, x): 22.4 μs
```
The first `bmm` call takes far longer to run, because most of that time is spent loading [cuBLAS](https://developer.nvidia.com/cublas) kernels on the first run.
Warming up your kernels can be as simple as running the kernel before timing it. This loads those kernels up front, so that we only measure how long the kernel itself takes to run.
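As a rough sketch (assuming `run_kernel()` stands for whatever operation you want to time), a warmup can look like this; the CUDA-event approach in the next section is what you'd use for precise numbers:
```python
import time
import torch

def bench_with_warmup(run_kernel, n_warmup: int = 10, n_repeats: int = 100) -> float:
    # Warmup: pay one-off costs (e.g. loading cuBLAS kernels) before timing.
    for _ in range(n_warmup):
        run_kernel()
    torch.cuda.synchronize()

    start = time.perf_counter()
    for _ in range(n_repeats):
        run_kernel()
    torch.cuda.synchronize()  # wait for the GPU to finish before reading the clock
    return (time.perf_counter() - start) / n_repeats  # seconds per call
```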
### 3. `torch.cuda.synchronize` and CUDA Events
Now we'll introduce a new API, which is the standard way to benchmark kernels. [CUDA events](https://docs.pytorch.org/docs/stable/generated/torch.cuda.Event.html) are awesome for a variety of reasons. The simplest is that they measure time from the perspective of the GPU, whereas `time.time()` and `time.perf_counter()` measure time from the perspective of the CPU.
Moreover, their simple API lets you write benchmarking code like this:
```python
steps = 10
start_events = [torch.cuda.Event(enable_timing=True) for _ in range(steps)]
end_events = [torch.cuda.Event(enable_timing=True) for _ in range(steps)]
for i in range(steps):
    start_events[i].record()
    run_kernel()
    end_events[i].record()
torch.cuda.synchronize()
times = [s.elapsed_time(e) for s, e in zip(start_events, end_events)]
```
`torch.cuda.synchronize()` tells the CPU to wait for the work on the GPU to finish, so that the elapsed time can be calculated after synchronization, as visualised here:
![image](./_assets/speechmatics-events.svg)
_Figure 1: Illustration taken from https://www.speechmatics.com/company/articles-and-news/timing-operations-in-pytorch_
### 4. Flush your L2 Cache
#### What is the L2 Cache
When data is read from or written to [HBM or GDDR](https://www.exxactcorp.com/blog/hpc/gddr6-vs-hbm-gpu-memory), it goes through the [L2 cache](https://docs.nvidia.com/cuda/cuda-c-programming-guide/#architecture) first, which is shared by all [streaming multiprocessors (SMs)](https://modal.com/gpu-glossary/device-hardware/streaming-multiprocessor). The L2 cache caches accesses to local and global memory, and helps reuse data instead of loading it into shared memory again (which can be slow!).
Moreover, unlike the L1 cache, which is present on each SM, all SMs share the same L2 cache!
#### Why we need to flush the L2 Cache
Following this [guide](https://guillesanbri.com/CUDA-Benchmarks/#caches), if you have warmed up or run the kernel before, some of its intermediate data might still be stored in the L2 cache, which can make the kernel look deceptively fast.
However, in a real-world setting, you want to measure how long the kernel realistically takes to run, and more often than not, when running large models, you will be running more than one kernel. This means your cache will probably thrash often and will not keep the data from any specific kernel around for reuse. Thus, to simulate this behaviour, we flush the L2 cache beforehand to eliminate any "help" from it.
Moreover, this also makes it much easier to reason about data reuse for the kernel, as any L2 cache usage is now independent of other kernels or runs.
#### Example of not flushing L2 cache
When we initially benchmarked our kernels, we made the small mistake of not flushing the L2 cache.
![image](./_assets/exceed-sol.png)
_Figure 2: Our SOL % (which is a percentage of our observed maximum speed) is over 100% for the row for shape [2, 19456, 2560]._
#### How to flush the L2 Cache
To flush it, we should add the following lines:
```python
l2_size = torch.cuda.get_device_properties().L2_cache_size
cache = torch.empty(l2_size, dtype=torch.uint8, device="cuda")
#<your benchmarking code here>
cache.zero_() # flush L2 cache
# You should flush your L2 cache within the benchmarking code if you're repeating the same process multiple times
```
This allocates a buffer the size of the L2 cache, and zeroing it in place issues a write operation that goes through the L2 cache and flushes it.
After flushing the L2 cache, we get a more sensible result here:
![image](./_assets/fixed-l2.png)
_Figure 3: New SOL% has all values under 100% now after flushing L2 cache._
### 5. Timing short-lived kernels
Initially, we used [Triton's](https://triton-lang.org/main/getting-started/installation.html) [`do_bench`](https://triton-lang.org/main/python-api/generated/triton.testing.do_bench.html) for benchmarking, as it already does everything we have mentioned above: warmup, CUDA events, and flushing the L2 cache. However, we observed an issue with accurately benchmarking our kernels on smaller shapes: the kernel can be so fast that it finishes before the CPU has issued the CUDA end event from Python.
![image](./_assets/speechmatics-too-fast.png)
_Figure 4: Taken from [Speechmatics](https://www.speechmatics.com/company/articles-and-news/timing-operations-in-pytorch): the kernel finishes before the CUDA end event is launched, and therefore the true kernel timing is not recorded._
This results in kernels that look very slow:
![image](./_assets/small-timed-bug.png)
_Figure 5: Side-by-side comparison of Python benchmark latencies vs `ncu`'s timing (right) for shape [2, 19456, 2560]. `ncu` records a much faster duration of 71.36 μs compared to Python's 103.9 μs._
To fix this, we wrote a custom `do_bench_cuda()` that inserts a dummy, untimed FP32 matmul before benchmarking each shape, so that the CPU has enough time to enqueue the CUDA end event.
This led to more accurate latencies for our small-M kernels.
![image](./_assets/fixed-l2.png)
_Figure 6: There is a significant improvement in SOL% after inserting the dummy matmul._
We also repeat the benchmark function for each shape on 5 copies of the input/output data, to make the CUDA event duration longer; a sketch of the idea follows.
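Here is a minimal sketch of that trick (assuming the kernel is exposed as `f(x)` and `x_shape` is the problem shape; both names are illustrative):
```python
import torch

def make_benchmark_fn(f, x_shape, n_copies: int = 5):
    # Allocate several independent input copies up front (outside the timed region)
    # so the benchmarked call runs long enough for the CUDA end event, and no
    # single buffer stays hot in cache.
    inputs = [torch.randn(*x_shape, device="cuda") for _ in range(n_copies)]

    def run():
        for x in inputs:
            f(x)

    return run
```
The returned callable is then what gets timed, e.g. `do_bench_cuda(make_benchmark_fn(f, shape))` with the function below.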
In the end, this is the `do_bench_cuda` function we used to benchmark our kernels:
```python
import statistics
import torch
def do_bench_cuda(f, n_warmup: int = 10, n_repeats: int = 20):
    l2_size = torch.cuda.get_device_properties().L2_cache_size
    cache = torch.empty(l2_size, dtype=torch.uint8, device="cuda")

    # Matmul in case of short-lived CUDA Events
    A = torch.randn(4096, 4096, dtype=torch.float32, device="cuda")
    B = torch.randn(4096, 4096, dtype=torch.float32, device="cuda")
    A @ B

    # L2 cache flush + Warmup
    for _ in range(n_warmup):
        cache.zero_()
        f()

    start_list = [torch.cuda.Event(enable_timing=True) for _ in range(n_repeats)]
    end_list = [torch.cuda.Event(enable_timing=True) for _ in range(n_repeats)]

    torch.cuda.synchronize()
    for start, end in zip(start_list, end_list):
        cache.zero_()  # flush L2 cache
        A @ B  # add a heavy task to fill GPU pipeline
        start.record()
        f()
        end.record()
    torch.cuda.synchronize()

    timings = [start.elapsed_time(end) for start, end in zip(start_list, end_list)]
    return statistics.median(timings)
```
### 6. Clock Speed
This was a silent problem, and it was very hard to discover that it was causing issues in our measurements. We initially found discrepancies between `ncu`'s latency (676.64 μs) and `do_bench_cuda`'s latency (535 μs) when profiling the shape [2048, 19456, 2560]: `do_bench` was reporting ~140 μs faster timings than `ncu`.
Although most of our benchmarking codebase is in Python, developer errors can still occur, and it is always good to have a point of reference for the true kernel timing. The Nsight Compute CLI (`ncu` for short) can measure kernel latency accurately, and the values it reports are a good figure against which to sanity-check our own benchmarking code.
#### 6.1 Clock Speed
Firstly, we suspected that clock speed could play a part in the discrepancy between `ncu`'s timings and our own benchmarking code. Clock speed affects benchmark times because it is the rate at which the GPU's processing units operate; a higher clock speed translates to more operations per second, which can either speed up or slow down a kernel depending on how it was implemented.
![image](./_assets/clock-speed-effect.png)
_Figure 7: Taken from [GPU Mode Lecture 56](https://www.youtube.com/watch?v=CtrqBmYtSEk). We can see clock speed affects kernel performance. For problem shape of 1024, it got faster after increasing clock speed, while for problem shape of 384, it became slower after clock speed increased._
Looking at this [forum post](https://forums.developer.nvidia.com/t/nsight-compute-clock-speed-during-profiling/208646/3), we realised that part of the discrepancy came from `ncu` locking the clock to the GPU's base clock speed by default. We investigated by locking the clock to the base clock speed, and also tried locking it to the max clock speed using `nvidia-smi -ac=<memClk>,<smClk>`. According to the GPU Mode lecture, neither is a proper solution, for the following reasons:
- Locking to the max clock speed doesn't help, as it only sets the ceiling of GPU performance; the GPU can always fall back to its base clock speed of ~2287 MHz instead of the boosted clock speed of 2617 MHz.
- Locking to the base clock speed is not meaningful either, as it does not reflect the performance users will actually get from our kernels, which at best will run at the boosted clock speed.
However, we did find that we should set `ncu`'s `--clock-control` option to `none`, so that it does not limit itself to the base clock speed. This improved the latency reported by `ncu` from 676.64 μs to 575 μs on the same problem shape of [2048, 19456, 2560].
#### 6.2 Discrepancies after `clock-control`
At the time of writing, we have observed that `ncu` sometimes gives different latency results for the same benchmarking code and problem shapes. The cause is that when we set `--clock-control` to `none`, the GPU clock speed becomes stochastic, which in turn affects the measured kernel latency. A more holistic approach would be to also benchmark kernels across several fixed clock speeds.
![image](./_assets/ncu-compare.png)
_Figure 8: On the same benchmarking code and problem shapes, we can see vast deviations in duration, which is caused by the differences in SM Frequency. This resonates with the graphs shown in Figure 7._
As a result, there can be some discrepancy between `ncu`'s timings and our own. To figure out whether a discrepancy is caused by the SM frequency, you can use the relationship that FLOPS is directly proportional to the SM clock, so durations are inversely proportional to it.
In our case:
`544 μs × 2.28 GHz (SM freq of the 544 μs run) / 2.14 GHz (SM freq of the 575 μs run) ≈ 579 μs`, which is close to the measured 575 μs, so most of the discrepancy was coming from the difference in SM frequency.
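A tiny sketch of that sanity check, using the numbers above:
```python
def rescale_duration_us(duration_us: float, measured_sm_ghz: float, target_sm_ghz: float) -> float:
    # Kernel duration is (roughly) inversely proportional to the SM clock frequency.
    return duration_us * measured_sm_ghz / target_sm_ghz

# 544 us measured at 2.28 GHz, predicted duration at 2.14 GHz:
print(rescale_duration_us(544, 2.28, 2.14))  # ~579 us, close to the 575 us ncu reported
```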
The final command we used was:
`ncu -s 5 -k $kernel_name --clock-control none python3 benchmarks/bench_mm.py --profile 2048 19456 2560`
Explanation of arguments:
- `-s`: number of kernel launches to skip before profiling
- `-k`: name of the kernel to profile
- `--clock-control`: whether `ncu` controls the clock speed
Below is a side-by-side comparison of `ncu`'s benchmarked latency and our script's, after all the adjustments above.
![image](./_assets/ncu-bench.png)
_Figure 9: Side-by-side comparison of the above `ncu` command (left, measuring shape [2048, 19456, 2560]) with our own Python benchmarking script (right). There is at most a ~10 μs difference between the `Duration` in `ncu` and our benchmarking script's `Latency (us)` measurement._
## Conclusion and TL;DR
TL;DR, when benchmarking:
1. Make sure to use the hardware you intend to deploy on
2. Warmup before benchmarking your kernels
3. Use CUDA events
4. Flush your L2 Cache
5. Use a dummy matmul to make timings more accurate for short-lived kernels
6. Ensure your clock speed doesn't cause inconsistent readings
We hope this helps anyone who is interested in benchmarking their own kernels, or in how GPU kernels are benchmarked. Happy benchmarking!
### Acknowledgements and Related Resources:
We would like to thank and credit the many resources and guides that we used in our own journey of figuring out how best to benchmark our kernels on our GPUs; a lot of this work would probably not have been possible without these amazing guides.
- GPU Mode Lecture 56, presented by Georgii Evtushenko: https://www.youtube.com/watch?v=CtrqBmYtSEk
- https://www.spatters.ca/mma-matmul (Benchmarking using ncu for matrix multiplications)
- https://www.speechmatics.com/company/articles-and-news/timing-operations-in-pytorch (CUDA Events)
- https://guillesanbri.com/CUDA-Benchmarks/ (Good resource for introduction to benchmarking)
- https://modal.com/gpu-glossary/device-hardware/cuda-device-architecture (Glossary of Architecture in general)
- https://docs.nvidia.com/cuda/cuda-c-programming-guide/#global-memory-5-x (L2 cache explanation)
- https://cvw.cac.cornell.edu/gpu-architecture/gpu-memory/memory_types (L1 vs L2 cache)

View File

@ -22,16 +22,16 @@
},
"devDependencies": {
"@janhq/core": "workspace:*",
"typescript": "^5.3.3",
"vite": "^5.0.0",
"vitest": "^2.0.0",
"zustand": "^5.0.8"
"typescript": "5.9.2",
"vite": "5.4.20",
"vitest": "2.1.9",
"zustand": "5.0.8"
},
"peerDependencies": {
"@janhq/core": "*",
"zustand": "^5.0.0"
"zustand": "5.0.3"
},
"dependencies": {
"@modelcontextprotocol/sdk": "^1.17.5"
"@modelcontextprotocol/sdk": "1.17.5"
}
}

View File

@ -16,14 +16,20 @@ import {
import { janApiClient, JanChatMessage } from './api'
import { janProviderStore } from './store'
// Jan models support tools via MCP
const JAN_MODEL_CAPABILITIES = ['tools'] as const
export default class JanProviderWeb extends AIEngine {
readonly provider = 'jan'
private activeSessions: Map<string, SessionInfo> = new Map()
override async onLoad() {
console.log('Loading Jan Provider Extension...')
try {
// Check and clear invalid Jan models (capabilities mismatch)
this.validateJanModelsLocalStorage()
// Initialize authentication and fetch models
await janApiClient.initialize()
console.log('Jan Provider Extension loaded successfully')
@ -35,22 +41,93 @@ export default class JanProviderWeb extends AIEngine {
super.onLoad()
}
// Verify Jan models capabilities in localStorage
private validateJanModelsLocalStorage() {
try {
console.log("Validating Jan models in localStorage...")
const storageKey = 'model-provider'
const data = localStorage.getItem(storageKey)
if (!data) return
const parsed = JSON.parse(data)
if (!parsed?.state?.providers) return
// Check if any Jan model has incorrect capabilities
let hasInvalidModel = false
for (const provider of parsed.state.providers) {
if (provider.provider === 'jan' && provider.models) {
for (const model of provider.models) {
console.log(`Checking Jan model: ${model.id}`, model.capabilities)
if (JSON.stringify(model.capabilities) !== JSON.stringify(JAN_MODEL_CAPABILITIES)) {
hasInvalidModel = true
console.log(`Found invalid Jan model: ${model.id}, clearing localStorage`)
break
}
}
}
if (hasInvalidModel) break
}
// If any invalid model found, just clear the storage
if (hasInvalidModel) {
// Force clear the storage
localStorage.removeItem(storageKey)
// Verify it's actually removed
const afterRemoval = localStorage.getItem(storageKey)
// If still present, try setting to empty state
if (afterRemoval) {
// Try alternative clearing method
localStorage.setItem(storageKey, JSON.stringify({ state: { providers: [] }, version: parsed.version || 3 }))
}
console.log('Cleared model-provider from localStorage due to invalid Jan capabilities')
// Force a page reload to ensure clean state
window.location.reload()
}
} catch (error) {
console.error('Failed to check Jan models:', error)
}
}
override async onUnload() {
console.log('Unloading Jan Provider Extension...')
// Clear all sessions
for (const sessionId of this.activeSessions.keys()) {
await this.unload(sessionId)
}
janProviderStore.reset()
console.log('Jan Provider Extension unloaded')
}
async get(modelId: string): Promise<modelInfo | undefined> {
return janApiClient
.getModels()
.then((list) => list.find((e) => e.id === modelId))
.then((model) =>
model
? {
id: model.id,
name: model.id, // Use ID as name for now
quant_type: undefined,
providerId: this.provider,
port: 443, // HTTPS port for API
sizeBytes: 0, // Size not provided by Jan API
tags: [],
path: undefined, // Remote model, no local path
owned_by: model.owned_by,
object: model.object,
capabilities: [...JAN_MODEL_CAPABILITIES],
}
: undefined
)
}
async list(): Promise<modelInfo[]> {
try {
const janModels = await janApiClient.getModels()
return janModels.map((model) => ({
id: model.id,
name: model.id, // Use ID as name for now
@ -62,7 +139,7 @@ export default class JanProviderWeb extends AIEngine {
path: undefined, // Remote model, no local path
owned_by: model.owned_by,
object: model.object,
capabilities: ['tools'], // Jan models support both tools via MCP
capabilities: [...JAN_MODEL_CAPABILITIES],
}))
} catch (error) {
console.error('Failed to list Jan models:', error)
@ -75,7 +152,7 @@ export default class JanProviderWeb extends AIEngine {
// For Jan API, we don't actually "load" models in the traditional sense
// We just create a session reference for tracking
const sessionId = `jan-${modelId}-${Date.now()}`
const sessionInfo: SessionInfo = {
pid: Date.now(), // Use timestamp as pseudo-PID
port: 443, // HTTPS port
@ -85,8 +162,10 @@ export default class JanProviderWeb extends AIEngine {
}
this.activeSessions.set(sessionId, sessionInfo)
console.log(`Jan model session created: ${sessionId} for model ${modelId}`)
console.log(
`Jan model session created: ${sessionId} for model ${modelId}`
)
return sessionInfo
} catch (error) {
console.error(`Failed to load Jan model ${modelId}:`, error)
@ -97,23 +176,23 @@ export default class JanProviderWeb extends AIEngine {
async unload(sessionId: string): Promise<UnloadResult> {
try {
const session = this.activeSessions.get(sessionId)
if (!session) {
return {
success: false,
error: `Session ${sessionId} not found`
error: `Session ${sessionId} not found`,
}
}
this.activeSessions.delete(sessionId)
console.log(`Jan model session unloaded: ${sessionId}`)
return { success: true }
} catch (error) {
console.error(`Failed to unload Jan session ${sessionId}:`, error)
return {
success: false,
error: error instanceof Error ? error.message : 'Unknown error'
error: error instanceof Error ? error.message : 'Unknown error',
}
}
}
@ -136,9 +215,12 @@ export default class JanProviderWeb extends AIEngine {
}
// Convert core chat completion request to Jan API format
const janMessages: JanChatMessage[] = opts.messages.map(msg => ({
const janMessages: JanChatMessage[] = opts.messages.map((msg) => ({
role: msg.role as 'system' | 'user' | 'assistant',
content: typeof msg.content === 'string' ? msg.content : JSON.stringify(msg.content)
content:
typeof msg.content === 'string'
? msg.content
: JSON.stringify(msg.content),
}))
const janRequest = {
@ -162,18 +244,18 @@ export default class JanProviderWeb extends AIEngine {
} else {
// Return single response
const response = await janApiClient.createChatCompletion(janRequest)
// Check if aborted after completion
if (abortController?.signal?.aborted) {
throw new Error('Request was aborted')
}
return {
id: response.id,
object: 'chat.completion' as const,
created: response.created,
model: response.model,
choices: response.choices.map(choice => ({
choices: response.choices.map((choice) => ({
index: choice.index,
message: {
role: choice.message.role,
@ -182,7 +264,12 @@ export default class JanProviderWeb extends AIEngine {
reasoning_content: choice.message.reasoning_content,
tool_calls: choice.message.tool_calls,
},
finish_reason: (choice.finish_reason || 'stop') as 'stop' | 'length' | 'tool_calls' | 'content_filter' | 'function_call',
finish_reason: (choice.finish_reason || 'stop') as
| 'stop'
| 'length'
| 'tool_calls'
| 'content_filter'
| 'function_call',
})),
usage: response.usage,
}
@ -193,7 +280,10 @@ export default class JanProviderWeb extends AIEngine {
}
}
private async *createStreamingGenerator(janRequest: any, abortController?: AbortController) {
private async *createStreamingGenerator(
janRequest: any,
abortController?: AbortController
) {
let resolve: () => void
let reject: (error: Error) => void
const chunks: any[] = []
@ -231,7 +321,7 @@ export default class JanProviderWeb extends AIEngine {
object: chunk.object,
created: chunk.created,
model: chunk.model,
choices: chunk.choices.map(choice => ({
choices: chunk.choices.map((choice) => ({
index: choice.index,
delta: {
role: choice.delta.role,
@ -261,14 +351,14 @@ export default class JanProviderWeb extends AIEngine {
if (abortController?.signal?.aborted) {
throw new Error('Request was aborted')
}
while (yieldedIndex < chunks.length) {
yield chunks[yieldedIndex]
yieldedIndex++
}
// Wait a bit before checking again
await new Promise(resolve => setTimeout(resolve, 10))
await new Promise((resolve) => setTimeout(resolve, 10))
}
// Yield any remaining chunks
@ -291,24 +381,38 @@ export default class JanProviderWeb extends AIEngine {
}
async delete(modelId: string): Promise<void> {
throw new Error(`Delete operation not supported for remote Jan API model: ${modelId}`)
throw new Error(
`Delete operation not supported for remote Jan API model: ${modelId}`
)
}
async update(modelId: string, model: Partial<modelInfo>): Promise<void> {
throw new Error(
`Update operation not supported for remote Jan API model: ${modelId}`
)
}
async import(modelId: string, _opts: ImportOptions): Promise<void> {
throw new Error(`Import operation not supported for remote Jan API model: ${modelId}`)
throw new Error(
`Import operation not supported for remote Jan API model: ${modelId}`
)
}
async abortImport(modelId: string): Promise<void> {
throw new Error(`Abort import operation not supported for remote Jan API model: ${modelId}`)
throw new Error(
`Abort import operation not supported for remote Jan API model: ${modelId}`
)
}
async getLoadedModels(): Promise<string[]> {
return Array.from(this.activeSessions.values()).map(session => session.model_id)
return Array.from(this.activeSessions.values()).map(
(session) => session.model_id
)
}
async isToolSupported(modelId: string): Promise<boolean> {
// Jan models support tool calls via MCP
console.log(`Checking tool support for Jan model ${modelId}: supported`);
return true;
console.log(`Checking tool support for Jan model ${modelId}: supported`)
return true
}
}
}

View File

@ -48,6 +48,18 @@ export class JanAuthService {
* Called on app load to check existing session
*/
async initialize(): Promise<void> {
// Ensure refreshtoken is valid (in case of expired session or secret change)
try {
await refreshToken()
} catch (error) {
console.log('Failed to refresh token on init:', error)
// If refresh fails, logout to clear any invalid state
console.log('Logging out and clearing auth state to clear invalid session...')
await logoutUser()
this.clearAuthState()
this.authBroadcast.broadcastLogout()
}
// Authentication state check
try {
if (!this.isAuthenticated()) {
// Not authenticated - ensure guest access

View File

@ -12,11 +12,11 @@
"build:publish": "rimraf *.tgz --glob || true && yarn build && npm pack && cpx *.tgz ../../pre-install"
},
"devDependencies": {
"cpx": "^1.5.0",
"rimraf": "^6.0.1",
"cpx": "1.5.0",
"rimraf": "6.0.1",
"rolldown": "1.0.0-beta.1",
"run-script-os": "^1.1.6",
"typescript": "^5.3.3"
"run-script-os": "1.1.6",
"typescript": "5.9.2"
},
"dependencies": {
"@janhq/core": "../../core/package.tgz",

View File

@ -15,11 +15,11 @@
"./main": "./dist/module.js"
},
"devDependencies": {
"cpx": "^1.5.0",
"rimraf": "^6.0.1",
"cpx": "1.5.0",
"rimraf": "6.0.1",
"rolldown": "1.0.0-beta.1",
"ts-loader": "^9.5.0",
"typescript": "^5.7.2"
"typescript": "5.9.2"
},
"dependencies": {
"@janhq/core": "../../core/package.tgz"

View File

@ -12,12 +12,12 @@
"build:publish": "rimraf *.tgz --glob || true && yarn build && npm pack && cpx *.tgz ../../pre-install"
},
"devDependencies": {
"cpx": "^1.5.0",
"rimraf": "^6.0.1",
"cpx": "1.5.0",
"rimraf": "6.0.1",
"rolldown": "1.0.0-beta.1",
"run-script-os": "^1.1.6",
"run-script-os": "1.1.6",
"typescript": "5.8.3",
"vitest": "^3.0.6"
"vitest": "3.2.4"
},
"files": [
"dist/*",
@ -26,7 +26,7 @@
],
"dependencies": {
"@janhq/core": "../../core/package.tgz",
"@tauri-apps/api": "^2.5.0"
"@tauri-apps/api": "2.8.0"
},
"bundleDependencies": [],
"installConfig": {

View File

@ -17,24 +17,24 @@
"test:coverage": "vitest run --coverage"
},
"devDependencies": {
"@vitest/ui": "^3.2.4",
"cpx": "^1.5.0",
"jsdom": "^26.1.0",
"rimraf": "^3.0.2",
"@vitest/ui": "2.1.9",
"cpx": "1.5.0",
"jsdom": "26.1.0",
"rimraf": "3.0.2",
"rolldown": "1.0.0-beta.1",
"ts-loader": "^9.5.0",
"typescript": "^5.7.2",
"vitest": "^3.2.4"
"typescript": "5.9.2",
"vitest": "3.2.4"
},
"dependencies": {
"@janhq/core": "../../core/package.tgz",
"@janhq/tauri-plugin-hardware-api": "link:../../src-tauri/plugins/tauri-plugin-hardware",
"@janhq/tauri-plugin-llamacpp-api": "link:../../src-tauri/plugins/tauri-plugin-llamacpp",
"@tauri-apps/api": "^2.5.0",
"@tauri-apps/plugin-http": "^2.5.1",
"@tauri-apps/api": "2.8.0",
"@tauri-apps/plugin-http": "2.5.0",
"@tauri-apps/plugin-log": "^2.6.0",
"fetch-retry": "^5.0.6",
"ulidx": "^2.3.0"
"ulidx": "2.4.1"
},
"engines": {
"node": ">=18.0.0"

View File

@ -96,18 +96,6 @@
"textAlign": "right"
}
},
{
"key": "batch_size",
"title": "Batch Size",
"description": "Logical maximum batch size for processing prompts.",
"controllerType": "input",
"controllerProps": {
"value": 2048,
"placeholder": "2048",
"type": "number",
"textAlign": "right"
}
},
{
"key": "ubatch_size",
"title": "uBatch Size",

View File

@ -46,7 +46,6 @@ export async function getLocalInstalledBackends(): Promise<
}
}
}
console.debug(local)
return local
}
@ -319,7 +318,10 @@ export async function downloadBackend(
events.emit('onFileDownloadSuccess', { modelId: taskId, downloadType })
} catch (error) {
// Fallback: if GitHub fails, retry once with CDN
if (source === 'github') {
if (
source === 'github' &&
error?.toString() !== 'Error: Download cancelled'
) {
console.warn(`GitHub download failed, falling back to CDN:`, error)
return await downloadBackend(backend, version, 'cdn')
}

View File

@ -37,7 +37,13 @@ import {
import { invoke } from '@tauri-apps/api/core'
import { getProxyConfig } from './util'
import { basename } from '@tauri-apps/api/path'
import { readGgufMetadata } from '@janhq/tauri-plugin-llamacpp-api'
import {
readGgufMetadata,
estimateKVCacheSize,
getModelSize,
isModelSupported,
planModelLoadInternal,
} from '@janhq/tauri-plugin-llamacpp-api'
import { getSystemUsage, getSystemInfo } from '@janhq/tauri-plugin-hardware-api'
// Error message constant - matches web-app/src/utils/error.ts
@ -82,6 +88,7 @@ type ModelPlan = {
maxContextLength: number
noOffloadKVCache: boolean
offloadMmproj?: boolean
batchSize: number
mode: 'GPU' | 'Hybrid' | 'CPU' | 'Unsupported'
}
@ -922,6 +929,30 @@ export default class llamacpp_extension extends AIEngine {
return hash
}
override async get(modelId: string): Promise<modelInfo | undefined> {
const modelPath = await joinPath([
await this.getProviderPath(),
'models',
modelId,
])
const path = await joinPath([modelPath, 'model.yml'])
if (!(await fs.existsSync(path))) return undefined
const modelConfig = await invoke<ModelConfig>('read_yaml', {
path,
})
return {
id: modelId,
name: modelConfig.name ?? modelId,
quant_type: undefined, // TODO: parse quantization type from model.yml or model.gguf
providerId: this.provider,
port: 0, // port is not known until the model is loaded
sizeBytes: modelConfig.size_bytes ?? 0,
} as modelInfo
}
// Implement the required LocalProvider interface methods
override async list(): Promise<modelInfo[]> {
const modelsDir = await joinPath([await this.getProviderPath(), 'models'])
@ -1085,7 +1116,10 @@ export default class llamacpp_extension extends AIEngine {
const archiveName = await basename(path)
logger.info(`Installing backend from path: ${path}`)
if (!(await fs.existsSync(path)) || (!path.endsWith('tar.gz') && !path.endsWith('zip'))) {
if (
!(await fs.existsSync(path)) ||
(!path.endsWith('tar.gz') && !path.endsWith('zip'))
) {
logger.error(`Invalid path or file ${path}`)
throw new Error(`Invalid path or file ${path}`)
}
@ -1121,6 +1155,49 @@ export default class llamacpp_extension extends AIEngine {
}
}
/**
* Update a model with new information.
* @param modelId
* @param model
*/
async update(modelId: string, model: Partial<modelInfo>): Promise<void> {
const modelFolderPath = await joinPath([
await this.getProviderPath(),
'models',
modelId,
])
const modelConfig = await invoke<ModelConfig>('read_yaml', {
path: await joinPath([modelFolderPath, 'model.yml']),
})
const newFolderPath = await joinPath([
await this.getProviderPath(),
'models',
model.id,
])
// Check if newFolderPath exists
if (await fs.existsSync(newFolderPath)) {
throw new Error(`Model with ID ${model.id} already exists`)
}
const newModelConfigPath = await joinPath([newFolderPath, 'model.yml'])
await fs.mv(modelFolderPath, newFolderPath).then(() =>
// now replace what values have previous model name with format
invoke('write_yaml', {
data: {
...modelConfig,
model_path: modelConfig?.model_path?.replace(
`${this.providerId}/models/${modelId}`,
`${this.providerId}/models/${model.id}`
),
mmproj_path: modelConfig?.mmproj_path?.replace(
`${this.providerId}/models/${modelId}`,
`${this.providerId}/models/${model.id}`
),
},
savePath: newModelConfigPath,
})
)
}
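
A hedged usage sketch of the new get()/update() methods above: renaming a model moves its folder and rewrites the paths recorded in model.yml. The extension instance and model IDs below are placeholders.

```typescript
// `ext` stands in for the loaded llamacpp extension; the IDs are made up.
async function renameLocalModel(ext: {
  get(id: string): Promise<{ id: string; name: string } | undefined>
  update(id: string, model: { id: string }): Promise<void>
}): Promise<void> {
  const existing = await ext.get('qwen2.5-0.5b-instruct')
  if (!existing) throw new Error('model not found')
  // Moves llamacpp/models/qwen2.5-0.5b-instruct -> llamacpp/models/qwen2.5-0.5b
  // and updates model_path / mmproj_path inside model.yml to match.
  await ext.update('qwen2.5-0.5b-instruct', { id: 'qwen2.5-0.5b' })
}
```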
override async import(modelId: string, opts: ImportOptions): Promise<void> {
const isValidModelId = (id: string) => {
// only allow alphanumeric, underscore, hyphen, and dot characters in modelId
@ -1979,11 +2056,6 @@ export default class llamacpp_extension extends AIEngine {
return responseData as EmbeddingResponse
}
// Optional method for direct client access
override getChatClient(sessionId: string): any {
throw new Error('method not implemented yet')
}
/**
* Check if a tool is supported by the model
* Currently read from GGUF chat_template
@ -2046,7 +2118,7 @@ export default class llamacpp_extension extends AIEngine {
path: string,
meta: Record<string, string>
): Promise<{ layerSize: number; totalLayers: number }> {
const modelSize = await this.getModelSize(path)
const modelSize = await getModelSize(path)
const arch = meta['general.architecture']
const totalLayers = Number(meta[`${arch}.block_count`]) + 2 // 1 for lm_head layer and 1 for embedding layer
if (!totalLayers) throw new Error('Invalid metadata: block_count not found')
@ -2062,335 +2134,27 @@ export default class llamacpp_extension extends AIEngine {
/^\/\/[^/]+/.test(norm) // UNC path //server/share
)
}
/*
* if (!this.isAbsolutePath(path))
path = await joinPath([await getJanDataFolderPath(), path])
if (mmprojPath && !this.isAbsolutePath(mmprojPath))
mmprojPath = await joinPath([await getJanDataFolderPath(), path])
*/
async planModelLoad(
path: string,
mmprojPath?: string,
requestedCtx?: number
): Promise<ModelPlan> {
if (!this.isAbsolutePath(path))
if (!this.isAbsolutePath(path)) {
path = await joinPath([await getJanDataFolderPath(), path])
}
if (mmprojPath && !this.isAbsolutePath(mmprojPath))
mmprojPath = await joinPath([await getJanDataFolderPath(), mmprojPath])
const modelSize = await this.getModelSize(path)
const memoryInfo = await this.getTotalSystemMemory()
const gguf = await readGgufMetadata(path)
// Get mmproj size if provided
let mmprojSize = 0
if (mmprojPath) {
mmprojSize = await this.getModelSize(mmprojPath)
}
const { layerSize, totalLayers } = await this.getLayerSize(
path,
gguf.metadata
)
const kvCachePerToken = (await this.estimateKVCache(gguf.metadata))
.perTokenSize
logger.info(
`Model size: ${modelSize}, Layer size: ${layerSize}, Total layers: ${totalLayers}, KV cache per token: ${kvCachePerToken}`
)
// Validate critical values
if (!modelSize || modelSize <= 0) {
throw new Error(`Invalid model size: ${modelSize}`)
}
if (!kvCachePerToken || kvCachePerToken <= 0) {
throw new Error(`Invalid KV cache per token: ${kvCachePerToken}`)
}
if (!layerSize || layerSize <= 0) {
throw new Error(`Invalid layer size: ${layerSize}`)
}
// Reserve memory for OS, other applications, and fixed engine overhead.
const VRAM_RESERVE_GB = 0.5
const VRAM_RESERVE_BYTES = VRAM_RESERVE_GB * 1024 * 1024 * 1024
const ENGINE_FIXED_OVERHEAD_BYTES = 0.2 * 1024 * 1024 * 1024 // For scratch buffers etc.
// Get model's maximum context length
const arch = gguf.metadata['general.architecture']
const modelMaxContextLength =
Number(gguf.metadata[`${arch}.context_length`]) || 8192
const MIN_CONTEXT_LENGTH = 1024
// Memory percentages applied to both VRAM and RAM
const memoryPercentages = { high: 0.7, medium: 0.5, low: 0.4 }
logger.info(
`Memory info - Total (VRAM + RAM): ${memoryInfo.totalMemory}, Total VRAM: ${memoryInfo.totalVRAM}, Mode: ${this.memoryMode}`
)
if (!memoryInfo.totalMemory || isNaN(memoryInfo.totalMemory)) {
throw new Error(`Invalid total memory: ${memoryInfo.totalMemory}`)
}
if (!memoryInfo.totalVRAM || isNaN(memoryInfo.totalVRAM)) {
throw new Error(`Invalid total VRAM: ${memoryInfo.totalVRAM}`)
}
if (!this.memoryMode || !(this.memoryMode in memoryPercentages)) {
throw new Error(
`Invalid memory mode: ${this.memoryMode}. Must be 'high', 'medium', or 'low'`
)
}
// Apply memory mode to both VRAM and RAM separately
const memoryModeMultiplier = memoryPercentages[this.memoryMode]
const usableVRAM = Math.max(
0,
memoryInfo.totalVRAM * memoryModeMultiplier -
VRAM_RESERVE_BYTES -
ENGINE_FIXED_OVERHEAD_BYTES
)
const actualSystemRAM = Math.max(0, memoryInfo.totalRAM)
const usableSystemMemory = actualSystemRAM * memoryModeMultiplier
logger.info(
`Actual System RAM: ${actualSystemRAM}, Usable VRAM for plan: ${usableVRAM}, Usable System Memory: ${usableSystemMemory}`
)
let gpuLayers = 0
let maxContextLength = 0
let noOffloadKVCache = false
let mode: ModelPlan['mode'] = 'Unsupported'
let offloadMmproj = false
let remainingVRAM = usableVRAM
if (mmprojSize > 0 && mmprojSize <= remainingVRAM) {
offloadMmproj = true
remainingVRAM -= mmprojSize
}
const vramForMinContext = (
await this.estimateKVCache(gguf.metadata, MIN_CONTEXT_LENGTH)
).size
const ramForModel = modelSize + (offloadMmproj ? 0 : mmprojSize)
if (ramForModel + vramForMinContext > usableSystemMemory + usableVRAM) {
logger.error(
`Model unsupported. Not enough resources for model and min context.`
)
return {
gpuLayers: 0,
maxContextLength: 0,
noOffloadKVCache: true,
mode: 'Unsupported',
offloadMmproj: false,
}
}
const targetContext = Math.min(
requestedCtx || modelMaxContextLength,
modelMaxContextLength
)
let targetContextSize = (
await this.estimateKVCache(gguf.metadata, targetContext)
).size
// Use `kvCachePerToken` for all VRAM calculations
if (modelSize + targetContextSize <= remainingVRAM) {
mode = 'GPU'
gpuLayers = totalLayers
maxContextLength = targetContext
noOffloadKVCache = false
logger.info(
'Planning: Ideal case fits. All layers and target context in VRAM.'
)
} else if (modelSize <= remainingVRAM) {
mode = 'GPU'
gpuLayers = totalLayers
noOffloadKVCache = false
const vramLeftForContext = remainingVRAM - modelSize
maxContextLength = Math.floor(vramLeftForContext / kvCachePerToken)
// Add safety check to prevent OOM
const safetyBuffer = 0.9 // Use 90% of calculated context to be safe
maxContextLength = Math.floor(maxContextLength * safetyBuffer)
logger.info(
`Planning: All layers fit in VRAM, but context must be reduced. VRAM left: ${vramLeftForContext}, kvCachePerToken: ${kvCachePerToken}, calculated context: ${maxContextLength}`
)
} else {
const vramAvailableForLayers = remainingVRAM - vramForMinContext
if (vramAvailableForLayers >= layerSize) {
mode = 'Hybrid'
gpuLayers = Math.min(
Math.floor(vramAvailableForLayers / layerSize),
totalLayers
)
noOffloadKVCache = false
const vramUsedByLayers = gpuLayers * layerSize
const vramLeftForContext = remainingVRAM - vramUsedByLayers
maxContextLength = Math.floor(vramLeftForContext / kvCachePerToken)
logger.info(
'Planning: Hybrid mode. Offloading layers to fit context in VRAM.'
)
}
}
// Fallback logic: try different configurations if no VRAM-based plan worked
if (mode === 'Unsupported') {
logger.info('Planning: Trying fallback configurations...')
// Try putting some layers on GPU with KV cache in RAM
const possibleGpuLayers = Math.floor(remainingVRAM / layerSize)
if (possibleGpuLayers > 0) {
gpuLayers = Math.min(possibleGpuLayers, totalLayers)
const ramUsedByCpuLayers = (totalLayers - gpuLayers) * layerSize
const ramUsedByMmproj = !offloadMmproj ? mmprojSize : 0
const availableRamForKv =
usableSystemMemory - (ramUsedByCpuLayers + ramUsedByMmproj)
// Note: Use `kvCachePerToken` for RAM calculation, as the overhead is GPU-specific
const contextInRam = Math.floor(availableRamForKv / kvCachePerToken)
if (contextInRam >= MIN_CONTEXT_LENGTH) {
mode = 'Hybrid'
maxContextLength = contextInRam
noOffloadKVCache = true
logger.info(
`Planning: Fallback hybrid - GPU layers: ${gpuLayers}, Context in RAM: ${maxContextLength}`
)
}
}
// If still unsupported, try pure CPU mode
if (mode === 'Unsupported') {
gpuLayers = 0
noOffloadKVCache = true
offloadMmproj = false
const ramUsedByModel = modelSize + mmprojSize
const availableRamForKv = usableSystemMemory - ramUsedByModel
maxContextLength = Math.floor(availableRamForKv / kvCachePerToken)
if (maxContextLength >= MIN_CONTEXT_LENGTH) {
mode = 'CPU'
logger.info(`Planning: CPU mode - Context: ${maxContextLength}`)
}
}
}
if (mode === 'CPU' || noOffloadKVCache) {
offloadMmproj = false
}
if (requestedCtx && requestedCtx > 0) {
maxContextLength = Math.min(maxContextLength, requestedCtx)
}
maxContextLength = Math.min(maxContextLength, modelMaxContextLength)
if (maxContextLength < MIN_CONTEXT_LENGTH) {
mode = 'Unsupported'
}
if (mode === 'Unsupported') {
gpuLayers = 0
maxContextLength = 0
}
maxContextLength = isNaN(maxContextLength)
? 0
: Math.floor(maxContextLength)
const mmprojInfo = mmprojPath
? `, mmprojSize=${(mmprojSize / (1024 * 1024)).toFixed(
2
)}MB, offloadMmproj=${offloadMmproj}`
: ''
logger.info(
`Final plan for ${path}: gpuLayers=${gpuLayers}/${totalLayers}, ` +
`maxContextLength=${maxContextLength}, noOffloadKVCache=${noOffloadKVCache}, ` +
`mode=${mode}${mmprojInfo}`
)
return {
gpuLayers,
maxContextLength,
noOffloadKVCache,
mode,
offloadMmproj,
}
}
/**
* estimate KVCache size from a given metadata
*/
private async estimateKVCache(
meta: Record<string, string>,
ctx_size?: number
): Promise<{ size: number; perTokenSize: number }> {
const arch = meta['general.architecture']
if (!arch) throw new Error('Invalid metadata: architecture not found')
const nLayer = Number(meta[`${arch}.block_count`])
if (!nLayer) throw new Error('Invalid metadata: block_count not found')
const nHead = Number(meta[`${arch}.attention.head_count`])
if (!nHead) throw new Error('Invalid metadata: head_count not found')
// Try to get key/value lengths first (more accurate)
const keyLen = Number(meta[`${arch}.attention.key_length`])
const valLen = Number(meta[`${arch}.attention.value_length`])
let headDim: number
if (keyLen && valLen) {
// Use explicit key/value lengths if available
logger.info(
`Using explicit key_length: ${keyLen}, value_length: ${valLen}`
)
headDim = keyLen + valLen
} else {
// Fall back to embedding_length estimation
const embeddingLen = Number(meta[`${arch}.embedding_length`])
if (!embeddingLen)
throw new Error('Invalid metadata: embedding_length not found')
// Standard transformer: head_dim = embedding_dim / num_heads
// For KV cache: we need both K and V, so 2 * head_dim per head
headDim = (embeddingLen / nHead) * 2
logger.info(
`Using embedding_length estimation: ${embeddingLen}, calculated head_dim: ${headDim}`
)
}
const maxCtx = Number(meta[`${arch}.context_length`])
if (!maxCtx) throw new Error('Invalid metadata: context_length not found')
// b) If the user supplied a value, clamp it to the model's max
let ctxLen = ctx_size ? Math.min(ctx_size, maxCtx) : maxCtx
logger.info(`Final context length used for KV size: ${ctxLen}`)
logger.info(`nLayer: ${nLayer}, nHead: ${nHead}, headDim (K+V): ${headDim}`)
logger.info(`ctxLen: ${ctxLen}`)
logger.info(`nLayer: ${nLayer}`)
logger.info(`nHead: ${nHead}`)
logger.info(`headDim: ${headDim}`)
// Consider f16 by default
// Can be extended by checking cache-type-v and cache-type-k
// but we are checking overall compatibility with the default settings
// fp16 = 8 bits * 2 = 16
const bytesPerElement = 2
// Total KV cache size per token = nHead * headDim * bytesPerElement * nLayer
const kvPerToken = nHead * headDim * bytesPerElement * nLayer
return { size: ctxLen * kvPerToken, perTokenSize: kvPerToken }
}
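
To make the per-token estimate above concrete, a small worked example with illustrative Llama-7B-style metadata (the numbers are assumptions, not read from a real GGUF):

```typescript
// kvPerToken = nHead * headDim(K+V) * bytesPerElement * nLayer
const nLayer = 32
const nHead = 32
const embeddingLen = 4096
const headDim = (embeddingLen / nHead) * 2 // K plus V: 128 + 128 = 256
const bytesPerElement = 2 // fp16 cache

const kvPerToken = nHead * headDim * bytesPerElement * nLayer // 524,288 B ≈ 0.5 MiB
const kvCacheAt8k = 8192 * kvPerToken // 4,294,967,296 B = 4 GiB

console.log({ kvPerToken, kvCacheAt8k })
```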
private async getModelSize(path: string): Promise<number> {
if (path.startsWith('https://')) {
const res = await fetch(path, { method: 'HEAD' })
const len = res.headers.get('content-length')
return len ? parseInt(len, 10) : 0
} else {
return (await fs.fileStat(path)).size
try {
const result = await planModelLoadInternal(path, this.memoryMode, mmprojPath, requestedCtx)
return result
} catch (e) {
throw new Error(String(e))
}
}
@ -2404,50 +2168,11 @@ export default class llamacpp_extension extends AIEngine {
*/
async isModelSupported(
path: string,
ctx_size?: number
ctxSize?: number
): Promise<'RED' | 'YELLOW' | 'GREEN'> {
try {
const modelSize = await this.getModelSize(path)
const memoryInfo = await this.getTotalSystemMemory()
logger.info(`modelSize: ${modelSize}`)
const gguf = await readGgufMetadata(path)
let kvCacheSize: number
if (ctx_size) {
kvCacheSize = (await this.estimateKVCache(gguf.metadata, ctx_size)).size
} else {
kvCacheSize = (await this.estimateKVCache(gguf.metadata)).size
}
// Total memory consumption = model weights + kvcache
const totalRequired = modelSize + kvCacheSize
logger.info(
`isModelSupported: Total memory requirement: ${totalRequired} for ${path}`
)
// Use 90% of total memory as the usable limit
const USABLE_MEMORY_PERCENTAGE = 0.9
const usableTotalMemory =
memoryInfo.totalRAM * USABLE_MEMORY_PERCENTAGE +
memoryInfo.totalVRAM * USABLE_MEMORY_PERCENTAGE
const usableVRAM = memoryInfo.totalVRAM * USABLE_MEMORY_PERCENTAGE
// Check if model fits in total memory at all (this is the hard limit)
if (totalRequired > usableTotalMemory) {
return 'RED' // Truly impossible to run
}
// Check if everything fits in VRAM (ideal case)
if (totalRequired <= usableVRAM) {
return 'GREEN'
}
// If we get here, it means:
// - Total requirement fits in combined memory
// - But doesn't fit entirely in VRAM
// This is the CPU-GPU hybrid scenario
return 'YELLOW'
const result = await isModelSupported(path, Number(ctxSize))
return result
} catch (e) {
throw new Error(String(e))
}
@ -2601,7 +2326,8 @@ export default class llamacpp_extension extends AIEngine {
metadata: Record<string, string>
): Promise<number> {
// Extract vision parameters from metadata
const projectionDim = Math.floor(Number(metadata['clip.vision.projection_dim']) / 10) || 256
const projectionDim =
Math.floor(Number(metadata['clip.vision.projection_dim']) / 10) || 256
// Count images in messages
let imageCount = 0
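
Earlier in this file, planModelLoad now simply delegates to the plugin's planModelLoadInternal, whose result also carries the new batchSize field. A hedged sketch of turning that plan into llama-server arguments; the flag mapping is an assumption for illustration, not the extension's exact load path:

```typescript
import { planModelLoadInternal } from '@janhq/tauri-plugin-llamacpp-api'

export async function buildLoadArgs(modelPath: string, mmprojPath?: string) {
  const plan = await planModelLoadInternal(modelPath, 'high', mmprojPath)
  if (plan.mode === 'Unsupported') {
    throw new Error('Model does not fit into available VRAM + RAM')
  }
  const args = [
    '-ngl', String(plan.gpuLayers),
    '-c', String(plan.maxContextLength),
    '-b', String(plan.batchSize),
  ]
  if (plan.noOffloadKVCache) args.push('--no-kv-offload')
  return args
}
```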

View File

@ -1,6 +1,6 @@
{
"compilerOptions": {
"target": "es2016",
"target": "es2018",
"module": "ES6",
"moduleResolution": "node",
"outDir": "./dist",

File diff suppressed because it is too large

286 mise.toml
View File

@ -1,286 +0,0 @@
[tools]
node = "20"
rust = "1.85.1"
sccache = "latest"
[env]
_.path = ['./node_modules/.bin']
RUSTC_WRAPPER="sccache"
# ============================================================================
# CORE SETUP AND CONFIGURATION TASKS
# ============================================================================
[tasks.config-yarn]
description = "Configure yarn version and settings"
run = [
"corepack enable",
"corepack prepare yarn@4.5.3 --activate",
"yarn --version",
"yarn config set -H enableImmutableInstalls false"
]
[tasks.install]
description = "Install dependencies"
depends = ["config-yarn"]
run = "yarn install"
sources = ['package.json', 'yarn.lock']
outputs = ['node_modules']
[tasks.build-tauri-plugin-api]
description = "Build Tauri plugin API"
depends = ["install"]
run = "yarn build:tauri:plugin:api"
sources = ['src-tauri/plugins/**/*']
outputs = [
'src-tauri/plugins/tauri-plugin-hardware/dist-js',
'src-tauri/plugins/tauri-plugin-llamacpp/dist-js',
]
[tasks.build-core]
description = "Build core package"
depends = ["build-tauri-plugin-api"]
run = "yarn build:core"
sources = ['core/**/*']
outputs = ['core/dist']
[tasks.build-extensions]
description = "Build extensions"
depends = ["build-core"]
run = "yarn build:extensions && yarn build:extensions-web"
sources = ['extensions/**/*']
outputs = ['pre-install/*.tgz']
[tasks.install-and-build]
description = "Install dependencies and build core and extensions (matches Makefile)"
depends = ["build-extensions"]
# ============================================================================
# DEVELOPMENT TASKS
# ============================================================================
[tasks.dev]
description = "Start development server (matches Makefile)"
depends = ["install-and-build"]
run = [
"yarn download:bin",
"yarn dev"
]
[tasks.dev-tauri]
description = "Start development server with Tauri (DEPRECATED - matches Makefile)"
depends = ["install-and-build"]
run = [
"yarn download:bin",
"yarn dev:tauri"
]
# ============================================================================
# WEB APPLICATION DEVELOPMENT TASKS
# ============================================================================
[tasks.dev-web-app]
description = "Start web application development server (matches Makefile)"
depends = ["build-core"]
run = "yarn dev:web-app"
[tasks.build-web-app]
description = "Build web application (matches Makefile)"
depends = ["build-core"]
run = "yarn build:web-app"
[tasks.serve-web-app]
description = "Serve built web application"
run = "yarn serve:web-app"
[tasks.build-serve-web-app]
description = "Build and serve web application (matches Makefile)"
depends = ["build-web-app"]
run = "yarn serve:web-app"
# ============================================================================
# BUILD TASKS
# ============================================================================
[tasks.install-rust-targets]
description = "Install required Rust targets for MacOS universal builds"
run = '''
#!/usr/bin/env bash
# Check if we're on macOS
if [[ "$OSTYPE" == "darwin"* ]]; then
echo "Detected macOS, installing universal build targets..."
rustup target add x86_64-apple-darwin
rustup target add aarch64-apple-darwin
echo "Rust targets installed successfully!"
fi
'''
[tasks.build]
description = "Build complete application (matches Makefile)"
depends = ["install-rust-targets", "install-and-build"]
run = [
"yarn download:bin",
"yarn build"
]
# ============================================================================
# QUALITY ASSURANCE TASKS
# ============================================================================
[tasks.lint]
description = "Run linting (matches Makefile)"
depends = ["build-extensions"]
run = "yarn lint"
# ============================================================================
# RUST TEST COMPONENTS
# ============================================================================
[tasks.test-rust-main]
description = "Test main src-tauri package"
run = "cargo test --manifest-path src-tauri/Cargo.toml --no-default-features --features test-tauri -- --test-threads=1"
[tasks.test-rust-hardware]
description = "Test hardware plugin"
run = "cargo test --manifest-path src-tauri/plugins/tauri-plugin-hardware/Cargo.toml"
[tasks.test-rust-llamacpp]
description = "Test llamacpp plugin"
run = "cargo test --manifest-path src-tauri/plugins/tauri-plugin-llamacpp/Cargo.toml"
[tasks.test-rust-utils]
description = "Test utils package"
run = "cargo test --manifest-path src-tauri/utils/Cargo.toml"
[tasks.test-rust]
description = "Run all Rust tests"
depends = ["test-rust-main", "test-rust-hardware", "test-rust-llamacpp", "test-rust-utils"]
# ============================================================================
# JS TEST COMPONENTS
# ============================================================================
[tasks.test-js-setup]
description = "Setup for JS tests"
run = [
"yarn download:bin",
"yarn download:lib",
"yarn copy:assets:tauri",
"yarn build:icon"
]
[tasks.test-js]
description = "Run JS tests"
depends = ["test-js-setup"]
run = "yarn test"
# ============================================================================
# COMBINED TEST TASKS
# ============================================================================
[tasks.test]
description = "Run complete test suite (matches Makefile)"
depends = ["lint", "test-js", "test-rust"]
# ============================================================================
# PARALLEL-FRIENDLY QUALITY ASSURANCE TASKS
# ============================================================================
[tasks.lint-only]
description = "Run linting only (parallel-friendly)"
depends = ["build-extensions"]
run = "yarn lint"
hide = true
[tasks.test-only]
description = "Run tests only (parallel-friendly)"
depends = ["build-extensions", "test-js", "test-rust"]
hide = true
[tasks.qa-parallel]
description = "Run linting and testing in parallel"
depends = ["lint-only", "test-only"]
# ============================================================================
# UTILITY TASKS
# ============================================================================
[tasks.clean]
description = "Clean all build artifacts and dependencies (cross-platform - matches Makefile)"
run = '''
#!/usr/bin/env bash
echo "Cleaning build artifacts and dependencies..."
# Platform detection and cleanup (matches Makefile exactly)
if [[ "$OSTYPE" == "msys" || "$OSTYPE" == "win32" ]]; then
# Windows cleanup using PowerShell (matches Makefile)
powershell -Command "Get-ChildItem -Path . -Include node_modules, .next, dist, build, out, .turbo, .yarn -Recurse -Directory | Remove-Item -Recurse -Force" 2>/dev/null || true
powershell -Command "Get-ChildItem -Path . -Include package-lock.json, tsconfig.tsbuildinfo -Recurse -File | Remove-Item -Recurse -Force" 2>/dev/null || true
powershell -Command "Remove-Item -Recurse -Force ./pre-install/*.tgz" 2>/dev/null || true
powershell -Command "Remove-Item -Recurse -Force ./extensions/*/*.tgz" 2>/dev/null || true
powershell -Command "Remove-Item -Recurse -Force ./electron/pre-install/*.tgz" 2>/dev/null || true
powershell -Command "Remove-Item -Recurse -Force ./src-tauri/resources" 2>/dev/null || true
powershell -Command "Remove-Item -Recurse -Force ./src-tauri/target" 2>/dev/null || true
powershell -Command "if (Test-Path \"\$(\$env:USERPROFILE)\\jan\\extensions\\\") { Remove-Item -Path \"\$(\$env:USERPROFILE)\\jan\\extensions\" -Recurse -Force }" 2>/dev/null || true
elif [[ "$OSTYPE" == "linux-gnu"* ]]; then
# Linux cleanup (matches Makefile)
find . -name "node_modules" -type d -prune -exec rm -rf '{}' + 2>/dev/null || true
find . -name ".next" -type d -exec rm -rf '{}' + 2>/dev/null || true
find . -name "dist" -type d -exec rm -rf '{}' + 2>/dev/null || true
find . -name "build" -type d -exec rm -rf '{}' + 2>/dev/null || true
find . -name "out" -type d -exec rm -rf '{}' + 2>/dev/null || true
find . -name ".turbo" -type d -exec rm -rf '{}' + 2>/dev/null || true
find . -name ".yarn" -type d -exec rm -rf '{}' + 2>/dev/null || true
find . -name "package-lock.json" -type f -exec rm -rf '{}' + 2>/dev/null || true
rm -rf ./pre-install/*.tgz 2>/dev/null || true
rm -rf ./extensions/*/*.tgz 2>/dev/null || true
rm -rf ./electron/pre-install/*.tgz 2>/dev/null || true
rm -rf ./src-tauri/resources 2>/dev/null || true
rm -rf ./src-tauri/target 2>/dev/null || true
rm -rf ~/jan/extensions 2>/dev/null || true
rm -rf "~/.cache/jan*" 2>/dev/null || true
rm -rf "./.cache" 2>/dev/null || true
else
# macOS cleanup (matches Makefile)
find . -name "node_modules" -type d -prune -exec rm -rf '{}' + 2>/dev/null || true
find . -name ".next" -type d -exec rm -rf '{}' + 2>/dev/null || true
find . -name "dist" -type d -exec rm -rf '{}' + 2>/dev/null || true
find . -name "build" -type d -exec rm -rf '{}' + 2>/dev/null || true
find . -name "out" -type d -exec rm -rf '{}' + 2>/dev/null || true
find . -name ".turbo" -type d -exec rm -rf '{}' + 2>/dev/null || true
find . -name ".yarn" -type d -exec rm -rf '{}' + 2>/dev/null || true
find . -name "package-lock.json" -type f -exec rm -rf '{}' + 2>/dev/null || true
rm -rf ./pre-install/*.tgz 2>/dev/null || true
rm -rf ./extensions/*/*.tgz 2>/dev/null || true
rm -rf ./electron/pre-install/*.tgz 2>/dev/null || true
rm -rf ./src-tauri/resources 2>/dev/null || true
rm -rf ./src-tauri/target 2>/dev/null || true
rm -rf ~/jan/extensions 2>/dev/null || true
rm -rf ~/Library/Caches/jan* 2>/dev/null || true
fi
echo "Clean completed!"
'''
[tasks.all]
description = "Default target - shows available commands (matches Makefile)"
run = "echo 'Specify a target to run. Use: mise tasks'"
# ============================================================================
# DEVELOPMENT WORKFLOW SHORTCUTS
# ============================================================================
[tasks.setup]
description = "Complete development setup"
depends = ["install-and-build"]
alias = "init"
[tasks.ci]
description = "Run CI pipeline (lint + test sequentially)"
depends = ["test"]
[tasks.ci-parallel]
description = "Run CI pipeline (lint + test in parallel)"
depends = ["qa-parallel"]
alias = "ci-fast"

View File

@ -27,7 +27,8 @@
"copy:assets:tauri": "cpx \"pre-install/*.tgz\" \"src-tauri/resources/pre-install/\" && cpx \"LICENSE\" \"src-tauri/resources/\"",
"download:lib": "node ./scripts/download-lib.mjs",
"download:bin": "node ./scripts/download-bin.mjs",
"build:tauri:win32": "yarn download:bin && yarn download:lib && yarn tauri build",
"download:windows-installer": "node ./scripts/download-win-installer-deps.mjs",
"build:tauri:win32": "yarn download:bin && yarn download:lib && yarn download:windows-installer && yarn tauri build",
"build:tauri:linux": "yarn download:bin && yarn download:lib && NO_STRIP=1 ./src-tauri/build-utils/shim-linuxdeploy.sh yarn tauri build && ./src-tauri/build-utils/buildAppImage.sh",
"build:tauri:darwin": "yarn download:bin && yarn tauri build --target universal-apple-darwin",
"build:tauri": "yarn build:icon && yarn copy:assets:tauri && run-script-os",

View File

@ -0,0 +1,83 @@
console.log('Downloading Windows installer dependencies...')
// scripts/download-win-installer-deps.mjs
import https from 'https'
import fs, { mkdirSync } from 'fs'
import os from 'os'
import path from 'path'
import { copySync } from 'cpx'
function download(url, dest) {
return new Promise((resolve, reject) => {
console.log(`Downloading ${url} to ${dest}`)
const file = fs.createWriteStream(dest)
https
.get(url, (response) => {
console.log(`Response status code: ${response.statusCode}`)
if (
response.statusCode >= 300 &&
response.statusCode < 400 &&
response.headers.location
) {
// Handle redirect
const redirectURL = response.headers.location
console.log(`Redirecting to ${redirectURL}`)
download(redirectURL, dest).then(resolve, reject) // Recursive call
return
} else if (response.statusCode !== 200) {
reject(`Failed to get '${url}' (${response.statusCode})`)
return
}
response.pipe(file)
file.on('finish', () => {
file.close(resolve)
})
})
.on('error', (err) => {
fs.unlink(dest, () => reject(err.message))
})
})
}
async function main() {
console.log('Starting Windows installer dependencies download')
const platform = os.platform() // 'darwin', 'linux', 'win32'
const arch = os.arch() // 'x64', 'arm64', etc.
if (arch != 'x64') return
const libDir = 'src-tauri/resources/lib'
const tempDir = 'scripts/dist'
try {
mkdirSync('scripts/dist')
} catch (err) {
// Expect EEXIST error if the directory already exists
}
// Download VC++ Redistributable 17
if (platform == 'win32') {
const vcFilename = 'vc_redist.x64.exe'
const vcUrl = 'https://aka.ms/vs/17/release/vc_redist.x64.exe'
console.log(`Downloading VC++ Redistributable...`)
const vcSavePath = path.join(tempDir, vcFilename)
if (!fs.existsSync(vcSavePath)) {
await download(vcUrl, vcSavePath)
}
// copy to tauri resources
try {
copySync(vcSavePath, libDir)
} catch (err) {
// Expect EEXIST error
}
}
console.log('Windows installer dependencies downloads completed.')
}
main().catch((err) => {
console.error('Error:', err)
process.exit(1)
})

View File

@ -15,6 +15,8 @@ use tauri::Runtime;
static SYSTEM_INFO: OnceLock<SystemInfo> = OnceLock::new();
pub use commands::get_system_info;
/// Initialize the hardware plugin
pub fn init<R: Runtime>() -> tauri::plugin::TauriPlugin<R> {
tauri::plugin::Builder::new("hardware")

View File

@ -24,6 +24,7 @@ tauri = { version = "2.5.0", default-features = false, features = [] }
thiserror = "2.0.12"
tokio = { version = "1", features = ["full"] }
reqwest = { version = "0.11", features = ["json", "blocking", "stream"] }
tauri-plugin-hardware = { path = "../tauri-plugin-hardware" }
# Unix-specific dependencies
[target.'cfg(unix)'.dependencies]

View File

@ -14,6 +14,10 @@ const COMMANDS: &[&str] = &[
"get_session_by_model",
// GGUF commands
"read_gguf_metadata",
"estimate_kv_cache_size",
"get_model_size",
"is_model_supported",
"plan_model_load"
];
fn main() {

View File

@ -2,28 +2,28 @@ import { invoke } from '@tauri-apps/api/core'
// Types
export interface SessionInfo {
pid: number;
port: number;
model_id: string;
model_path: string;
api_key: string;
pid: number
port: number
model_id: string
model_path: string
api_key: string
}
export interface DeviceInfo {
id: string;
name: string;
memory: number;
id: string
name: string
memory: number
}
export interface GgufMetadata {
version: number;
tensor_count: number;
metadata: Record<string, string>;
version: number
tensor_count: number
metadata: Record<string, string>
}
// Cleanup commands
export async function cleanupLlamaProcesses(): Promise<void> {
return await invoke('plugin:llamacpp|cleanup_llama_processes');
return await invoke('plugin:llamacpp|cleanup_llama_processes')
}
// LlamaCpp server commands
@ -35,12 +35,12 @@ export async function loadLlamaModel(
return await invoke('plugin:llamacpp|load_llama_model', {
backendPath,
libraryPath,
args
});
args,
})
}
export async function unloadLlamaModel(pid: number): Promise<void> {
return await invoke('plugin:llamacpp|unload_llama_model', { pid });
return await invoke('plugin:llamacpp|unload_llama_model', { pid })
}
export async function getDevices(
@ -49,8 +49,8 @@ export async function getDevices(
): Promise<DeviceInfo[]> {
return await invoke('plugin:llamacpp|get_devices', {
backendPath,
libraryPath
});
libraryPath,
})
}
export async function generateApiKey(
@ -59,35 +59,84 @@ export async function generateApiKey(
): Promise<string> {
return await invoke('plugin:llamacpp|generate_api_key', {
modelId,
apiSecret
});
apiSecret,
})
}
export async function isProcessRunning(pid: number): Promise<boolean> {
return await invoke('plugin:llamacpp|is_process_running', { pid });
return await invoke('plugin:llamacpp|is_process_running', { pid })
}
export async function getRandomPort(): Promise<number> {
return await invoke('plugin:llamacpp|get_random_port');
return await invoke('plugin:llamacpp|get_random_port')
}
export async function findSessionByModel(modelId: string): Promise<SessionInfo | null> {
return await invoke('plugin:llamacpp|find_session_by_model', { modelId });
export async function findSessionByModel(
modelId: string
): Promise<SessionInfo | null> {
return await invoke('plugin:llamacpp|find_session_by_model', { modelId })
}
export async function getLoadedModels(): Promise<string[]> {
return await invoke('plugin:llamacpp|get_loaded_models');
return await invoke('plugin:llamacpp|get_loaded_models')
}
export async function getAllSessions(): Promise<SessionInfo[]> {
return await invoke('plugin:llamacpp|get_all_sessions');
return await invoke('plugin:llamacpp|get_all_sessions')
}
export async function getSessionByModel(modelId: string): Promise<SessionInfo | null> {
return await invoke('plugin:llamacpp|get_session_by_model', { modelId });
export async function getSessionByModel(
modelId: string
): Promise<SessionInfo | null> {
return await invoke('plugin:llamacpp|get_session_by_model', { modelId })
}
// GGUF commands
export async function readGgufMetadata(path: string): Promise<GgufMetadata> {
return await invoke('plugin:llamacpp|read_gguf_metadata', { path });
return await invoke('plugin:llamacpp|read_gguf_metadata', { path })
}
export async function estimateKVCacheSize(
meta: Record<string, string>,
ctxSize?: number
): Promise<{ size: number; per_token_size: number }> {
return await invoke('plugin:llamacpp|estimate_kv_cache_size', {
meta,
ctxSize,
})
}
export async function getModelSize(path: string): Promise<number> {
return await invoke('plugin:llamacpp|get_model_size', { path })
}
export async function isModelSupported(
path: string,
ctxSize?: number
): Promise<'RED' | 'YELLOW' | 'GREEN'> {
return await invoke('plugin:llamacpp|is_model_supported', {
path,
ctxSize,
})
}
export async function planModelLoadInternal(
path: string,
memoryMode: string,
mmprojPath?: string,
requestedContext?: number
): Promise<{
gpuLayers: number
maxContextLength: number
noOffloadKVCache: boolean
offloadMmproj?: boolean
batchSize: number
mode: 'GPU' | 'Hybrid' | 'CPU' | 'Unsupported'
}> {
return await invoke('plugin:llamacpp|plan_model_load', {
path,
memoryMode,
mmprojPath,
requestedCtx: requestedContext, // invoke key must camelCase-match the Rust `requested_ctx` argument
})
}
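
A short example of chaining the GGUF helpers exported above to size a model before loading it; the path and the 8192-token context length are placeholders, and callers may pass a local file path or an https:// URL:

```typescript
import {
  readGgufMetadata,
  getModelSize,
  estimateKVCacheSize,
  isModelSupported,
} from '@janhq/tauri-plugin-llamacpp-api'

export async function describeModel(path: string) {
  const sizeBytes = await getModelSize(path)
  const gguf = await readGgufMetadata(path)
  const kv = await estimateKVCacheSize(gguf.metadata, 8192)
  const support = await isModelSupported(path, 8192) // 'RED' | 'YELLOW' | 'GREEN'
  return {
    sizeBytes,
    kvCacheBytesAt8k: kv.size,
    kvCacheBytesPerToken: kv.per_token_size,
    support,
  }
}
```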

View File

@ -0,0 +1,13 @@
# Automatically generated - DO NOT EDIT!
"$schema" = "../../schemas/schema.json"
[[permission]]
identifier = "allow-estimate-kv-cache-size"
description = "Enables the estimate_kv_cache_size command without any pre-configured scope."
commands.allow = ["estimate_kv_cache_size"]
[[permission]]
identifier = "deny-estimate-kv-cache-size"
description = "Denies the estimate_kv_cache_size command without any pre-configured scope."
commands.deny = ["estimate_kv_cache_size"]

View File

@ -0,0 +1,13 @@
# Automatically generated - DO NOT EDIT!
"$schema" = "../../schemas/schema.json"
[[permission]]
identifier = "allow-get-model-size"
description = "Enables the get_model_size command without any pre-configured scope."
commands.allow = ["get_model_size"]
[[permission]]
identifier = "deny-get-model-size"
description = "Denies the get_model_size command without any pre-configured scope."
commands.deny = ["get_model_size"]

View File

@ -0,0 +1,13 @@
# Automatically generated - DO NOT EDIT!
"$schema" = "../../schemas/schema.json"
[[permission]]
identifier = "allow-is-model-supported"
description = "Enables the is_model_supported command without any pre-configured scope."
commands.allow = ["is_model_supported"]
[[permission]]
identifier = "deny-is-model-supported"
description = "Denies the is_model_supported command without any pre-configured scope."
commands.deny = ["is_model_supported"]

View File

@ -0,0 +1,13 @@
# Automatically generated - DO NOT EDIT!
"$schema" = "../../schemas/schema.json"
[[permission]]
identifier = "allow-plan-model-load"
description = "Enables the plan_model_load command without any pre-configured scope."
commands.allow = ["plan_model_load"]
[[permission]]
identifier = "deny-plan-model-load"
description = "Denies the plan_model_load command without any pre-configured scope."
commands.deny = ["plan_model_load"]

View File

@ -16,6 +16,10 @@ Default permissions for the llamacpp plugin
- `allow-get-all-sessions`
- `allow-get-session-by-model`
- `allow-read-gguf-metadata`
- `allow-estimate-kv-cache-size`
- `allow-get-model-size`
- `allow-is-model-supported`
- `allow-plan-model-load`
## Permission Table
@ -55,6 +59,32 @@ Denies the cleanup_llama_processes command without any pre-configured scope.
<tr>
<td>
`llamacpp:allow-estimate-kv-cache-size`
</td>
<td>
Enables the estimate_kv_cache_size command without any pre-configured scope.
</td>
</tr>
<tr>
<td>
`llamacpp:deny-estimate-kv-cache-size`
</td>
<td>
Denies the estimate_kv_cache_size command without any pre-configured scope.
</td>
</tr>
<tr>
<td>
`llamacpp:allow-find-session-by-model`
</td>
@ -185,6 +215,32 @@ Denies the get_loaded_models command without any pre-configured scope.
<tr>
<td>
`llamacpp:allow-get-model-size`
</td>
<td>
Enables the get_model_size command without any pre-configured scope.
</td>
</tr>
<tr>
<td>
`llamacpp:deny-get-model-size`
</td>
<td>
Denies the get_model_size command without any pre-configured scope.
</td>
</tr>
<tr>
<td>
`llamacpp:allow-get-random-port`
</td>
@ -237,6 +293,32 @@ Denies the get_session_by_model command without any pre-configured scope.
<tr>
<td>
`llamacpp:allow-is-model-supported`
</td>
<td>
Enables the is_model_supported command without any pre-configured scope.
</td>
</tr>
<tr>
<td>
`llamacpp:deny-is-model-supported`
</td>
<td>
Denies the is_model_supported command without any pre-configured scope.
</td>
</tr>
<tr>
<td>
`llamacpp:allow-is-process-running`
</td>
@ -289,6 +371,32 @@ Denies the load_llama_model command without any pre-configured scope.
<tr>
<td>
`llamacpp:allow-plan-model-load`
</td>
<td>
Enables the plan_model_load command without any pre-configured scope.
</td>
</tr>
<tr>
<td>
`llamacpp:deny-plan-model-load`
</td>
<td>
Denies the plan_model_load command without any pre-configured scope.
</td>
</tr>
<tr>
<td>
`llamacpp:allow-read-gguf-metadata`
</td>

View File

@ -3,10 +3,10 @@ description = "Default permissions for the llamacpp plugin"
permissions = [
# Cleanup commands
"allow-cleanup-llama-processes",
# LlamaCpp server commands
"allow-load-llama-model",
"allow-unload-llama-model",
"allow-unload-llama-model",
"allow-get-devices",
"allow-generate-api-key",
"allow-is-process-running",
@ -15,7 +15,11 @@ permissions = [
"allow-get-loaded-models",
"allow-get-all-sessions",
"allow-get-session-by-model",
# GGUF commands
"allow-read-gguf-metadata"
"allow-read-gguf-metadata",
"allow-estimate-kv-cache-size",
"allow-get-model-size",
"allow-is-model-supported",
"allow-plan-model-load"
]

View File

@ -306,6 +306,18 @@
"const": "deny-cleanup-llama-processes",
"markdownDescription": "Denies the cleanup_llama_processes command without any pre-configured scope."
},
{
"description": "Enables the estimate_kv_cache_size command without any pre-configured scope.",
"type": "string",
"const": "allow-estimate-kv-cache-size",
"markdownDescription": "Enables the estimate_kv_cache_size command without any pre-configured scope."
},
{
"description": "Denies the estimate_kv_cache_size command without any pre-configured scope.",
"type": "string",
"const": "deny-estimate-kv-cache-size",
"markdownDescription": "Denies the estimate_kv_cache_size command without any pre-configured scope."
},
{
"description": "Enables the find_session_by_model command without any pre-configured scope.",
"type": "string",
@ -366,6 +378,18 @@
"const": "deny-get-loaded-models",
"markdownDescription": "Denies the get_loaded_models command without any pre-configured scope."
},
{
"description": "Enables the get_model_size command without any pre-configured scope.",
"type": "string",
"const": "allow-get-model-size",
"markdownDescription": "Enables the get_model_size command without any pre-configured scope."
},
{
"description": "Denies the get_model_size command without any pre-configured scope.",
"type": "string",
"const": "deny-get-model-size",
"markdownDescription": "Denies the get_model_size command without any pre-configured scope."
},
{
"description": "Enables the get_random_port command without any pre-configured scope.",
"type": "string",
@ -390,6 +414,18 @@
"const": "deny-get-session-by-model",
"markdownDescription": "Denies the get_session_by_model command without any pre-configured scope."
},
{
"description": "Enables the is_model_supported command without any pre-configured scope.",
"type": "string",
"const": "allow-is-model-supported",
"markdownDescription": "Enables the is_model_supported command without any pre-configured scope."
},
{
"description": "Denies the is_model_supported command without any pre-configured scope.",
"type": "string",
"const": "deny-is-model-supported",
"markdownDescription": "Denies the is_model_supported command without any pre-configured scope."
},
{
"description": "Enables the is_process_running command without any pre-configured scope.",
"type": "string",
@ -414,6 +450,18 @@
"const": "deny-load-llama-model",
"markdownDescription": "Denies the load_llama_model command without any pre-configured scope."
},
{
"description": "Enables the plan_model_load command without any pre-configured scope.",
"type": "string",
"const": "allow-plan-model-load",
"markdownDescription": "Enables the plan_model_load command without any pre-configured scope."
},
{
"description": "Denies the plan_model_load command without any pre-configured scope.",
"type": "string",
"const": "deny-plan-model-load",
"markdownDescription": "Denies the plan_model_load command without any pre-configured scope."
},
{
"description": "Enables the read_gguf_metadata command without any pre-configured scope.",
"type": "string",
@ -439,10 +487,10 @@
"markdownDescription": "Denies the unload_llama_model command without any pre-configured scope."
},
{
"description": "Default permissions for the llamacpp plugin\n#### This default permission set includes:\n\n- `allow-cleanup-llama-processes`\n- `allow-load-llama-model`\n- `allow-unload-llama-model`\n- `allow-get-devices`\n- `allow-generate-api-key`\n- `allow-is-process-running`\n- `allow-get-random-port`\n- `allow-find-session-by-model`\n- `allow-get-loaded-models`\n- `allow-get-all-sessions`\n- `allow-get-session-by-model`\n- `allow-read-gguf-metadata`",
"description": "Default permissions for the llamacpp plugin\n#### This default permission set includes:\n\n- `allow-cleanup-llama-processes`\n- `allow-load-llama-model`\n- `allow-unload-llama-model`\n- `allow-get-devices`\n- `allow-generate-api-key`\n- `allow-is-process-running`\n- `allow-get-random-port`\n- `allow-find-session-by-model`\n- `allow-get-loaded-models`\n- `allow-get-all-sessions`\n- `allow-get-session-by-model`\n- `allow-read-gguf-metadata`\n- `allow-estimate-kv-cache-size`\n- `allow-get-model-size`\n- `allow-is-model-supported`\n- `allow-plan-model-load`",
"type": "string",
"const": "default",
"markdownDescription": "Default permissions for the llamacpp plugin\n#### This default permission set includes:\n\n- `allow-cleanup-llama-processes`\n- `allow-load-llama-model`\n- `allow-unload-llama-model`\n- `allow-get-devices`\n- `allow-generate-api-key`\n- `allow-is-process-running`\n- `allow-get-random-port`\n- `allow-find-session-by-model`\n- `allow-get-loaded-models`\n- `allow-get-all-sessions`\n- `allow-get-session-by-model`\n- `allow-read-gguf-metadata`"
"markdownDescription": "Default permissions for the llamacpp plugin\n#### This default permission set includes:\n\n- `allow-cleanup-llama-processes`\n- `allow-load-llama-model`\n- `allow-unload-llama-model`\n- `allow-get-devices`\n- `allow-generate-api-key`\n- `allow-is-process-running`\n- `allow-get-random-port`\n- `allow-find-session-by-model`\n- `allow-get-loaded-models`\n- `allow-get-all-sessions`\n- `allow-get-session-by-model`\n- `allow-read-gguf-metadata`\n- `allow-estimate-kv-cache-size`\n- `allow-get-model-size`\n- `allow-is-model-supported`\n- `allow-plan-model-load`"
}
]
}

View File

@ -1,58 +1,141 @@
use super::helpers;
use super::types::GgufMetadata;
use reqwest;
use std::fs::File;
use std::io::BufReader;
use super::utils::{estimate_kv_cache_internal, read_gguf_metadata_internal};
use crate::gguf::types::{KVCacheError, KVCacheEstimate, ModelSupportStatus};
use std::collections::HashMap;
use std::fs;
use tauri::Runtime;
use tauri_plugin_hardware::get_system_info;
/// Read GGUF metadata from a model file
#[tauri::command]
pub async fn read_gguf_metadata(path: String) -> Result<GgufMetadata, String> {
if path.starts_with("http://") || path.starts_with("https://") {
// Remote: read in 2MB chunks until successful
return read_gguf_metadata_internal(path).await;
}
#[tauri::command]
pub async fn estimate_kv_cache_size(
meta: HashMap<String, String>,
ctx_size: Option<u64>,
) -> Result<KVCacheEstimate, KVCacheError> {
estimate_kv_cache_internal(meta, ctx_size).await
}
#[tauri::command]
pub async fn get_model_size(path: String) -> Result<u64, String> {
if path.starts_with("https://") {
// Handle remote URL
let client = reqwest::Client::new();
let chunk_size = 2 * 1024 * 1024; // Fixed 2MB chunks
let max_total_size = 120 * 1024 * 1024; // Don't exceed 120MB total
let mut total_downloaded = 0;
let mut accumulated_data = Vec::new();
let response = client
.head(&path)
.send()
.await
.map_err(|e| format!("Failed to fetch HEAD request: {}", e))?;
while total_downloaded < max_total_size {
let start = total_downloaded;
let end = std::cmp::min(start + chunk_size - 1, max_total_size - 1);
let resp = client
.get(&path)
.header("Range", format!("bytes={}-{}", start, end))
.send()
.await
.map_err(|e| format!("Failed to fetch chunk {}-{}: {}", start, end, e))?;
let chunk_data = resp
.bytes()
.await
.map_err(|e| format!("Failed to read chunk response: {}", e))?;
accumulated_data.extend_from_slice(&chunk_data);
total_downloaded += chunk_data.len();
// Try parsing after each chunk
let cursor = std::io::Cursor::new(&accumulated_data);
if let Ok(metadata) = helpers::read_gguf_metadata(cursor) {
return Ok(metadata);
}
// If we got less data than expected, we've reached EOF
if chunk_data.len() < chunk_size {
break;
}
if let Some(content_length) = response.headers().get("content-length") {
let content_length_str = content_length
.to_str()
.map_err(|e| format!("Invalid content-length header: {}", e))?;
content_length_str
.parse::<u64>()
.map_err(|e| format!("Failed to parse content-length: {}", e))
} else {
Ok(0)
}
Err("Could not parse GGUF metadata from downloaded data".to_string())
} else {
// Local: use streaming file reader
let file =
File::open(&path).map_err(|e| format!("Failed to open local file {}: {}", path, e))?;
let reader = BufReader::new(file);
helpers::read_gguf_metadata(reader)
.map_err(|e| format!("Failed to parse GGUF metadata: {}", e))
// Handle local file using standard fs
let metadata =
fs::metadata(&path).map_err(|e| format!("Failed to get file metadata: {}", e))?;
Ok(metadata.len())
}
}
#[tauri::command]
pub async fn is_model_supported<R: Runtime>(
path: String,
ctx_size: Option<u32>,
app_handle: tauri::AppHandle<R>,
) -> Result<ModelSupportStatus, String> {
// Get model size
let model_size = get_model_size(path.clone()).await?;
// Get system info
let system_info = get_system_info(app_handle.clone());
log::info!("modelSize: {}", model_size);
// Read GGUF metadata
let gguf = read_gguf_metadata(path.clone()).await?;
// Calculate KV cache size
let kv_cache_size = if let Some(ctx_size) = ctx_size {
log::info!("Using ctx_size: {}", ctx_size);
estimate_kv_cache_internal(gguf.metadata, Some(ctx_size as u64))
.await
.map_err(|e| e.to_string())?
.size
} else {
estimate_kv_cache_internal(gguf.metadata, None)
.await
.map_err(|e| e.to_string())?
.size
};
// Total memory consumption = model weights + kvcache
let total_required = model_size + kv_cache_size;
log::info!(
"isModelSupported: Total memory requirement: {} for {}; Got kvCacheSize: {} from BE",
total_required,
path,
kv_cache_size
);
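// RESERVE_BYTES ≈ 2.13 GiB held back for the OS and other applications before sizing the model.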
const RESERVE_BYTES: u64 = 2288490189;
let total_system_memory = system_info.total_memory * 1024 * 1024;
// Calculate total VRAM from all GPUs
let total_vram: u64 = if system_info.gpus.is_empty() {
// On macOS with unified memory, GPU info may be empty
// Use total RAM as VRAM since memory is shared
log::info!("No GPUs detected (likely unified memory system), using total RAM as VRAM");
total_system_memory
} else {
system_info
.gpus
.iter()
.map(|g| g.total_memory * 1024 * 1024)
.sum::<u64>()
};
log::info!("Total VRAM reported/calculated (in bytes): {}", &total_vram);
let usable_vram = if total_vram > RESERVE_BYTES {
total_vram - RESERVE_BYTES
} else {
0
};
let usable_total_memory = if total_system_memory > RESERVE_BYTES {
(total_system_memory - RESERVE_BYTES) + usable_vram
} else {
0
};
log::info!("System RAM: {} bytes", &total_system_memory);
log::info!("Total VRAM: {} bytes", &total_vram);
log::info!("Usable total memory: {} bytes", &usable_total_memory);
log::info!("Usable VRAM: {} bytes", &usable_vram);
log::info!("Required: {} bytes", &total_required);
// Check if model fits in total memory at all (this is the hard limit)
if total_required > usable_total_memory {
return Ok(ModelSupportStatus::Red); // Truly impossible to run
}
// Check if everything fits in VRAM (ideal case)
if total_required <= usable_vram {
return Ok(ModelSupportStatus::Green);
}
// If we get here, it means:
// - Total requirement fits in combined memory
// - But doesn't fit entirely in VRAM
// This is the CPU-GPU hybrid scenario
Ok(ModelSupportStatus::Yellow)
}
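
Plugging illustrative numbers into the traffic-light check above (all figures are assumptions): a 7 GB model with a 1.5 GB KV cache on a 32 GB RAM / 8 GB VRAM machine, using the same ~2.13 GiB reserve:

```typescript
const RESERVE = 2_288_490_189 // same reserve as the Rust code above

const required = 7_000_000_000 + 1_500_000_000 // weights + KV cache = 8.5 GB
const totalRam = 32 * 1024 ** 3
const totalVram = 8 * 1024 ** 3

const usableVram = Math.max(0, totalVram - RESERVE) // ≈ 6.3 GB
const usableTotal = Math.max(0, totalRam - RESERVE) + usableVram // ≈ 38.4 GB

const verdict =
  required > usableTotal ? 'RED' : required <= usableVram ? 'GREEN' : 'YELLOW'
// 8.5 GB exceeds usable VRAM but fits in combined memory -> 'YELLOW'
console.log(verdict)
```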

View File

@ -1,3 +1,5 @@
pub mod commands;
pub mod helpers;
pub mod types;
pub mod utils;
pub mod model_planner;

View File

@ -0,0 +1,318 @@
use crate::gguf::commands::get_model_size;
use crate::gguf::utils::estimate_kv_cache_internal;
use crate::gguf::utils::read_gguf_metadata_internal;
use serde::{Deserialize, Serialize};
use std::collections::HashMap;
use tauri::Runtime;
use tauri_plugin_hardware::get_system_info;
#[derive(Serialize, Deserialize, Clone, Debug)]
#[serde(rename_all = "camelCase")]
pub struct ModelPlan {
pub gpu_layers: u64,
pub max_context_length: u64,
pub no_offload_kv_cache: bool,
pub offload_mmproj: bool,
pub batch_size: u64,
pub mode: ModelMode,
}
#[derive(Serialize, Deserialize, Clone, Debug, PartialEq)]
#[serde(rename_all = "UPPERCASE")]
pub enum ModelMode {
GPU,
Hybrid,
CPU,
Unsupported,
}
#[tauri::command]
pub async fn plan_model_load<R: Runtime>(
path: String,
memory_mode: String,
mmproj_path: Option<String>,
requested_ctx: Option<u64>,
app: tauri::AppHandle<R>,
) -> Result<ModelPlan, String> {
let model_size = get_model_size(path.clone()).await?;
let sys_info = get_system_info(app.clone());
let gguf = read_gguf_metadata_internal(path.clone()).await?;
let mut mmproj_size: u64 = 0;
if let Some(ref mmproj) = mmproj_path {
mmproj_size = get_model_size(mmproj.clone()).await?;
}
let arch = gguf
.metadata
.get("general.architecture")
.ok_or("Missing architecture")?;
let repeating_layers: u64 = gguf
.metadata
.get(&format!("{arch}.block_count"))
.ok_or("Missing block_count")?
.parse()
.map_err(|_| "Invalid block_count")?;
let total_layers = repeating_layers + 1;
let layer_size = model_size / total_layers;
let kv_cache = estimate_kv_cache_internal(gguf.metadata.clone(), None)
.await
.map_err(|e| e.to_string())?;
let kv_cache_per_token = kv_cache.per_token_size;
if model_size == 0 || layer_size == 0 || kv_cache_per_token == 0 {
return Err("Invalid model/layer/cache sizes".into());
}
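// ≈ 2.13 GiB reserve, same value used by is_model_supported in commands.rs.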
const RESERVE_BYTES: u64 = 2288490189;
const MIN_CONTEXT_LENGTH: u64 = 2048;
let model_max_ctx: u64 = gguf
.metadata
.get(&format!("{arch}.context_length"))
.and_then(|s| s.parse().ok())
.unwrap_or(8192);
let memory_percentages = HashMap::from([("high", 0.7), ("medium", 0.5), ("low", 0.4)]);
let multiplier = *memory_percentages
.get(memory_mode.as_str())
.ok_or("Invalid memory mode")?;
log::info!("Got GPUs:\n{:?}", &sys_info.gpus);
let total_ram: u64 = sys_info.total_memory * 1024 * 1024;
log::info!(
"Total system memory reported from tauri_plugin_hardware(in bytes): {}",
&total_ram
);
let total_vram: u64 = if sys_info.gpus.is_empty() {
// On macOS with unified memory, GPU info may be empty
// Use total RAM as VRAM since memory is shared
log::info!("No GPUs detected (likely unified memory system), using total RAM as VRAM");
total_ram
} else {
sys_info
.gpus
.iter()
.map(|g| g.total_memory * 1024 * 1024)
.sum::<u64>()
};
log::info!("Total VRAM reported/calculated (in bytes): {}", &total_vram);
let usable_vram: u64 = if total_vram > RESERVE_BYTES {
(((total_vram - RESERVE_BYTES) as f64) * multiplier) as u64
} else {
0
};
log::info!("Usable vram calculated: {}", &usable_vram);
let usable_ram: u64 = if total_ram > RESERVE_BYTES {
(((total_ram - RESERVE_BYTES) as f64) * multiplier).max(0.0) as u64
} else {
0
};
log::info!("Usable ram calculated (in bytes): {}", &usable_ram);
let mut gpu_layers = 0;
let mut max_ctx_len = 0;
let mut no_offload_kv_cache = false;
let mut mode = ModelMode::Unsupported;
let mut offload_mmproj = false;
let mut batch_size = 2048;
let total_available_mem = usable_vram.saturating_add(usable_ram);
if model_size + mmproj_size > total_available_mem {
log::info!("Model not supported in this system!");
return Ok(ModelPlan {
gpu_layers: 0,
max_context_length: 0,
no_offload_kv_cache: true,
batch_size: 64,
mode: ModelMode::Unsupported,
offload_mmproj: false,
});
}
if mmproj_size > 0 {
offload_mmproj = true;
}
let kv_min_size = estimate_kv_cache_internal(gguf.metadata.clone(), Some(MIN_CONTEXT_LENGTH))
.await
.map_err(|e| e.to_string())?
.size;
if model_size + kv_min_size + mmproj_size <= usable_vram {
log::info!("Planning mode: Full GPU offload is possible.");
mode = ModelMode::GPU;
gpu_layers = total_layers;
let vram_left_for_ctx = usable_vram.saturating_sub(model_size);
let max_ctx_by_vram = (vram_left_for_ctx / kv_cache_per_token) as u64;
let requested_target = requested_ctx.unwrap_or(model_max_ctx).min(model_max_ctx);
max_ctx_len = requested_target.min(max_ctx_by_vram);
no_offload_kv_cache = false;
offload_mmproj = true;
} else {
let mut found_plan = false;
log::info!("Attempting VRAM-Maximized Hybrid plan (KV cache in VRAM only).");
for candidate_gpu_layers in (0..=total_layers).rev() {
let vram_used_by_layers = candidate_gpu_layers.saturating_mul(layer_size);
if vram_used_by_layers > usable_vram {
continue;
}
let ram_used_by_cpu_layers =
(total_layers.saturating_sub(candidate_gpu_layers)).saturating_mul(layer_size);
let ram_used_by_mmproj = if offload_mmproj { 0 } else { mmproj_size };
let required_ram_for_model = ram_used_by_cpu_layers.saturating_add(ram_used_by_mmproj);
if required_ram_for_model > usable_ram {
continue;
}
let vram_left_for_kv = usable_vram.saturating_sub(vram_used_by_layers);
let ctx_in_vram_only = (vram_left_for_kv / kv_cache_per_token) as u64;
if ctx_in_vram_only >= MIN_CONTEXT_LENGTH {
log::info!(
"Found VRAM-Maximized Hybrid plan with {} GPU layers.",
candidate_gpu_layers
);
mode = ModelMode::Hybrid;
gpu_layers = candidate_gpu_layers;
let requested_target = requested_ctx.unwrap_or(model_max_ctx).min(model_max_ctx);
max_ctx_len = requested_target.min(ctx_in_vram_only);
no_offload_kv_cache = false;
found_plan = true;
break;
}
}
if !found_plan {
log::info!("VRAM-Maximized plan not feasible. Falling back to Standard Hybrid (KV cache in VRAM+RAM).");
for candidate_gpu_layers in (0..=total_layers).rev() {
let vram_used_by_layers = candidate_gpu_layers.saturating_mul(layer_size);
if vram_used_by_layers > usable_vram {
continue;
}
let vram_left_for_kv = usable_vram.saturating_sub(vram_used_by_layers);
let kv_in_vram = (vram_left_for_kv / kv_cache_per_token) as u64;
let ram_used_by_cpu_layers =
(total_layers.saturating_sub(candidate_gpu_layers)).saturating_mul(layer_size);
let ram_used_by_mmproj = if offload_mmproj { 0 } else { mmproj_size };
let required_ram_for_model =
ram_used_by_cpu_layers.saturating_add(ram_used_by_mmproj);
if required_ram_for_model > usable_ram {
continue;
}
let available_ram_for_kv = usable_ram.saturating_sub(required_ram_for_model);
let kv_in_ram = (available_ram_for_kv / kv_cache_per_token) as u64;
let total_kv_tokens = kv_in_vram.saturating_add(kv_in_ram);
if total_kv_tokens >= MIN_CONTEXT_LENGTH {
log::info!(
"Found Standard Hybrid plan with {} GPU layers.",
candidate_gpu_layers
);
mode = if candidate_gpu_layers > 0 {
ModelMode::Hybrid
} else {
ModelMode::CPU
};
gpu_layers = candidate_gpu_layers;
let requested_target =
requested_ctx.unwrap_or(model_max_ctx).min(model_max_ctx);
let max_possible_ctx = total_kv_tokens.min(model_max_ctx);
max_ctx_len = requested_target.min(max_possible_ctx);
no_offload_kv_cache = kv_in_ram > 0 && kv_in_vram == 0;
found_plan = true;
break;
}
}
}
if !found_plan {
log::info!("No hybrid plan found. Attempting CPU-only plan.");
if model_size + mmproj_size <= usable_ram {
let available_ram_for_kv = usable_ram.saturating_sub(model_size + mmproj_size);
let kv_tokens = (available_ram_for_kv / kv_cache_per_token) as u64;
if kv_tokens >= MIN_CONTEXT_LENGTH {
mode = ModelMode::CPU;
gpu_layers = 0;
max_ctx_len = kv_tokens
.min(requested_ctx.unwrap_or(model_max_ctx))
.min(model_max_ctx);
no_offload_kv_cache = true;
offload_mmproj = false;
}
}
}
}
if let Some(req) = requested_ctx {
if req > 0 {
max_ctx_len = max_ctx_len.min(req);
}
}
max_ctx_len = max_ctx_len.min(model_max_ctx);
if max_ctx_len > 0 {
log::info!("Max context before power-of-2 adjustment: {}", max_ctx_len);
max_ctx_len = 1u64 << (63 - max_ctx_len.leading_zeros());
log::info!("Adjusted max context to power of 2: {}", max_ctx_len);
}
if mode == ModelMode::Unsupported {
if max_ctx_len >= MIN_CONTEXT_LENGTH {
// Context length is still viable even though no plan assigned a mode; keep the values as-is.
} else {
gpu_layers = 0;
max_ctx_len = 0;
offload_mmproj = false;
}
} else if max_ctx_len < MIN_CONTEXT_LENGTH {
log::info!(
"Final context length {} is less than minimum required {}. Marking as unsupported.",
max_ctx_len,
MIN_CONTEXT_LENGTH
);
mode = ModelMode::Unsupported;
gpu_layers = 0;
max_ctx_len = 0;
offload_mmproj = false;
}
if mode == ModelMode::Hybrid {
batch_size = 256;
} else if mode == ModelMode::CPU || no_offload_kv_cache || mode == ModelMode::Unsupported {
batch_size = 64;
}
if max_ctx_len > 0 {
batch_size = batch_size.min(max_ctx_len);
} else {
batch_size = 64;
}
if mode == ModelMode::CPU || no_offload_kv_cache {
offload_mmproj = false;
}
log::info!("Planned model load params: GPU Layers: {}, max_ctx_len: {}, kv_cache offload: {}, offload mmproj: {}, batch_size: {}",
gpu_layers, max_ctx_len, !no_offload_kv_cache, offload_mmproj, batch_size);
Ok(ModelPlan {
gpu_layers,
max_context_length: max_ctx_len,
no_offload_kv_cache,
offload_mmproj,
batch_size,
mode,
})
}
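For intuition, a back-of-the-envelope run of the "full GPU offload" branch of plan_model_load above, as a minimal standalone sketch. All figures are hypothetical (8 GiB of usable VRAM, a 6 GiB text-only model, a 128 KiB/token fp16 KV cost, and an assumed stand-in value for MIN_CONTEXT_LENGTH); none of them come from this repository.

fn main() {
    // Hypothetical sizes in bytes, purely for illustration.
    let usable_vram: u64 = 8 * 1024 * 1024 * 1024; // 8 GiB budget on the GPU
    let model_size: u64 = 6 * 1024 * 1024 * 1024;  // 6 GiB of weights
    let mmproj_size: u64 = 0;                      // text-only model, no projector
    let kv_cache_per_token: u64 = 131_072;         // 128 KiB per token (fp16)
    let min_context_length: u64 = 2_048;           // assumed stand-in for MIN_CONTEXT_LENGTH
    let model_max_ctx: u64 = 32_768;

    // Approximates the estimator's full-attention KV cost at the minimum viable context.
    let kv_min_size = min_context_length * kv_cache_per_token; // 256 MiB

    // Mirrors the first branch above: weights + minimal KV + mmproj fit in VRAM,
    // so whatever VRAM is left after the weights bounds the usable context length.
    if model_size + kv_min_size + mmproj_size <= usable_vram {
        let vram_left_for_ctx = usable_vram - model_size;          // 2 GiB
        let max_ctx_by_vram = vram_left_for_ctx / kv_cache_per_token; // 16_384 tokens
        let max_ctx_len = max_ctx_by_vram.min(model_max_ctx);
        println!("Full GPU offload, context capped at {} tokens", max_ctx_len);
    }
}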

View File

@ -1,4 +1,4 @@
use serde::Serialize;
use serde::{Deserialize, Serialize};
use std::collections::HashMap;
use std::convert::TryFrom;
use std::io;
@ -52,3 +52,42 @@ pub struct GgufMetadata {
pub tensor_count: u64,
pub metadata: HashMap<String, String>,
}
#[derive(Debug, Serialize, Deserialize)]
pub struct KVCacheEstimate {
pub size: u64,
pub per_token_size: u64,
}
#[derive(Debug, thiserror::Error)]
pub enum KVCacheError {
#[error("Invalid metadata: architecture not found")]
ArchitectureNotFound,
#[error("Invalid metadata: block_count not found or invalid")]
BlockCountInvalid,
#[error("Invalid metadata: head_count not found or invalid")]
HeadCountInvalid,
#[error("Invalid metadata: embedding_length not found or invalid")]
EmbeddingLengthInvalid,
#[error("Invalid metadata: context_length not found or invalid")]
ContextLengthInvalid,
}
impl serde::Serialize for KVCacheError {
fn serialize<S>(&self, serializer: S) -> Result<S::Ok, S::Error>
where
S: serde::Serializer,
{
serializer.serialize_str(&self.to_string())
}
}
#[derive(Debug, Clone, Copy, PartialEq, serde::Serialize)]
pub enum ModelSupportStatus {
#[serde(rename = "RED")]
Red,
#[serde(rename = "YELLOW")]
Yellow,
#[serde(rename = "GREEN")]
Green,
}

View File

@ -0,0 +1,164 @@
use crate::gguf::helpers;
use crate::gguf::types::{GgufMetadata, KVCacheError, KVCacheEstimate};
use std::collections::HashMap;
use std::fs::File;
use std::io::BufReader;
// Read GGUF metadata from a local file or a remote URL
pub async fn read_gguf_metadata_internal(path: String) -> Result<GgufMetadata, String> {
if path.starts_with("http://") || path.starts_with("https://") {
// Remote: read in 2MB chunks until successful
let client = reqwest::Client::new();
let chunk_size = 2 * 1024 * 1024; // Fixed 2MB chunks
let max_total_size = 120 * 1024 * 1024; // Don't exceed 120MB total
let mut total_downloaded = 0;
let mut accumulated_data = Vec::new();
while total_downloaded < max_total_size {
let start = total_downloaded;
let end = std::cmp::min(start + chunk_size - 1, max_total_size - 1);
let resp = client
.get(&path)
.header("Range", format!("bytes={}-{}", start, end))
.send()
.await
.map_err(|e| format!("Failed to fetch chunk {}-{}: {}", start, end, e))?;
let chunk_data = resp
.bytes()
.await
.map_err(|e| format!("Failed to read chunk response: {}", e))?;
accumulated_data.extend_from_slice(&chunk_data);
total_downloaded += chunk_data.len();
// Try parsing after each chunk
let cursor = std::io::Cursor::new(&accumulated_data);
if let Ok(metadata) = helpers::read_gguf_metadata(cursor) {
return Ok(metadata);
}
// If we got less data than expected, we've reached EOF
if chunk_data.len() < chunk_size {
break;
}
}
Err("Could not parse GGUF metadata from downloaded data".to_string())
} else {
// Local: use streaming file reader
let file =
File::open(&path).map_err(|e| format!("Failed to open local file {}: {}", path, e))?;
let reader = BufReader::new(file);
helpers::read_gguf_metadata(reader)
.map_err(|e| format!("Failed to parse GGUF metadata: {}", e))
}
}
/// Estimate the KV cache size from the given GGUF metadata
pub async fn estimate_kv_cache_internal(
meta: HashMap<String, String>,
ctx_size: Option<u64>,
) -> Result<KVCacheEstimate, KVCacheError> {
log::info!("Received ctx_size parameter: {:?}", ctx_size);
let arch = meta
.get("general.architecture")
.ok_or(KVCacheError::ArchitectureNotFound)?;
// Number of layers
let n_layer_key = format!("{}.block_count", arch);
let n_layer = meta
.get(&n_layer_key)
.and_then(|s| s.parse::<u64>().ok())
.filter(|&n| n > 0)
.ok_or(KVCacheError::BlockCountInvalid)?;
// Attention heads (use kv heads if present, else full heads)
let n_head_key = format!("{}.attention.head_count", arch);
let n_head_kv_key = format!("{}.attention.head_count_kv", arch);
let n_head = meta
.get(&n_head_kv_key)
.and_then(|s| s.parse::<u64>().ok())
.filter(|&n| n > 0)
.unwrap_or_else(|| {
meta.get(&n_head_key)
.and_then(|s| s.parse::<u64>().ok())
.unwrap_or(0)
});
if n_head == 0 {
return Err(KVCacheError::HeadCountInvalid);
}
// Key/value dimensions
let key_len_key = format!("{}.attention.key_length", arch);
let val_len_key = format!("{}.attention.value_length", arch);
let key_len = meta
.get(&key_len_key)
.and_then(|s| s.parse::<u64>().ok())
.unwrap_or(0);
let val_len = meta
.get(&val_len_key)
.and_then(|s| s.parse::<u64>().ok())
.unwrap_or(0);
if key_len == 0 || val_len == 0 {
return Err(KVCacheError::EmbeddingLengthInvalid);
}
// Context length
let max_ctx_key = format!("{}.context_length", arch);
let max_ctx = meta
.get(&max_ctx_key)
.and_then(|s| s.parse::<u64>().ok())
.filter(|&n| n > 0)
.ok_or(KVCacheError::ContextLengthInvalid)?;
let ctx_len = ctx_size.map(|size| size.min(max_ctx)).unwrap_or(max_ctx);
// Sliding window if present
let sliding_key = format!("{}.attention.sliding_window", arch);
let sliding_window = meta
.get(&sliding_key)
.and_then(|s| s.parse::<u64>().ok())
.filter(|&n| n > 0);
// Assume fp16
const BYTES_PER_ELEMENT: u64 = 2;
// Per-token KV size
let kv_per_token = n_layer * n_head * (key_len + val_len) * BYTES_PER_ELEMENT;
// Pure full-attention cost
let full_cost = ctx_len * kv_per_token;
// Pure sliding-window cost (tiny, only keeps last W tokens)
let sliding_cost = sliding_window.map(|w| w * kv_per_token);
// Middle estimate: average of sliding + full if sliding_window is present
let chosen_size = if let Some(slide) = sliding_cost {
let middle = (full_cost + slide) / 2;
log::info!(
"KV estimates -> sliding: {} bytes (~{:.2} MB), full: {} bytes (~{:.2} MB), middle: {} bytes (~{:.2} MB)",
slide,
slide as f64 / (1024.0 * 1024.0),
full_cost,
full_cost as f64 / (1024.0 * 1024.0),
middle,
middle as f64 / (1024.0 * 1024.0)
);
middle
} else {
log::info!(
"KV estimate (no SWA detected) -> full: {} bytes (~{:.2} MB)",
full_cost,
full_cost as f64 / (1024.0 * 1024.0)
);
full_cost
};
Ok(KVCacheEstimate {
size: chosen_size,
per_token_size: kv_per_token,
})
}
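To make the per-token formula above concrete, here is a tiny self-contained sketch with hypothetical Llama-style metadata (32 layers, 8 KV heads, 128-dim keys and values); the values are invented for the example and are not read from any real GGUF file.

fn main() {
    // Hypothetical metadata values, chosen only for illustration.
    let n_layer: u64 = 32;    // block_count
    let n_head_kv: u64 = 8;   // attention.head_count_kv (grouped-query attention)
    let key_len: u64 = 128;   // attention.key_length
    let val_len: u64 = 128;   // attention.value_length
    const BYTES_PER_ELEMENT: u64 = 2; // fp16, as assumed by estimate_kv_cache_internal

    // Same formula as above: n_layer * n_head * (key_len + val_len) * 2 bytes.
    let kv_per_token = n_layer * n_head_kv * (key_len + val_len) * BYTES_PER_ELEMENT;
    assert_eq!(kv_per_token, 131_072); // 128 KiB per cached token

    // Full-attention cost at an 8192-token context: ctx_len * kv_per_token.
    let full_cost = 8_192 * kv_per_token;
    assert_eq!(full_cost, 1_073_741_824); // exactly 1 GiB
    println!("{} bytes per token, {} bytes at an 8k context", kv_per_token, full_cost);
}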

View File

@ -33,6 +33,10 @@ pub fn init<R: Runtime>() -> TauriPlugin<R> {
commands::get_session_by_model,
// GGUF commands
gguf::commands::read_gguf_metadata,
gguf::commands::estimate_kv_cache_size,
gguf::commands::get_model_size,
gguf::commands::is_model_supported,
gguf::model_planner::plan_model_load
])
.setup(|app, _api| {
// Initialize and manage the plugin state

View File

@ -33,6 +33,22 @@ pub fn mkdir<R: Runtime>(app_handle: tauri::AppHandle<R>, args: Vec<String>) ->
fs::create_dir_all(&path).map_err(|e| e.to_string())
}
#[tauri::command]
pub fn mv<R: Runtime>(app_handle: tauri::AppHandle<R>, args: Vec<String>) -> Result<(), String> {
if args.len() < 2 || args[0].is_empty() || args[1].is_empty() {
return Err("mv error: Invalid argument - source and destination required".to_string());
}
let source = resolve_path(app_handle.clone(), &args[0]);
let destination = resolve_path(app_handle, &args[1]);
if !source.exists() {
return Err("mv error: Source path does not exist".to_string());
}
fs::rename(&source, &destination).map_err(|e| e.to_string())
}
#[tauri::command]
pub fn join_path<R: Runtime>(
app_handle: tauri::AppHandle<R>,
@ -193,7 +209,7 @@ pub fn decompress(app: tauri::AppHandle, path: &str, output_dir: &str) -> Result
fs::File::open(&path_buf).map_err(|e| e.to_string())?
}
};
#[cfg(not(windows))]
let file = fs::File::open(&path_buf).map_err(|e| e.to_string())?;
if path.ends_with(".tar.gz") {
@ -222,7 +238,10 @@ pub fn decompress(app: tauri::AppHandle, path: &str, output_dir: &str) -> Result
{
use std::os::unix::fs::PermissionsExt;
if let Some(mode) = entry.unix_mode() {
let _ = std::fs::set_permissions(&outpath, std::fs::Permissions::from_mode(mode));
let _ = std::fs::set_permissions(
&outpath,
std::fs::Permissions::from_mode(mode),
);
}
}
}

View File

@ -8,7 +8,6 @@ use core::{
};
use jan_utils::generate_app_token;
use std::{collections::HashMap, sync::Arc};
use tauri_plugin_deep_link::DeepLinkExt;
use tauri::{Emitter, Manager, RunEvent};
use tauri_plugin_llamacpp::cleanup_llama_processes;
use tokio::sync::Mutex;
@ -54,6 +53,7 @@ pub fn run() {
core::filesystem::commands::readdir_sync,
core::filesystem::commands::read_file_sync,
core::filesystem::commands::rm,
core::filesystem::commands::mv,
core::filesystem::commands::file_stat,
core::filesystem::commands::write_file_sync,
core::filesystem::commands::write_yaml,
@ -163,6 +163,8 @@ pub fn run() {
#[cfg(any(windows, target_os = "linux"))]
{
use tauri_plugin_deep_link::DeepLinkExt;
app.deep_link().register_all()?;
}
setup_mcp(app);

File diff suppressed because it is too large

View File

@ -1,10 +1,13 @@
{
"bundle": {
"targets": ["nsis"],
"resources": ["resources/pre-install/**/*"],
"resources": ["resources/pre-install/**/*", "resources/lib/vulkan-1.dll", "resources/lib/vc_redist.x64.exe", "resources/LICENSE"],
"externalBin": ["resources/bin/bun", "resources/bin/uv"],
"windows": {
"signCommand": "powershell -ExecutionPolicy Bypass -File ./sign.ps1 %1",
"nsis": {
"installerHooks": "./windows/hooks.nsh",
"installerIcon": "icons/icon.ico"
},
"webviewInstallMode": {
"silent": true,
"type": "downloadBootstrapper"

View File

@ -0,0 +1,65 @@
!macro NSIS_HOOK_POSTINSTALL
; Check if Visual C++ Redistributable is already installed
ReadRegStr $0 HKLM "SOFTWARE\Microsoft\VisualStudio\14.0\VC\Runtimes\x64" "Version"
${If} $0 == ""
; Try alternative registry location
ReadRegStr $0 HKLM "SOFTWARE\WOW6432Node\Microsoft\VisualStudio\14.0\VC\Runtimes\x64" "Version"
${EndIf}
${If} $0 == ""
; VC++ Redistributable not found, need to install
DetailPrint "Visual C++ Redistributable not found, installing from bundled file..."
; Install from bundled EXE if not installed
${If} ${FileExists} "$INSTDIR\resources\lib\vc_redist.x64.exe"
DetailPrint "Installing Visual C++ Redistributable..."
; Copy to TEMP folder and then execute installer
CopyFiles "$INSTDIR\resources\lib\vc_redist.x64.exe" "$TEMP\vc_redist.x64.exe"
ExecWait '"$TEMP\vc_redist.x64.exe" /quiet /norestart' $1
; Check whether installation process exited successfully (code 0) or not
${If} $1 == 0
DetailPrint "Visual C++ Redistributable installed successfully"
${ElseIf} $1 == 1638
DetailPrint "Visual C++ Redistributable already installed (newer version)"
${ElseIf} $1 == 3010
DetailPrint "Visual C++ Redistributable installed successfully (restart required)"
${Else}
DetailPrint "Visual C++ installation failed with exit code: $1"
${EndIf}
; Clean up the installer files from TEMP and from the install directory
Delete "$TEMP\vc_redist.x64.exe"
Delete "$INSTDIR\resources\lib\vc_redist.x64.exe"
${Else}
DetailPrint "Visual C++ Redistributable not found at expected location: $INSTDIR\resources\lib\vc_redist.x64.exe"
${EndIf}
${Else}
DetailPrint "Visual C++ Redistributable already installed (version: $0)"
${EndIf}
; ---- Copy LICENSE to install root ----
${If} ${FileExists} "$INSTDIR\resources\LICENSE"
CopyFiles /SILENT "$INSTDIR\resources\LICENSE" "$INSTDIR\LICENSE"
DetailPrint "Copied LICENSE to install root"
; Optional cleanup - remove from resources folder
Delete "$INSTDIR\resources\LICENSE"
${Else}
DetailPrint "LICENSE not found at expected location: $INSTDIR\resources\LICENSE"
${EndIf}
; ---- Copy vulkan-1.dll to install root ----
${If} ${FileExists} "$INSTDIR\resources\lib\vulkan-1.dll"
CopyFiles /SILENT "$INSTDIR\resources\lib\vulkan-1.dll" "$INSTDIR\vulkan-1.dll"
DetailPrint "Copied vulkan-1.dll to install root"
; Optional cleanup - remove from resources folder
Delete "$INSTDIR\resources\lib\vulkan-1.dll"
; Only remove the lib directory if it's empty after removing both files
RMDir "$INSTDIR\resources\lib"
${Else}
DetailPrint "vulkan-1.dll not found at expected location: $INSTDIR\resources\lib\vulkan-1.dll"
${EndIf}
!macroend

View File

@ -18,112 +18,112 @@
"build:serve:web": "yarn build:web && yarn serve:web"
},
"dependencies": {
"@dnd-kit/core": "^6.3.1",
"@dnd-kit/modifiers": "^9.0.0",
"@dnd-kit/sortable": "^10.0.0",
"@dnd-kit/core": "6.3.1",
"@dnd-kit/modifiers": "9.0.0",
"@dnd-kit/sortable": "10.0.0",
"@jan/extensions-web": "link:../extensions-web",
"@janhq/core": "link:../core",
"@radix-ui/react-accordion": "^1.2.10",
"@radix-ui/react-avatar": "^1.1.10",
"@radix-ui/react-dialog": "^1.1.14",
"@radix-ui/react-dropdown-menu": "^2.1.15",
"@radix-ui/react-hover-card": "^1.1.14",
"@radix-ui/react-popover": "^1.1.13",
"@radix-ui/react-progress": "^1.1.4",
"@radix-ui/react-radio-group": "^1.3.7",
"@radix-ui/react-slider": "^1.3.2",
"@radix-ui/react-slot": "^1.2.0",
"@radix-ui/react-switch": "^1.2.2",
"@radix-ui/react-tooltip": "^1.2.4",
"@tabler/icons-react": "^3.33.0",
"@tailwindcss/vite": "^4.1.4",
"@tanstack/react-router": "^1.116.0",
"@tanstack/react-router-devtools": "^1.121.34",
"@tanstack/react-virtual": "^3.13.12",
"@tauri-apps/api": "^2.8.0",
"@radix-ui/react-accordion": "1.2.11",
"@radix-ui/react-avatar": "1.1.10",
"@radix-ui/react-dialog": "1.1.15",
"@radix-ui/react-dropdown-menu": "2.1.16",
"@radix-ui/react-hover-card": "1.1.14",
"@radix-ui/react-popover": "1.1.14",
"@radix-ui/react-progress": "1.1.4",
"@radix-ui/react-radio-group": "1.3.8",
"@radix-ui/react-slider": "1.3.2",
"@radix-ui/react-slot": "1.2.0",
"@radix-ui/react-switch": "1.2.2",
"@radix-ui/react-tooltip": "1.2.4",
"@tabler/icons-react": "3.34.0",
"@tailwindcss/vite": "4.1.4",
"@tanstack/react-router": "1.117.0",
"@tanstack/react-router-devtools": "1.121.34",
"@tanstack/react-virtual": "3.13.12",
"@tauri-apps/api": "2.8.0",
"@tauri-apps/plugin-deep-link": "2.4.3",
"@tauri-apps/plugin-dialog": "^2.2.1",
"@tauri-apps/plugin-http": "^2.2.1",
"@tauri-apps/plugin-opener": "^2.2.7",
"@tauri-apps/plugin-os": "^2.2.1",
"@tauri-apps/plugin-updater": "^2.7.1",
"@types/react-syntax-highlighter": "^15.5.13",
"@types/uuid": "^10.0.0",
"@uiw/react-textarea-code-editor": "^3.1.1",
"class-variance-authority": "^0.7.1",
"culori": "^4.0.1",
"emoji-picker-react": "^4.12.2",
"framer-motion": "^12.23.12",
"fuse.js": "^7.1.0",
"fzf": "^0.5.2",
"i18next": "^25.0.1",
"katex": "^0.16.22",
"lodash.clonedeep": "^4.5.0",
"lodash.debounce": "^4.0.8",
"lucide-react": "^0.536.0",
"motion": "^12.10.5",
"next-themes": "^0.4.6",
"posthog-js": "^1.246.0",
"react": "^19.0.0",
"react-colorful": "^5.6.1",
"react-dom": "^19.0.0",
"react-i18next": "^15.5.1",
"react-joyride": "^2.9.3",
"react-markdown": "^10.1.0",
"react-resizable-panels": "^3.0.3",
"react-syntax-highlighter": "^15.6.1",
"react-syntax-highlighter-virtualized-renderer": "^1.1.0",
"react-textarea-autosize": "^8.5.9",
"rehype-katex": "^7.0.1",
"rehype-raw": "^7.0.0",
"remark-breaks": "^4.0.0",
"remark-emoji": "^5.0.1",
"remark-gfm": "^4.0.1",
"remark-math": "^6.0.0",
"sonner": "^2.0.3",
"tailwindcss": "^4.1.4",
"@tauri-apps/plugin-dialog": "2.2.2",
"@tauri-apps/plugin-http": "2.5.0",
"@tauri-apps/plugin-opener": "2.3.0",
"@tauri-apps/plugin-os": "2.2.1",
"@tauri-apps/plugin-updater": "2.8.1",
"@types/react-syntax-highlighter": "15.5.13",
"@types/uuid": "10.0.0",
"@uiw/react-textarea-code-editor": "3.1.1",
"class-variance-authority": "0.7.1",
"culori": "4.0.1",
"emoji-picker-react": "4.12.2",
"framer-motion": "12.23.12",
"fuse.js": "7.1.0",
"fzf": "0.5.2",
"i18next": "25.0.2",
"katex": "0.16.22",
"lodash.clonedeep": "4.5.0",
"lodash.debounce": "4.0.8",
"lucide-react": "0.536.0",
"motion": "12.18.1",
"next-themes": "0.4.6",
"posthog-js": "1.255.1",
"react": "19.0.0",
"react-colorful": "5.6.1",
"react-dom": "19.0.0",
"react-i18next": "15.5.1",
"react-joyride": "2.9.3",
"react-markdown": "10.1.0",
"react-resizable-panels": "3.0.5",
"react-syntax-highlighter": "15.6.1",
"react-syntax-highlighter-virtualized-renderer": "1.1.0",
"react-textarea-autosize": "8.5.9",
"rehype-katex": "7.0.1",
"rehype-raw": "7.0.0",
"remark-breaks": "4.0.0",
"remark-emoji": "5.0.1",
"remark-gfm": "4.0.1",
"remark-math": "6.0.0",
"sonner": "2.0.5",
"tailwindcss": "4.1.4",
"token.js": "npm:token.js-fork@0.7.27",
"tw-animate-css": "^1.2.7",
"ulidx": "^2.4.1",
"unified": "^11.0.5",
"uuid": "^11.1.0",
"vaul": "^1.1.2",
"zustand": "^5.0.3"
"tw-animate-css": "1.2.8",
"ulidx": "2.4.1",
"unified": "11.0.5",
"uuid": "11.1.0",
"vaul": "1.1.2",
"zustand": "5.0.3"
},
"devDependencies": {
"@eslint/js": "^9.22.0",
"@tanstack/router-plugin": "^1.116.1",
"@testing-library/dom": "^10.4.0",
"@testing-library/jest-dom": "^6.6.3",
"@testing-library/react": "^16.3.0",
"@testing-library/user-event": "^14.6.1",
"@types/culori": "^2.1.1",
"@types/istanbul-lib-report": "^3",
"@types/istanbul-reports": "^3",
"@types/lodash.clonedeep": "^4",
"@types/lodash.debounce": "^4",
"@types/node": "^22.14.1",
"@types/react": "^19.0.10",
"@types/react-dom": "^19.0.4",
"@vitejs/plugin-react": "^4.3.4",
"@eslint/js": "8.57.0",
"@tanstack/router-plugin": "1.117.0",
"@testing-library/dom": "10.4.1",
"@testing-library/jest-dom": "6.8.0",
"@testing-library/react": "16.3.0",
"@testing-library/user-event": "14.6.1",
"@types/culori": "2.1.1",
"@types/istanbul-lib-report": "3.0.3",
"@types/istanbul-reports": "3.0.4",
"@types/lodash.clonedeep": "4.5.9",
"@types/lodash.debounce": "4.0.9",
"@types/node": "22.14.1",
"@types/react": "19.1.2",
"@types/react-dom": "19.1.2",
"@vitejs/plugin-react": "4.4.1",
"@vitest/coverage-v8": "3.2.4",
"clsx": "^2.1.1",
"eslint": "^9.22.0",
"eslint-plugin-react-hooks": "^5.2.0",
"eslint-plugin-react-refresh": "^0.4.19",
"globals": "^16.0.0",
"istanbul-api": "^3.0.0",
"istanbul-lib-coverage": "^3.2.2",
"istanbul-lib-report": "^3.0.1",
"istanbul-reports": "^3.1.7",
"jsdom": "^26.1.0",
"serve": "^14.2.4",
"tailwind-merge": "^3.3.1",
"typescript": "~5.8.3",
"typescript-eslint": "^8.26.1",
"vite": "^6.3.0",
"vite-plugin-node-polyfills": "^0.23.0",
"vite-plugin-pwa": "^1.0.3",
"vitest": "^3.1.3"
"clsx": "2.1.1",
"eslint": "9.25.1",
"eslint-plugin-react-hooks": "5.2.0",
"eslint-plugin-react-refresh": "0.4.20",
"globals": "16.0.0",
"istanbul-api": "3.0.0",
"istanbul-lib-coverage": "2.0.5",
"istanbul-lib-report": "2.0.8",
"istanbul-reports": "3.1.7",
"jsdom": "26.1.0",
"serve": "14.2.5",
"tailwind-merge": "3.3.1",
"typescript": "5.9.2",
"typescript-eslint": "8.31.0",
"vite": "6.3.2",
"vite-plugin-node-polyfills": "0.23.0",
"vite-plugin-pwa": "1.0.3",
"vitest": "3.2.4"
}
}

View File

@ -2,6 +2,7 @@ import { render, screen, fireEvent } from '@testing-library/react'
import { describe, it, expect, vi } from 'vitest'
import userEvent from '@testing-library/user-event'
import React from 'react'
import '@testing-library/jest-dom'
import {
Dialog,
DialogTrigger,
@ -117,7 +118,7 @@ describe('Dialog Components', () => {
it('applies proper classes to dialog content', async () => {
const user = userEvent.setup()
render(
<Dialog>
<DialogTrigger>Open Dialog</DialogTrigger>
@ -128,27 +129,38 @@ describe('Dialog Components', () => {
</DialogContent>
</Dialog>
)
await user.click(screen.getByText('Open Dialog'))
const dialogContent = screen.getByRole('dialog')
expect(dialogContent).toHaveClass(
'bg-main-view',
'max-h-[calc(100%-80px)]',
'overflow-auto',
'border-main-view-fg/10',
'text-main-view-fg',
'fixed',
'top-[50%]',
'left-[50%]',
'z-50',
'z-[90]',
'grid',
'w-full',
'max-w-[calc(100%-2rem)]',
'translate-x-[-50%]',
'translate-y-[-50%]',
'border',
'gap-4',
'rounded-lg',
'shadow-lg'
'border',
'p-6',
'shadow-lg',
'duration-200',
'sm:max-w-lg'
)
})
it('applies proper classes to dialog header', async () => {
const user = userEvent.setup()
render(
<Dialog>
<DialogTrigger>Open Dialog</DialogTrigger>
@ -159,11 +171,11 @@ describe('Dialog Components', () => {
</DialogContent>
</Dialog>
)
await user.click(screen.getByText('Open Dialog'))
const dialogHeader = screen.getByText('Dialog Title').closest('div')
expect(dialogHeader).toHaveClass('flex', 'flex-col', 'gap-2', 'text-center')
expect(dialogHeader).toHaveClass('flex', 'flex-col', 'gap-2', 'text-center', 'sm:text-left')
})
it('applies proper classes to dialog title', async () => {
@ -299,7 +311,7 @@ describe('Dialog Components', () => {
it('supports onOpenChange callback', async () => {
const onOpenChange = vi.fn()
const user = userEvent.setup()
render(
<Dialog onOpenChange={onOpenChange}>
<DialogTrigger>Open Dialog</DialogTrigger>
@ -310,9 +322,98 @@ describe('Dialog Components', () => {
</DialogContent>
</Dialog>
)
await user.click(screen.getByText('Open Dialog'))
expect(onOpenChange).toHaveBeenCalledWith(true)
})
it('can hide close button when showCloseButton is false', async () => {
const user = userEvent.setup()
render(
<Dialog>
<DialogTrigger>Open Dialog</DialogTrigger>
<DialogContent showCloseButton={false}>
<DialogHeader>
<DialogTitle>Dialog Title</DialogTitle>
</DialogHeader>
</DialogContent>
</Dialog>
)
await user.click(screen.getByText('Open Dialog'))
expect(screen.queryByRole('button', { name: /close/i })).not.toBeInTheDocument()
})
it('shows close button by default', async () => {
const user = userEvent.setup()
render(
<Dialog>
<DialogTrigger>Open Dialog</DialogTrigger>
<DialogContent>
<DialogHeader>
<DialogTitle>Dialog Title</DialogTitle>
</DialogHeader>
</DialogContent>
</Dialog>
)
await user.click(screen.getByText('Open Dialog'))
expect(screen.getByRole('button', { name: /close/i })).toBeInTheDocument()
})
it('accepts aria-describedby prop', async () => {
const user = userEvent.setup()
render(
<Dialog>
<DialogTrigger>Open Dialog</DialogTrigger>
<DialogContent aria-describedby="custom-description">
<DialogHeader>
<DialogTitle>Dialog Title</DialogTitle>
</DialogHeader>
<p id="custom-description">Custom description text</p>
</DialogContent>
</Dialog>
)
await user.click(screen.getByText('Open Dialog'))
const dialogContent = screen.getByRole('dialog')
expect(dialogContent).toHaveAttribute('aria-describedby', 'custom-description')
})
it('applies data-slot attributes to components', async () => {
const user = userEvent.setup()
render(
<Dialog>
<DialogTrigger>Open Dialog</DialogTrigger>
<DialogContent>
<DialogHeader>
<DialogTitle>Dialog Title</DialogTitle>
<DialogDescription>Dialog description</DialogDescription>
</DialogHeader>
<div>Dialog body content</div>
<DialogFooter>
<button>Footer button</button>
</DialogFooter>
</DialogContent>
</Dialog>
)
expect(screen.getByText('Open Dialog')).toHaveAttribute('data-slot', 'dialog-trigger')
await user.click(screen.getByText('Open Dialog'))
expect(screen.getByRole('dialog')).toHaveAttribute('data-slot', 'dialog-content')
expect(screen.getByText('Dialog Title').closest('div')).toHaveAttribute('data-slot', 'dialog-header')
expect(screen.getByText('Dialog Title')).toHaveAttribute('data-slot', 'dialog-title')
expect(screen.getByText('Dialog description')).toHaveAttribute('data-slot', 'dialog-description')
expect(screen.getByText('Footer button').closest('div')).toHaveAttribute('data-slot', 'dialog-footer')
})
})

View File

@ -37,7 +37,7 @@ function DialogOverlay({
<DialogPrimitive.Overlay
data-slot="dialog-overlay"
className={cn(
'data-[state=open]:animate-in data-[state=closed]:animate-out data-[state=closed]:fade-out-0 data-[state=open]:fade-in-0 fixed inset-0 z-50 bg-main-view/80 backdrop-blur-sm',
'data-[state=open]:animate-in data-[state=closed]:animate-out data-[state=closed]:fade-out-0 data-[state=open]:fade-in-0 fixed inset-0 z-[80] bg-main-view/80 backdrop-blur-sm',
className
)}
{...props}
@ -67,7 +67,7 @@ function DialogContent({
data-slot="dialog-content"
aria-describedby={ariaDescribedBy}
className={cn(
'bg-main-view max-h-[calc(100%-80px)] overflow-auto border-main-view-fg/10 text-main-view-fg data-[state=open]:animate-in data-[state=closed]:animate-out data-[state=closed]:fade-out-0 data-[state=open]:fade-in-0 data-[state=closed]:zoom-out-95 data-[state=open]:zoom-in-95 fixed top-[50%] left-[50%] z-50 grid w-full max-w-[calc(100%-2rem)] translate-x-[-50%] translate-y-[-50%] gap-4 rounded-lg border p-6 shadow-lg duration-200 sm:max-w-lg',
'bg-main-view max-h-[calc(100%-80px)] overflow-auto border-main-view-fg/10 text-main-view-fg data-[state=open]:animate-in data-[state=closed]:animate-out data-[state=closed]:fade-out-0 data-[state=open]:fade-in-0 data-[state=closed]:zoom-out-95 data-[state=open]:zoom-in-95 fixed top-[50%] left-[50%] z-[90] grid w-full max-w-[calc(100%-2rem)] translate-x-[-50%] translate-y-[-50%] gap-4 rounded-lg border p-6 shadow-lg duration-200 sm:max-w-lg',
className
)}
{...props}

View File

@ -21,4 +21,5 @@ export const localStorageKey = {
lastUsedAssistant: 'last-used-assistant',
favoriteModels: 'favorite-models',
setupCompleted: 'setup-completed',
threadManagement: 'thread-management',
}

View File

@ -3,6 +3,8 @@ export const route = {
home: '/',
appLogs: '/logs',
assistant: '/assistant',
project: '/project',
projectDetail: '/project/$projectId',
settings: {
index: '/settings',
model_providers: '/settings/providers',

View File

@ -4,6 +4,7 @@ import TextareaAutosize from 'react-textarea-autosize'
import { cn } from '@/lib/utils'
import { usePrompt } from '@/hooks/usePrompt'
import { useThreads } from '@/hooks/useThreads'
import { useThreadManagement } from '@/hooks/useThreadManagement'
import { useCallback, useEffect, useRef, useState } from 'react'
import { Button } from '@/components/ui/button'
import {
@ -43,9 +44,15 @@ type ChatInputProps = {
showSpeedToken?: boolean
model?: ThreadModel
initialMessage?: boolean
projectId?: string
}
const ChatInput = ({ model, className, initialMessage }: ChatInputProps) => {
const ChatInput = ({
model,
className,
initialMessage,
projectId,
}: ChatInputProps) => {
const textareaRef = useRef<HTMLTextAreaElement>(null)
const [isFocused, setIsFocused] = useState(false)
const [rows, setRows] = useState(1)
@ -58,6 +65,8 @@ const ChatInput = ({ model, className, initialMessage }: ChatInputProps) => {
const prompt = usePrompt((state) => state.prompt)
const setPrompt = usePrompt((state) => state.setPrompt)
const currentThreadId = useThreads((state) => state.currentThreadId)
const updateThread = useThreads((state) => state.updateThread)
const { getFolderById } = useThreadManagement()
const { t } = useTranslation()
const spellCheckChatInput = useGeneralSetting(
(state) => state.spellCheckChatInput
@ -177,6 +186,28 @@ const ChatInput = ({ model, className, initialMessage }: ChatInputProps) => {
uploadedFiles.length > 0 ? uploadedFiles : undefined
)
setUploadedFiles([])
// Handle project assignment for new threads
if (projectId && !currentThreadId) {
const project = getFolderById(projectId)
if (project) {
// Use setTimeout to ensure the thread is created first
setTimeout(() => {
const newCurrentThreadId = useThreads.getState().currentThreadId
if (newCurrentThreadId) {
updateThread(newCurrentThreadId, {
metadata: {
project: {
id: project.id,
name: project.name,
updated_at: project.updated_at,
},
},
})
}
}, 100)
}
}
}
useEffect(() => {

View File

@ -0,0 +1,142 @@
import { Button } from '@/components/ui/button'
import { Progress } from '@/components/ui/progress'
import { useDownloadStore } from '@/hooks/useDownloadStore'
import { useGeneralSetting } from '@/hooks/useGeneralSetting'
import { useModelProvider } from '@/hooks/useModelProvider'
import { useServiceHub } from '@/hooks/useServiceHub'
import { useTranslation } from '@/i18n'
import { extractModelName } from '@/lib/models'
import { cn, sanitizeModelId } from '@/lib/utils'
import { CatalogModel } from '@/services/models/types'
import { useCallback, useMemo } from 'react'
import { useShallow } from 'zustand/shallow'
type ModelProps = {
model: CatalogModel
handleUseModel: (modelId: string) => void
}
const defaultModelQuantizations = ['iq4_xs', 'q4_k_m']
export function DownloadButtonPlaceholder({
model,
handleUseModel,
}: ModelProps) {
const { downloads, localDownloadingModels, addLocalDownloadingModel } =
useDownloadStore(
useShallow((state) => ({
downloads: state.downloads,
localDownloadingModels: state.localDownloadingModels,
addLocalDownloadingModel: state.addLocalDownloadingModel,
}))
)
const { t } = useTranslation()
const getProviderByName = useModelProvider((state) => state.getProviderByName)
const llamaProvider = getProviderByName('llamacpp')
const serviceHub = useServiceHub()
const huggingfaceToken = useGeneralSetting((state) => state.huggingfaceToken)
const quant =
model.quants.find((e) =>
defaultModelQuantizations.some((m) =>
e.model_id.toLowerCase().includes(m)
)
) ?? model.quants[0]
const modelId = quant?.model_id || model.model_name
const downloadProcesses = useMemo(
() =>
Object.values(downloads).map((download) => ({
id: download.name,
name: download.name,
progress: download.progress,
current: download.current,
total: download.total,
})),
[downloads]
)
const isRecommendedModel = useCallback((modelId: string) => {
return (extractModelName(modelId)?.toLowerCase() ===
'jan-nano-gguf') as boolean
}, [])
if (model.quants.length === 0) {
return (
<div className="flex items-center gap-2">
<Button
size="sm"
onClick={() => {
window.open(`https://huggingface.co/${model.model_name}`, '_blank')
}}
>
View on HuggingFace
</Button>
</div>
)
}
const modelUrl = quant?.path || modelId
const isDownloading =
localDownloadingModels.has(modelId) ||
downloadProcesses.some((e) => e.id === modelId)
const downloadProgress =
downloadProcesses.find((e) => e.id === modelId)?.progress || 0
const isDownloaded = llamaProvider?.models.some(
(m: { id: string }) =>
m.id === modelId ||
m.id === `${model.developer}/${sanitizeModelId(modelId)}`
)
const isRecommended = isRecommendedModel(model.model_name)
const handleDownload = () => {
// Immediately set local downloading state
addLocalDownloadingModel(modelId)
const mmprojPath = (
model.mmproj_models?.find(
(e) => e.model_id.toLowerCase() === 'mmproj-f16'
) || model.mmproj_models?.[0]
)?.path
serviceHub
.models()
.pullModelWithMetadata(modelId, modelUrl, mmprojPath, huggingfaceToken)
}
return (
<div
className={cn(
'flex items-center',
isRecommended && 'hub-download-button-step'
)}
>
{isDownloading && !isDownloaded && (
<div className={cn('flex items-center gap-2 w-20')}>
<Progress value={downloadProgress * 100} />
<span className="text-xs text-center text-main-view-fg/70">
{Math.round(downloadProgress * 100)}%
</span>
</div>
)}
{isDownloaded ? (
<Button
size="sm"
onClick={() => handleUseModel(modelId)}
data-test-id={`hub-model-${modelId}`}
>
{t('hub:use')}
</Button>
) : (
<Button
data-test-id={`hub-model-${modelId}`}
size="sm"
onClick={handleDownload}
className={cn(isDownloading && 'hidden')}
>
{t('hub:download')}
</Button>
)}
</div>
)
}

View File

@ -400,20 +400,33 @@ export function DownloadManagement() {
className="text-main-view-fg/70 cursor-pointer"
title="Cancel download"
onClick={() => {
serviceHub.models().abortDownload(download.name).then(() => {
toast.info(
t('common:toast.downloadCancelled.title'),
{
id: 'cancel-download',
description: t(
'common:toast.downloadCancelled.description'
),
}
)
if (downloadProcesses.length === 0) {
setIsPopoverOpen(false)
}
})
// TODO: Consolidate cancellation logic
if (download.id.startsWith('llamacpp')) {
const downloadManager =
window.core.extensionManager.getByName(
'@janhq/download-extension'
)
downloadManager.cancelDownload(download.id)
} else {
serviceHub
.models()
.abortDownload(download.name)
.then(() => {
toast.info(
t('common:toast.downloadCancelled.title'),
{
id: 'cancel-download',
description: t(
'common:toast.downloadCancelled.description'
),
}
)
if (downloadProcesses.length === 0) {
setIsPopoverOpen(false)
}
})
}
}}
/>
</div>

View File

@ -4,14 +4,18 @@ import { cn } from '@/lib/utils'
import {
IconLayoutSidebar,
IconDots,
IconCirclePlusFilled,
IconSettingsFilled,
IconCirclePlus,
IconSettings,
IconStar,
IconMessageFilled,
IconAppsFilled,
IconFolderPlus,
IconMessage,
IconApps,
IconX,
IconSearch,
IconClipboardSmileFilled,
IconClipboardSmile,
IconFolder,
IconPencil,
IconTrash,
} from '@tabler/icons-react'
import { route } from '@/constants/routes'
import ThreadList from './ThreadList'
@ -28,6 +32,7 @@ import { UserProfileMenu } from '@/containers/auth/UserProfileMenu'
import { useAuth } from '@/hooks/useAuth'
import { useThreads } from '@/hooks/useThreads'
import { useThreadManagement } from '@/hooks/useThreadManagement'
import { useTranslation } from '@/i18n/react-i18next-compat'
import { useMemo, useState, useEffect, useRef } from 'react'
@ -35,40 +40,42 @@ import { toast } from 'sonner'
import { DownloadManagement } from '@/containers/DownloadManegement'
import { useSmallScreen } from '@/hooks/useMediaQuery'
import { useClickOutside } from '@/hooks/useClickOutside'
import { useDownloadStore } from '@/hooks/useDownloadStore'
import { DeleteAllThreadsDialog } from '@/containers/dialogs'
import AddProjectDialog from '@/containers/dialogs/AddProjectDialog'
import { DeleteProjectDialog } from '@/containers/dialogs/DeleteProjectDialog'
const mainMenus = [
{
title: 'common:newChat',
icon: IconCirclePlusFilled,
icon: IconCirclePlus,
route: route.home,
isEnabled: true,
},
{
title: 'common:projects.title',
icon: IconFolderPlus,
route: route.project,
isEnabled: true,
},
{
title: 'common:assistants',
icon: IconClipboardSmileFilled,
icon: IconClipboardSmile,
route: route.assistant,
isEnabled: PlatformFeatures[PlatformFeature.ASSISTANTS],
},
{
title: 'common:hub',
icon: IconAppsFilled,
icon: IconApps,
route: route.hub.index,
isEnabled: PlatformFeatures[PlatformFeature.MODEL_HUB],
},
{
title: 'common:settings',
icon: IconSettingsFilled,
icon: IconSettings,
route: route.settings.general,
isEnabled: true,
},
{
title: 'common:authentication',
icon: null,
route: null,
isEnabled: PlatformFeatures[PlatformFeature.AUTHENTICATION],
},
]
const LeftPanel = () => {
@ -122,7 +129,7 @@ const LeftPanel = () => {
) {
if (currentIsSmallScreen && open) {
setLeftPanel(false)
} else if(!open) {
} else if (!open) {
setLeftPanel(true)
}
prevScreenSizeRef.current = currentIsSmallScreen
@ -152,20 +159,65 @@ const LeftPanel = () => {
const getFilteredThreads = useThreads((state) => state.getFilteredThreads)
const threads = useThreads((state) => state.threads)
const { folders, addFolder, updateFolder, deleteFolder, getFolderById } =
useThreadManagement()
// Project dialog states
const [projectDialogOpen, setProjectDialogOpen] = useState(false)
const [editingProjectKey, setEditingProjectKey] = useState<string | null>(
null
)
const [deleteProjectConfirmOpen, setDeleteProjectConfirmOpen] =
useState(false)
const [deletingProjectId, setDeletingProjectId] = useState<string | null>(
null
)
const filteredThreads = useMemo(() => {
return getFilteredThreads(searchTerm)
// eslint-disable-next-line react-hooks/exhaustive-deps
}, [getFilteredThreads, searchTerm, threads])
const filteredProjects = useMemo(() => {
if (!searchTerm) return folders
return folders.filter((folder) =>
folder.name.toLowerCase().includes(searchTerm.toLowerCase())
)
}, [folders, searchTerm])
// Memoize categorized threads based on filteredThreads
const favoritedThreads = useMemo(() => {
return filteredThreads.filter((t) => t.isFavorite)
}, [filteredThreads])
const unFavoritedThreads = useMemo(() => {
return filteredThreads.filter((t) => !t.isFavorite)
return filteredThreads.filter((t) => !t.isFavorite && !t.metadata?.project)
}, [filteredThreads])
// Project handlers
const handleProjectDelete = (id: string) => {
setDeletingProjectId(id)
setDeleteProjectConfirmOpen(true)
}
const confirmProjectDelete = () => {
if (deletingProjectId) {
deleteFolder(deletingProjectId)
setDeleteProjectConfirmOpen(false)
setDeletingProjectId(null)
}
}
const handleProjectSave = (name: string) => {
if (editingProjectKey) {
updateFolder(editingProjectKey, name)
} else {
addFolder(name)
}
setProjectDialogOpen(false)
setEditingProjectKey(null)
}
// Disable body scroll when panel is open on small screens
useEffect(() => {
if (isSmallScreen && open) {
@ -179,8 +231,6 @@ const LeftPanel = () => {
}
}, [isSmallScreen, open])
const { downloads, localDownloadingModels } = useDownloadStore()
return (
<>
{/* Backdrop overlay for small screens */}
@ -262,22 +312,12 @@ const LeftPanel = () => {
)}
</div>
<div className="flex flex-col justify-between overflow-hidden mt-0 !h-[calc(100%-42px)]">
<div
className={cn(
'flex flex-col',
Object.keys(downloads).length > 0 || localDownloadingModels.size > 0
? 'h-[calc(100%-200px)]'
: 'h-[calc(100%-140px)]'
)}
>
<div className="flex flex-col gap-y-1 overflow-hidden mt-0 !h-[calc(100%-42px)]">
<div className="space-y-1 py-1">
{IS_MACOS && (
<div
ref={searchContainerMacRef}
className={cn(
'relative mb-4 mt-1',
isResizableContext ? 'mx-2' : 'mx-1'
)}
className={cn('relative mb-2 mt-1 mx-1')}
data-ignore-outside-clicks
>
<IconSearch className="absolute size-4 top-1/2 left-2 -translate-y-1/2 text-left-panel-fg/50" />
@ -303,7 +343,151 @@ const LeftPanel = () => {
)}
</div>
)}
<div className="flex flex-col w-full overflow-y-auto overflow-x-hidden">
{mainMenus.map((menu) => {
if (!menu.isEnabled) {
return null
}
// Handle authentication menu specially
if (menu.title === 'common:authentication') {
return (
<div key={menu.title}>
<div className="mx-1 my-2 border-t border-left-panel-fg/5" />
{isAuthenticated ? (
<UserProfileMenu />
) : (
<AuthLoginButton />
)}
</div>
)
}
// Regular menu items must have route and icon
if (!menu.route || !menu.icon) return null
const isActive = (() => {
// Settings routes
if (menu.route.includes(route.settings.index)) {
return currentPath.includes(route.settings.index)
}
// Default exact match for other routes
return currentPath === menu.route
})()
return (
<Link
key={menu.title}
to={menu.route}
onClick={() => isSmallScreen && setLeftPanel(false)}
data-test-id={`menu-${menu.title}`}
activeOptions={{ exact: true }}
className={cn(
'flex items-center gap-1.5 cursor-pointer hover:bg-left-panel-fg/10 py-1 px-1 rounded',
isActive && 'bg-left-panel-fg/10'
)}
>
<menu.icon size={18} className="text-left-panel-fg/70" />
<span className="font-medium text-left-panel-fg/90">
{t(menu.title)}
</span>
</Link>
)
})}
</div>
{filteredProjects.length > 0 && (
<div className="space-y-1 py-1">
<div className="flex items-center justify-between mb-2">
<span className="block text-xs text-left-panel-fg/50 px-1 font-semibold">
{t('common:projects.title')}
</span>
</div>
<div className="flex flex-col max-h-[140px] overflow-y-scroll">
{filteredProjects
.slice()
.sort((a, b) => b.updated_at - a.updated_at)
.map((folder) => {
const ProjectItem = () => {
const [openDropdown, setOpenDropdown] = useState(false)
const isProjectActive =
currentPath === `/project/${folder.id}`
return (
<div key={folder.id} className="mb-1">
<div
className={cn(
'rounded hover:bg-left-panel-fg/10 flex items-center justify-between gap-2 px-1.5 group/project-list transition-all cursor-pointer',
isProjectActive && 'bg-left-panel-fg/10'
)}
>
<Link
to="/project/$projectId"
params={{ projectId: folder.id }}
onClick={() =>
isSmallScreen && setLeftPanel(false)
}
className="py-1 pr-2 truncate flex items-center gap-2 flex-1"
>
<IconFolder
size={16}
className="text-left-panel-fg/70 shrink-0"
/>
<span className="text-sm text-left-panel-fg/90 truncate">
{folder.name}
</span>
</Link>
<div className="flex items-center">
<DropdownMenu
open={openDropdown}
onOpenChange={(open) => setOpenDropdown(open)}
>
<DropdownMenuTrigger asChild>
<IconDots
size={14}
className="text-left-panel-fg/60 shrink-0 cursor-pointer px-0.5 -mr-1 data-[state=open]:bg-left-panel-fg/10 rounded group-hover/project-list:data-[state=closed]:size-5 size-5 data-[state=closed]:size-0"
onClick={(e) => {
e.preventDefault()
e.stopPropagation()
}}
/>
</DropdownMenuTrigger>
<DropdownMenuContent side="bottom" align="end">
<DropdownMenuItem
onClick={(e) => {
e.stopPropagation()
setEditingProjectKey(folder.id)
setProjectDialogOpen(true)
}}
>
<IconPencil size={16} />
<span>Edit</span>
</DropdownMenuItem>
<DropdownMenuItem
onClick={(e) => {
e.stopPropagation()
handleProjectDelete(folder.id)
}}
>
<IconTrash size={16} />
<span>Delete</span>
</DropdownMenuItem>
</DropdownMenuContent>
</DropdownMenu>
</div>
</div>
</div>
)
}
return <ProjectItem key={folder.id} />
})}
</div>
</div>
)}
<div className="flex flex-col h-full overflow-y-scroll w-[calc(100%+6px)]">
<div className="flex flex-col w-full h-full overflow-y-auto overflow-x-hidden">
<div className="h-full w-full overflow-y-auto">
{favoritedThreads.length > 0 && (
<>
@ -379,7 +563,9 @@ const LeftPanel = () => {
</button>
</DropdownMenuTrigger>
<DropdownMenuContent side="bottom" align="end">
<DeleteAllThreadsDialog onDeleteAll={deleteAllThreads} />
<DeleteAllThreadsDialog
onDeleteAll={deleteAllThreads}
/>
</DropdownMenuContent>
</DropdownMenu>
</div>
@ -404,7 +590,7 @@ const LeftPanel = () => {
<>
<div className="px-1 mt-2">
<div className="flex items-center gap-1 text-left-panel-fg/80">
<IconMessageFilled size={18} />
<IconMessage size={18} />
<h6 className="font-medium text-base">
{t('common:noThreadsYet')}
</h6>
@ -421,58 +607,38 @@ const LeftPanel = () => {
</div>
</div>
</div>
</div>
{PlatformFeatures[PlatformFeature.AUTHENTICATION] && (
<div className="space-y-1 shrink-0 py-1">
<div>
<div className="mx-1 my-2 border-t border-left-panel-fg/5" />
{isAuthenticated ? <UserProfileMenu /> : <AuthLoginButton />}
</div>
</div>
)}
<div className="space-y-1 shrink-0 py-1 mt-2">
{mainMenus.map((menu) => {
if (!menu.isEnabled) {
return null
}
// Handle authentication menu specially
if (menu.title === 'common:authentication') {
return (
<div key={menu.title}>
<div className="mx-1 my-2 border-t border-left-panel-fg/5" />
{isAuthenticated ? (
<UserProfileMenu />
) : (
<AuthLoginButton />
)}
</div>
)
}
// Regular menu items must have route and icon
if (!menu.route || !menu.icon) return null
const isActive =
currentPath.includes(route.settings.index) &&
menu.route.includes(route.settings.index)
return (
<Link
key={menu.title}
to={menu.route}
onClick={() => isSmallScreen && setLeftPanel(false)}
data-test-id={`menu-${menu.title}`}
className={cn(
'flex items-center gap-1.5 cursor-pointer hover:bg-left-panel-fg/10 py-1 px-1 rounded',
isActive
? 'bg-left-panel-fg/10'
: '[&.active]:bg-left-panel-fg/10'
)}
>
<menu.icon size={18} className="text-left-panel-fg/70" />
<span className="font-medium text-left-panel-fg/90">
{t(menu.title)}
</span>
</Link>
)
})}
<DownloadManagement />
</div>
</div>
</aside>
{/* Project Dialogs */}
<AddProjectDialog
open={projectDialogOpen}
onOpenChange={setProjectDialogOpen}
editingKey={editingProjectKey}
initialData={
editingProjectKey ? getFolderById(editingProjectKey) : undefined
}
onSave={handleProjectSave}
/>
<DeleteProjectDialog
open={deleteProjectConfirmOpen}
onOpenChange={setDeleteProjectConfirmOpen}
onConfirm={confirmProjectDelete}
projectName={
deletingProjectId ? getFolderById(deletingProjectId)?.name : undefined
}
/>
</>
)
}

Some files were not shown because too many files have changed in this diff