Merge remote-tracking branch 'origin/dev' into mobile/dev

# Conflicts:
#	web-app/src/routeTree.gen.ts
#	web-app/src/routes/index.tsx
Vanalite 2025-09-26 11:09:50 +07:00
commit a0aa0074f4
86 changed files with 4312 additions and 1359 deletions

View File

@ -79,8 +79,33 @@ jobs:
jq --arg version "${{ inputs.new_version }}" '.version = $version' web-app/package.json > /tmp/package.json
mv /tmp/package.json web-app/package.json
ctoml ./src-tauri/Cargo.toml dependencies.tauri.features[] "devtools"
# Update tauri plugin versions
jq --arg version "${{ inputs.new_version }}" '.version = $version' ./src-tauri/plugins/tauri-plugin-hardware/package.json > /tmp/package.json
mv /tmp/package.json ./src-tauri/plugins/tauri-plugin-hardware/package.json
echo "---------./src-tauri/plugins/tauri-plugin-hardware/package.json---------"
cat ./src-tauri/plugins/tauri-plugin-hardware/package.json
jq --arg version "${{ inputs.new_version }}" '.version = $version' ./src-tauri/plugins/tauri-plugin-llamacpp/package.json > /tmp/package.json
mv /tmp/package.json ./src-tauri/plugins/tauri-plugin-llamacpp/package.json
echo "---------./src-tauri/plugins/tauri-plugin-llamacpp/package.json---------"
cat ./src-tauri/plugins/tauri-plugin-llamacpp/package.json
ctoml ./src-tauri/plugins/tauri-plugin-hardware/Cargo.toml package.version "${{ inputs.new_version }}"
echo "---------./src-tauri/plugins/tauri-plugin-hardware/Cargo.toml---------"
cat ./src-tauri/plugins/tauri-plugin-hardware/Cargo.toml
ctoml ./src-tauri/plugins/tauri-plugin-llamacpp/Cargo.toml package.version "${{ inputs.new_version }}"
echo "---------./src-tauri/plugins/tauri-plugin-llamacpp/Cargo.toml---------"
cat ./src-tauri/plugins/tauri-plugin-llamacpp/Cargo.toml
ctoml ./src-tauri/Cargo.toml package.version "${{ inputs.new_version }}"
echo "---------./src-tauri/Cargo.toml---------"
cat ./src-tauri/Cargo.toml
ctoml ./src-tauri/Cargo.toml dependencies.tauri.features[] "devtools"
if [ "${{ inputs.channel }}" != "stable" ]; then
jq '.plugins.updater.endpoints = ["https://delta.jan.ai/${{ inputs.channel }}/latest.json"]' ./src-tauri/tauri.conf.json > /tmp/tauri.conf.json

View File

@ -100,13 +100,36 @@ jobs:
jq --arg version "${{ inputs.new_version }}" '.version = $version' web-app/package.json > /tmp/package.json
mv /tmp/package.json web-app/package.json
# Temporarily enable devtool on prod build
ctoml ./src-tauri/Cargo.toml dependencies.tauri.features[] "devtools"
cat ./src-tauri/Cargo.toml
# Update tauri plugin versions
jq --arg version "${{ inputs.new_version }}" '.version = $version' ./src-tauri/plugins/tauri-plugin-hardware/package.json > /tmp/package.json
mv /tmp/package.json ./src-tauri/plugins/tauri-plugin-hardware/package.json
echo "---------./src-tauri/plugins/tauri-plugin-hardware/package.json---------"
cat ./src-tauri/plugins/tauri-plugin-hardware/package.json
jq --arg version "${{ inputs.new_version }}" '.version = $version' ./src-tauri/plugins/tauri-plugin-llamacpp/package.json > /tmp/package.json
mv /tmp/package.json ./src-tauri/plugins/tauri-plugin-llamacpp/package.json
echo "---------./src-tauri/plugins/tauri-plugin-llamacpp/package.json---------"
cat ./src-tauri/plugins/tauri-plugin-llamacpp/package.json
ctoml ./src-tauri/plugins/tauri-plugin-hardware/Cargo.toml package.version "${{ inputs.new_version }}"
echo "---------./src-tauri/plugins/tauri-plugin-hardware/Cargo.toml---------"
cat ./src-tauri/plugins/tauri-plugin-hardware/Cargo.toml
ctoml ./src-tauri/plugins/tauri-plugin-llamacpp/Cargo.toml package.version "${{ inputs.new_version }}"
echo "---------./src-tauri/plugins/tauri-plugin-llamacpp/Cargo.toml---------"
cat ./src-tauri/plugins/tauri-plugin-llamacpp/Cargo.toml
ctoml ./src-tauri/Cargo.toml package.version "${{ inputs.new_version }}"
echo "---------./src-tauri/Cargo.toml---------"
cat ./src-tauri/Cargo.toml
# Temporarily enable devtool on prod build
ctoml ./src-tauri/Cargo.toml dependencies.tauri.features[] "devtools"
cat ./src-tauri/Cargo.toml
# Change app name for beta and nightly builds
if [ "${{ inputs.channel }}" != "stable" ]; then
jq '.plugins.updater.endpoints = ["https://delta.jan.ai/${{ inputs.channel }}/latest.json"]' ./src-tauri/tauri.conf.json > /tmp/tauri.conf.json

View File

@ -53,7 +53,7 @@ on:
value: ${{ jobs.build-linux-x64.outputs.APPIMAGE_FILE_NAME }}
jobs:
build-linux-x64:
runs-on: ubuntu-22.04
runs-on: ubuntu-latest
outputs:
DEB_SIG: ${{ steps.packageinfo.outputs.DEB_SIG }}
APPIMAGE_SIG: ${{ steps.packageinfo.outputs.APPIMAGE_SIG }}
@ -117,11 +117,34 @@ jobs:
jq --arg version "${{ inputs.new_version }}" '.version = $version' web-app/package.json > /tmp/package.json
mv /tmp/package.json web-app/package.json
# Temporarily enable devtool on prod build
ctoml ./src-tauri/Cargo.toml dependencies.tauri.features[] "devtools"
cat ./src-tauri/Cargo.toml
# Update tauri plugin versions
jq --arg version "${{ inputs.new_version }}" '.version = $version' ./src-tauri/plugins/tauri-plugin-hardware/package.json > /tmp/package.json
mv /tmp/package.json ./src-tauri/plugins/tauri-plugin-hardware/package.json
echo "---------./src-tauri/plugins/tauri-plugin-hardware/package.json---------"
cat ./src-tauri/plugins/tauri-plugin-hardware/package.json
jq --arg version "${{ inputs.new_version }}" '.version = $version' ./src-tauri/plugins/tauri-plugin-llamacpp/package.json > /tmp/package.json
mv /tmp/package.json ./src-tauri/plugins/tauri-plugin-llamacpp/package.json
echo "---------./src-tauri/plugins/tauri-plugin-llamacpp/package.json---------"
cat ./src-tauri/plugins/tauri-plugin-llamacpp/package.json
ctoml ./src-tauri/plugins/tauri-plugin-hardware/Cargo.toml package.version "${{ inputs.new_version }}"
echo "---------./src-tauri/plugins/tauri-plugin-hardware/Cargo.toml---------"
cat ./src-tauri/plugins/tauri-plugin-hardware/Cargo.toml
ctoml ./src-tauri/plugins/tauri-plugin-llamacpp/Cargo.toml package.version "${{ inputs.new_version }}"
echo "---------./src-tauri/plugins/tauri-plugin-llamacpp/Cargo.toml---------"
cat ./src-tauri/plugins/tauri-plugin-llamacpp/Cargo.toml
ctoml ./src-tauri/Cargo.toml package.version "${{ inputs.new_version }}"
echo "---------./src-tauri/Cargo.toml---------"
cat ./src-tauri/Cargo.toml
# Temporarily enable devtool on prod build
ctoml ./src-tauri/Cargo.toml dependencies.tauri.features[] "devtools"
cat ./src-tauri/Cargo.toml
# Change app name for beta and nightly builds

View File

@ -42,31 +42,6 @@ jobs:
run: |
cargo install ctoml
- name: Create bun and uv universal
run: |
mkdir -p ./src-tauri/resources/bin/
cd ./src-tauri/resources/bin/
curl -L -o bun-darwin-x64.zip https://github.com/oven-sh/bun/releases/download/bun-v1.2.10/bun-darwin-x64.zip
curl -L -o bun-darwin-aarch64.zip https://github.com/oven-sh/bun/releases/download/bun-v1.2.10/bun-darwin-aarch64.zip
unzip bun-darwin-x64.zip
unzip bun-darwin-aarch64.zip
lipo -create -output bun-universal-apple-darwin bun-darwin-x64/bun bun-darwin-aarch64/bun
cp -f bun-darwin-aarch64/bun bun-aarch64-apple-darwin
cp -f bun-darwin-x64/bun bun-x86_64-apple-darwin
cp -f bun-universal-apple-darwin bun
curl -L -o uv-x86_64.tar.gz https://github.com/astral-sh/uv/releases/download/0.6.17/uv-x86_64-apple-darwin.tar.gz
curl -L -o uv-arm64.tar.gz https://github.com/astral-sh/uv/releases/download/0.6.17/uv-aarch64-apple-darwin.tar.gz
tar -xzf uv-x86_64.tar.gz
tar -xzf uv-arm64.tar.gz
mv uv-x86_64-apple-darwin uv-x86_64
mv uv-aarch64-apple-darwin uv-aarch64
lipo -create -output uv-universal-apple-darwin uv-x86_64/uv uv-aarch64/uv
cp -f uv-x86_64/uv uv-x86_64-apple-darwin
cp -f uv-aarch64/uv uv-aarch64-apple-darwin
cp -f uv-universal-apple-darwin uv
ls -la
- name: Update app version
run: |
echo "Version: ${{ inputs.new_version }}"
@ -74,8 +49,35 @@ jobs:
mv /tmp/tauri.conf.json ./src-tauri/tauri.conf.json
jq --arg version "${{ inputs.new_version }}" '.version = $version' web-app/package.json > /tmp/package.json
mv /tmp/package.json web-app/package.json
# Update tauri plugin versions
jq --arg version "${{ inputs.new_version }}" '.version = $version' ./src-tauri/plugins/tauri-plugin-hardware/package.json > /tmp/package.json
mv /tmp/package.json ./src-tauri/plugins/tauri-plugin-hardware/package.json
echo "---------./src-tauri/plugins/tauri-plugin-hardware/package.json---------"
cat ./src-tauri/plugins/tauri-plugin-hardware/package.json
jq --arg version "${{ inputs.new_version }}" '.version = $version' ./src-tauri/plugins/tauri-plugin-llamacpp/package.json > /tmp/package.json
mv /tmp/package.json ./src-tauri/plugins/tauri-plugin-llamacpp/package.json
echo "---------./src-tauri/plugins/tauri-plugin-llamacpp/package.json---------"
cat ./src-tauri/plugins/tauri-plugin-llamacpp/package.json
ctoml ./src-tauri/plugins/tauri-plugin-hardware/Cargo.toml package.version "${{ inputs.new_version }}"
echo "---------./src-tauri/plugins/tauri-plugin-hardware/Cargo.toml---------"
cat ./src-tauri/plugins/tauri-plugin-hardware/Cargo.toml
ctoml ./src-tauri/plugins/tauri-plugin-llamacpp/Cargo.toml package.version "${{ inputs.new_version }}"
echo "---------./src-tauri/plugins/tauri-plugin-llamacpp/Cargo.toml---------"
cat ./src-tauri/plugins/tauri-plugin-llamacpp/Cargo.toml
ctoml ./src-tauri/Cargo.toml package.version "${{ inputs.new_version }}"
echo "---------./src-tauri/Cargo.toml---------"
cat ./src-tauri/Cargo.toml
ctoml ./src-tauri/Cargo.toml dependencies.tauri.features[] "devtools"
if [ "${{ inputs.channel }}" != "stable" ]; then
jq '.plugins.updater.endpoints = ["https://delta.jan.ai/${{ inputs.channel }}/latest.json"]' ./src-tauri/tauri.conf.json > /tmp/tauri.conf.json
mv /tmp/tauri.conf.json ./src-tauri/tauri.conf.json

View File

@ -101,7 +101,30 @@ jobs:
jq --arg version "${{ inputs.new_version }}" '.version = $version' web-app/package.json > /tmp/package.json
mv /tmp/package.json web-app/package.json
# Update tauri plugin versions
jq --arg version "${{ inputs.new_version }}" '.version = $version' ./src-tauri/plugins/tauri-plugin-hardware/package.json > /tmp/package.json
mv /tmp/package.json ./src-tauri/plugins/tauri-plugin-hardware/package.json
echo "---------./src-tauri/plugins/tauri-plugin-hardware/package.json---------"
cat ./src-tauri/plugins/tauri-plugin-hardware/package.json
jq --arg version "${{ inputs.new_version }}" '.version = $version' ./src-tauri/plugins/tauri-plugin-llamacpp/package.json > /tmp/package.json
mv /tmp/package.json ./src-tauri/plugins/tauri-plugin-llamacpp/package.json
echo "---------./src-tauri/plugins/tauri-plugin-llamacpp/package.json---------"
cat ./src-tauri/plugins/tauri-plugin-llamacpp/package.json
ctoml ./src-tauri/plugins/tauri-plugin-hardware/Cargo.toml package.version "${{ inputs.new_version }}"
echo "---------./src-tauri/plugins/tauri-plugin-hardware/Cargo.toml---------"
cat ./src-tauri/plugins/tauri-plugin-hardware/Cargo.toml
ctoml ./src-tauri/plugins/tauri-plugin-llamacpp/Cargo.toml package.version "${{ inputs.new_version }}"
echo "---------./src-tauri/plugins/tauri-plugin-llamacpp/Cargo.toml---------"
cat ./src-tauri/plugins/tauri-plugin-llamacpp/Cargo.toml
ctoml ./src-tauri/Cargo.toml package.version "${{ inputs.new_version }}"
echo "---------./src-tauri/Cargo.toml---------"
cat ./src-tauri/Cargo.toml
# Temporarily enable devtool on prod build

View File

@ -54,9 +54,32 @@ jobs:
jq --arg version "${{ inputs.new_version }}" '.version = $version' web-app/package.json > /tmp/package.json
mv /tmp/package.json web-app/package.json
# Update tauri plugin versions
jq --arg version "${{ inputs.new_version }}" '.version = $version' ./src-tauri/plugins/tauri-plugin-hardware/package.json > /tmp/package.json
mv /tmp/package.json ./src-tauri/plugins/tauri-plugin-hardware/package.json
echo "---------./src-tauri/plugins/tauri-plugin-hardware/package.json---------"
cat ./src-tauri/plugins/tauri-plugin-hardware/package.json
jq --arg version "${{ inputs.new_version }}" '.version = $version' ./src-tauri/plugins/tauri-plugin-llamacpp/package.json > /tmp/package.json
mv /tmp/package.json ./src-tauri/plugins/tauri-plugin-llamacpp/package.json
echo "---------./src-tauri/plugins/tauri-plugin-llamacpp/package.json---------"
cat ./src-tauri/plugins/tauri-plugin-llamacpp/package.json
ctoml ./src-tauri/plugins/tauri-plugin-hardware/Cargo.toml package.version "${{ inputs.new_version }}"
echo "---------./src-tauri/plugins/tauri-plugin-hardware/Cargo.toml---------"
cat ./src-tauri/plugins/tauri-plugin-hardware/Cargo.toml
ctoml ./src-tauri/plugins/tauri-plugin-llamacpp/Cargo.toml package.version "${{ inputs.new_version }}"
echo "---------./src-tauri/plugins/tauri-plugin-llamacpp/Cargo.toml---------"
cat ./src-tauri/plugins/tauri-plugin-llamacpp/Cargo.toml
ctoml ./src-tauri/Cargo.toml package.version "${{ inputs.new_version }}"
echo "---------Cargo.toml---------"
echo "---------./src-tauri/Cargo.toml---------"
cat ./src-tauri/Cargo.toml
if [ "${{ inputs.channel }}" != "stable" ]; then
jq '.plugins.updater.endpoints = ["https://delta.jan.ai/${{ inputs.channel }}/latest.json"]' ./src-tauri/tauri.conf.json > /tmp/tauri.conf.json
mv /tmp/tauri.conf.json ./src-tauri/tauri.conf.json

View File

@ -97,9 +97,31 @@ jobs:
mv /tmp/tauri.conf.json ./src-tauri/tauri.conf.json
jq --arg version "${{ inputs.new_version }}" '.version = $version' web-app/package.json > /tmp/package.json
mv /tmp/package.json web-app/package.json
# Update tauri plugin versions
jq --arg version "${{ inputs.new_version }}" '.version = $version' ./src-tauri/plugins/tauri-plugin-hardware/package.json > /tmp/package.json
mv /tmp/package.json ./src-tauri/plugins/tauri-plugin-hardware/package.json
echo "---------./src-tauri/plugins/tauri-plugin-hardware/package.json---------"
cat ./src-tauri/plugins/tauri-plugin-hardware/package.json
jq --arg version "${{ inputs.new_version }}" '.version = $version' ./src-tauri/plugins/tauri-plugin-llamacpp/package.json > /tmp/package.json
mv /tmp/package.json ./src-tauri/plugins/tauri-plugin-llamacpp/package.json
echo "---------./src-tauri/plugins/tauri-plugin-llamacpp/package.json---------"
cat ./src-tauri/plugins/tauri-plugin-llamacpp/package.json
ctoml ./src-tauri/plugins/tauri-plugin-hardware/Cargo.toml package.version "${{ inputs.new_version }}"
echo "---------./src-tauri/plugins/tauri-plugin-hardware/Cargo.toml---------"
cat ./src-tauri/plugins/tauri-plugin-hardware/Cargo.toml
ctoml ./src-tauri/plugins/tauri-plugin-llamacpp/Cargo.toml package.version "${{ inputs.new_version }}"
echo "---------./src-tauri/plugins/tauri-plugin-llamacpp/Cargo.toml---------"
cat ./src-tauri/plugins/tauri-plugin-llamacpp/Cargo.toml
ctoml ./src-tauri/Cargo.toml package.version "${{ inputs.new_version }}"
echo "---------Cargo.toml---------"
echo "---------./src-tauri/Cargo.toml---------"
cat ./src-tauri/Cargo.toml
# Add sign commands to tauri.windows.conf.json

View File

@ -72,6 +72,9 @@ lint: install-and-build
test: lint
yarn download:bin
yarn download:lib
ifeq ($(OS),Windows_NT)
yarn download:windows-installer
endif
yarn test
yarn copy:assets:tauri
yarn build:icon

View File

@ -240,6 +240,12 @@ export abstract class AIEngine extends BaseExtension {
EngineManager.instance().register(this)
}
/**
* Gets model info
* @param modelId
*/
abstract get(modelId: string): Promise<modelInfo | undefined>
/**
* Lists available models
*/
@ -283,11 +289,6 @@ export abstract class AIEngine extends BaseExtension {
*/
abstract getLoadedModels(): Promise<string[]>
/**
* Optional method to get the underlying chat client
*/
getChatClient?(sessionId: string): any
/**
* Check if a tool is supported by the model
* @param modelId

View File

@ -0,0 +1,79 @@
/about /handbook 302
/about/ /handbook 302
/about/community /handbook 302
/about/handbook /handbook 302
/about/handbook/analytics /handbook 302
/about/handbook/project-management /handbook 302
/about/handbook/strategy /handbook 302
/about/handbook/website-docs /handbook 302
/about/how-we-work/product-design /handbook 302
/about/how-we-work/strategy /handbook 302
/about/investors /handbook 302
/about/roadmap /handbook 302
/about/team /handbook 302
/about/vision /handbook 302
/about/wall-of-love /handbook 302
/handbook/contributing-to-jan/ /handbook 302
/handbook/core-contributors/how-we-hire/ /handbook 302
/handbook/engineering-excellence/ /handbook 302
/handbook/engineering/ /handbook 302
/handbook/product-and-community/ /handbook 302
/handbook/products-and-innovations/ /handbook 302
/handbook/what-we-do/our-approach-to-design/ /handbook 302
/how-we-work/product-design /handbook 302
/handbook/product-and-community/approaches-to-beta-testing-and-user-engagement/ /handbook 302
/cortex/assistants /docs/ 302
/cortex/build-extension /docs/ 302
/cortex/built-in/tensorrt-llm /docs/ 302
/cortex/cli/kill /docs/ 302
/cortex/command-line /docs/ 302
/cortex/cortex-openvino /docs/ 302
/cortex/cortex-python /docs/ 302
/cortex/cortex-tensorrt-llm /docs/ 302
/cortex/desktop-installation/linux /docs/ 302
/cortex/embeddings /docs/ 302
/cortex/ext-architecture /docs/ 302
/cortex/fine-tuning /docs/ 302
/cortex/fine-tuning/overview /docs/ 302
/cortex/function-calling /docs/ 302
/cortex/installation/linux /docs/ 302
/cortex/installation/mac /docs/ 302
/cortex/model-operations /docs/ 302
/cortex/model-operations/overview /docs/ 302
/cortex/rag/overview /docs/ 302
/cortex/server /docs/ 302
/docs/tools/retrieval /docs/ 302
/developer/framework/engineering/chats /docs/ 302
/developer/framework/engineering/threads/ /docs/ 302
/developer/framework/product/chat /docs/ 302
/docs/extensions /docs/ 302
/docs/shortcuts /docs/ 302
/docs/models /docs/ 302
/integrations/function-calling/interpreter /docs/ 302
/docs/built-in/llama-cpp /docs/desktop/llama-cpp 302
/docs/install-engines /docs/desktop/llama-cpp 302
/docs/local-api /docs/desktop/api-server 302
/docs/local-engines/llama-cpp /docs/desktop/llama-cpp 302
/docs/models/manage-models /docs/desktop/manage-models 302
/cortex/vision /handbook/open-superintelligence 302
/docs/models/model-parameters /docs/desktop/model-parameters 302
/docs/remote-models/generic-openai /docs/desktop/remote-models/openai 302
/docs/threads /changelog/2024-01-16-settings-options-right-panel 302
/guides/integrations/continue/ /docs/desktop/server-examples/continue-dev 302
/continue-dev /docs/desktop/server-examples/continue-dev 302
/integrations /docs/desktop/server-examples/continue-dev 302
/integrations/coding/continue-dev /docs/desktop/server-examples/continue-dev 302
/integrations/continue/ /docs/desktop/server-examples/continue-dev 302
/integrations/coding/tabby /docs/desktop/server-examples/tabby 302
/integrations/messaging/llmcord /docs/desktop/server-examples/llmcord 302
/integrations/workflow-automation/n8n /docs/desktop/server-examples/n8n 302
/local-server/continue-dev /docs/desktop/server-examples/continue-dev 302
/local-server/data-folder /docs/desktop/desktop/data-folder 302
/local-server/llama-cpp /docs/desktop/desktop/llama-cpp 302
/local-server/n8n /docs/desktop/server-examples/n8n 302
/local-server/settings /docs/desktop/server-settings 302
/local-server/tabby /docs/desktop/server-examples/tabby 302
/local-server/troubleshooting /docs/desktop/troubleshooting 302
/mcp /docs/desktop/mcp 302
/quickstart /docs/desktop/quickstart 302
/server-examples/continue-dev /docs/desktop/server-examples/continue-dev 302

View File

@ -77,9 +77,9 @@ export default function Footer() {
return (
<footer className="py-4 w-full">
<div className="mx-auto">
<div className="grid grid-cols-1 md:grid-cols-6 gap-8">
<div className="grid grid-cols-1 lg:grid-cols-6 gap-8">
{/* Jan Logo and Newsletter */}
<div className="md:col-span-2">
<div className="md:col-span-1 lg:col-span-2">
<h2 className="text-[52px] font-bold mb-6">Jan</h2>
<div>
<div className="flex items-center gap-2 mb-3">
@ -138,7 +138,7 @@ export default function Footer() {
{/* Menu Columns */}
{FOOTER_MENUS.map((menu) => (
<div key={menu.title} className="">
<h3 className="text-lg mb-4 font-bold">{menu.title}</h3>
<h3 className="text-base mb-4 font-bold">{menu.title}</h3>
<ul className="space-y-2">
{menu.links.map((link) => (
<li key={link.name}>

View File

@ -124,7 +124,7 @@ const Home = () => {
</h1>
</div>
<p className="px-4 lg:px-0 mt-2 text-lg lg:text-2xl font-medium leading-relaxed text-white animate-fade-in-up delay-500 -tracking-[0.6px]">
Jan is the open-source ChatGPT replacement.
The best of open-source AI in an easy-to-use product.
</p>
</div>
<div className="flex px-4 flex-col lg:flex-row items-center gap-4 w-full justify-center text-center animate-fade-in-up delay-600 mt-8 lg:mt-10">

Binary file not shown. (Added; 288 KiB)
Binary file not shown. (Added; 634 KiB)
Binary file not shown. (Added; 30 KiB)
Binary file not shown. (Added; 30 KiB)
Binary file not shown. (Added; 154 KiB)
Binary file not shown. (Added; 134 KiB)
Binary file not shown. (Added; 116 KiB)
File diff suppressed because one or more lines are too long. (Added; 30 KiB)
Binary file not shown. (Added; 38 KiB)

View File

@ -0,0 +1,268 @@
---
title: "How we (try to) benchmark GPU kernels accurately"
description: "We present the process behind how we decided to benchmark GPU kernels and iteratively improved our benchmarking pipeline"
tags: ""
categories: research
ogImage: "./_assets/cover-kernel-benchmarking.png"
date: 2025-09-17
---
# How we (try to) benchmark GPU kernels accurately
If you've read another kernel benchmarking article before, a lot of the information in this blog post may already be second nature to you. In fact, we would like to start by acknowledging the blood, sweat, and tears poured into the many kernel benchmarking guides written before ours, which helped us write better benchmarking code as well as this blog post.
Over here at Menlo, we recently acquired some [RTX PRO 6000 Blackwell Workstation Edition](https://www.nvidia.com/en-sg/products/workstations/professional-desktop-gpus/rtx-pro-6000/) GPUs, and we are trying to make LLM inference engines like [vLLM](https://github.com/vllm-project/vllm) run faster on them. We've been writing our own kernels specifically for the RTX PRO 6000 to see if we can improve inference times on our hardware.
This post details how our ML Efficiency team identified problems in our benchmarking code and how we iterated on it by following the various excellent benchmarking guides out there. Without further ado, let's work our way up from benchmarking simple programs to GPU kernels.
## Introduction to kernels and benchmarking
For those new to GPU programming, a [kernel](https://modal.com/gpu-glossary/device-software/kernel) is a piece of CUDA code that programmers write to execute a desired sequence of operations on the GPU. A kernel is launched once and executed by many threads running concurrently; these threads are organized into a [thread block grid](https://modal.com/gpu-glossary/device-software/thread-block-grid), which executes the kernel across multiple [Streaming Multiprocessors (SMs)](https://modal.com/gpu-glossary/device-hardware/streaming-multiprocessor) spanning the whole GPU.
Benchmarking is a fundamental aspect of high-performance computing. It lets us quantitatively compare kernel performance across different problem sizes and understand how various hyperparameters impact execution speed. For GPU kernel development, benchmarking helps us iteratively optimize our kernels so that they utilize the GPU better.
That said, what really matters is **accurate** kernel benchmarking: timing code that runs on the GPU gets complex quickly, and there are many traps to fall into if benchmarking scripts are written carelessly. A great alternative is to use the tools NVIDIA ships with the [CUDA Toolkit](https://developer.nvidia.com/cuda-toolkit), such as the [Nsight CUDA Profiling Tools Interface](https://developer.nvidia.com/cupti) (CUPTI) or the [Nsight Compute CLI](https://docs.nvidia.com/nsight-compute/NsightComputeCli/index.html) (`ncu`), which provide accurate measurements of various kernel characteristics. We wanted to use Python, since it makes it convenient to sweep across different problem shapes and kernels quickly, but that meant we also had to learn how to benchmark kernels properly from scratch.
Below, we showcase some examples of how to benchmark kernels on the GPU. We chose Python for most of our benchmarking code because most of our codebase is already in Python, which makes integration simple.
## Benchmarking CUDA programs
PyTorch provides a basic API for timing `torch` programs, described in this [tutorial](https://docs.pytorch.org/tutorials/recipes/recipes/benchmark.html).
A basic implementation can be as simple as:
```python
import torch
import torch.utils.benchmark as benchmark

def batched_dot_mul_sum(a, b):
    '''Computes batched dot by multiplying and summing'''
    return a.mul(b).sum(-1)

x = torch.randn(10000, 64)  # example input, as in the tutorial

num_threads = torch.get_num_threads()
print(f'Benchmarking on {num_threads} threads')

t0 = benchmark.Timer(
    stmt='batched_dot_mul_sum(x, x)',
    setup='from __main__ import batched_dot_mul_sum',
    globals={'x': x},
    num_threads=num_threads,
    label='Multithreaded batch dot',
    sub_label='Implemented using mul and sum')
```
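To actually collect a measurement, you then run the timer. A minimal usage sketch (`Timer.timeit` is the standard `torch.utils.benchmark` method; it runs the statement the given number of times and reports timing statistics):
```python
# Run batched_dot_mul_sum(x, x) 100 times and print the aggregated measurement
print(t0.timeit(100))
```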
When benchmarking kernels, there are a few practices we should follow to make sure our measurements are accurate.
### 1. Benchmark with the **settings the user will see**, not the settings on your machine
Measuring how fast your kernels run on a 3090 is meaningless if you serve your models on an H100 DGX node. Always benchmark your kernels on the hardware you plan to serve on.
### 2. **Warm up your kernels**
Take a look at this snippet from the tutorial.
```
mul_sum(x, x): 27.6 μs
mul_sum(x, x): 25.3 μs
bmm(x, x): 2775.5 μs
bmm(x, x): 22.4 μs
```
The first `bmm` call takes far longer to run, because most of that time is spent loading [cuBLAS](https://developer.nvidia.com/cublas) kernels on first use.
Warming up your kernels can be as simple as running the kernel before timing it. This pays one-time loading costs up front, so that we measure only how long the kernel itself takes to run.
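As a minimal sketch (assuming `run_kernel()` wraps the operation under test), the warmup is just an untimed loop before the timed region:
```python
import torch

def warmup(run_kernel, n_warmup: int = 10) -> None:
    # Untimed runs: pay one-time costs (e.g. cuBLAS module loading) up front
    for _ in range(n_warmup):
        run_kernel()
    # Wait for the warmup work to finish before any timing starts
    torch.cuda.synchronize()
```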
### 3. `torch.cuda.synchronize` and CUDA Events
Now we'll introduce a new API that is the standard way to benchmark kernels. [CUDA events](https://docs.pytorch.org/docs/stable/generated/torch.cuda.Event.html) are great for a variety of reasons. The simplest is that they measure time from the perspective of the GPU, whereas `time.time()` and `time.perf_counter()` measure it from the perspective of the CPU.
Moreover, their simple API lets you write benchmarking code like this:
```python
steps = 10
start_events = [torch.cuda.Event(enable_timing=True) for _ in range(steps)]
end_events = [torch.cuda.Event(enable_timing=True) for _ in range(steps)]

for i in range(steps):
    start_events[i].record()
    run_kernel()
    end_events[i].record()

torch.cuda.synchronize()
times = [s.elapsed_time(e) for s, e in zip(start_events, end_events)]
```
The `torch.cuda.synchronize()` call tells the CPU to wait for the queued GPU work to finish, so that the elapsed time is only computed after synchronization, as visualised here:
![image](./_assets/speechmatics-events.svg)
_Figure 1: Illustration taken from https://www.speechmatics.com/company/articles-and-news/timing-operations-in-pytorch_
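For contrast, here is a sketch of the pitfall with CPU-side timers (again assuming a `run_kernel()` wrapper): kernel launches are asynchronous, so without a synchronize, `time.perf_counter()` measures only how long the CPU took to enqueue the kernel, not how long the GPU took to run it:
```python
import time
import torch

t0 = time.perf_counter()
run_kernel()  # returns as soon as the launch is queued
launch_only = time.perf_counter() - t0  # misleading: launch overhead only

t0 = time.perf_counter()
run_kernel()
torch.cuda.synchronize()  # block until the GPU has actually finished
wall_time = time.perf_counter() - t0  # includes the kernel's runtime
```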
### 4. Flush your L2 Cache
#### What is the L2 Cache
When data is read from or written to [HBM or GDDR](https://www.exxactcorp.com/blog/hpc/gddr6-vs-hbm-gpu-memory), it goes through the [L2 cache](https://docs.nvidia.com/cuda/cuda-c-programming-guide/#architecture) first. The L2 cache caches accesses to local and global memory, allowing data to be reused instead of reloaded from device memory (which can be slow!).
Unlike the L1 cache, which is private to each SM, the L2 cache is shared by all [streaming multiprocessors (SMs)](https://modal.com/gpu-glossary/device-hardware/streaming-multiprocessor)!
#### Why we need to flush the L2 Cache
As this [guide](https://guillesanbri.com/CUDA-Benchmarks/#caches) explains, if you have warmed up or previously run the kernel, some of its data may still be resident in the L2 cache, making the kernel look deceptively fast.
In a real-world setting, however, you want a realistic measurement of the kernel's runtime, and when running large models you will be running far more than one kernel. The cache will therefore thrash often and rarely hold any one kernel's data for reuse. To simulate this behaviour, we flush the L2 cache beforehand, eliminating any "help" from it.
This also makes it much easier to reason about data reuse within the kernel, since any L2 cache usage is now independent of other kernels or runs.
#### Example of not flushing L2 cache
When we initially benchmarked our kernels, we made the small mistake of not flushing the L2 cache.
![image](./_assets/exceed-sol.png)
_Figure 2: Our SOL % (a percentage of our observed maximum speed) is over 100% for the row with shape [2, 19456, 2560]._
#### How to flush the L2 Cache
To flush it, we should add the following lines:
```python
l2_size = torch.cuda.get_device_properties().L2_cache_size
cache = torch.empty(l2_size, dtype=torch.uint8, device="cuda")

cache.zero_()  # flush L2 cache
# <your benchmarking code here>
# If you repeat the same process multiple times, flush the L2 cache
# again before each timed run.
```
This allocates a buffer the size of the L2 cache; zeroing it in place issues a write operation that passes through the L2 cache and flushes it.
After flushing the L2 cache, we get a more sensible result here:
![image](./_assets/fixed-l2.png)
_Figure 3: All SOL% values are now under 100% after flushing the L2 cache._
### 5. Timing short-lived kernels
Initially, we used [Triton's](https://triton-lang.org/main/getting-started/installation.html) [`do_bench`](https://triton-lang.org/main/python-api/generated/triton.testing.do_bench.html) for benchmarking, since it does everything mentioned above: warmup, CUDA events, and L2 cache flushing. However, we hit an issue when benchmarking our kernels on smaller shapes: the kernel can be so fast that it finishes before the CPU has even issued the CUDA end event from Python.
![image](./_assets/speechmatics-too-fast.png)
_Figure 4: Taken from [Speechmatics](https://www.speechmatics.com/company/articles-and-news/timing-operations-in-pytorch). The kernel finishes before the CUDA end event is launched, so the true kernel timing is not recorded._
This results in kernels that look very slow:
![image](./_assets/small-timed-bug.png)
_Figure 5: Side-by-side comparison of Python benchmark latencies vs `ncu`'s timing (right) for shape [2, 19456, 2560]. `ncu` records a much faster duration of 71.36 μs compared to Python's 103.9 μs._
To fix this, we wrote a custom `do_bench_cuda()` that inserts a dummy, untimed FP32 matmul before each timed run, so that the CPU has enough time to enqueue the CUDA end event.
This led to more accurate latencies for our small-M kernels.
![image](./_assets/fixed-l2.png)
_Figure 6: There is a significant improvement in SOL% after inserting the dummy matmul._
We also repeat the benchmarked function for each shape over 5 copies of the input/output data, which lengthens the interval measured by the CUDA events.
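A sketch of what cycling over multiple copies might look like; the helper names (`make_inputs`, `run_kernel`) and the copy count here are illustrative, not our exact code:
```python
n_copies = 5
# Independent copies of the kernel's inputs, allocated up front
inputs = [make_inputs() for _ in range(n_copies)]

def f():
    # One timed "run" now spans n_copies launches on distinct buffers,
    # lengthening the interval between the CUDA start and end events.
    for args in inputs:
        run_kernel(*args)
```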
In the end, this is the `do_bench_cuda` function we used to benchmark our kernels:
```python
import statistics

import torch

def do_bench_cuda(f, n_warmup: int = 10, n_repeats: int = 20):
    l2_size = torch.cuda.get_device_properties().L2_cache_size
    cache = torch.empty(l2_size, dtype=torch.uint8, device="cuda")

    # Matmul in case of short-lived CUDA events
    A = torch.randn(4096, 4096, dtype=torch.float32, device="cuda")
    B = torch.randn(4096, 4096, dtype=torch.float32, device="cuda")
    A @ B

    # L2 cache flush + warmup
    for _ in range(n_warmup):
        cache.zero_()
        f()

    start_list = [torch.cuda.Event(enable_timing=True) for _ in range(n_repeats)]
    end_list = [torch.cuda.Event(enable_timing=True) for _ in range(n_repeats)]
    torch.cuda.synchronize()
    for start, end in zip(start_list, end_list):
        cache.zero_()  # flush L2 cache
        A @ B  # add a heavy task to fill the GPU pipeline
        start.record()
        f()
        end.record()
    torch.cuda.synchronize()

    timings = [start.elapsed_time(end) for start, end in zip(start_list, end_list)]
    return statistics.median(timings)
```
### 6. Clock Speed
This was a silent problem, and it was very hard to trace the issues it caused in our kernels. We initially found discrepancies between `ncu`'s latency (676.64 μs) and `do_bench_cuda`'s latency (535 μs) when profiling the shape [2048, 19456, 2560]: `do_bench` was reporting timings ~140 μs faster than `ncu`'s.
Although most of our kernel benchmarking code is in Python, developer errors do occur, and it is always good to have a point of reference for accurate kernel timings. The Nsight Compute CLI (`ncu` for short) measures kernel latency accurately, and the values it reports are a good figure against which to sanity-check our own benchmarking code.
#### 6.1 Locking clock speeds
First, we suspected that clock speed played a part in the discrepancy between `ncu`'s timings and our own benchmarking code. Clock speed affects benchmark times because it is the rate at which the GPU's processing units operate; a higher clock speed translates to more operations per second, which can either speed up or slow down a kernel depending on how it was implemented.
![image](./_assets/clock-speed-effect.png)
_Figure 7: Taken from [GPU Mode Lecture 56](https://www.youtube.com/watch?v=CtrqBmYtSEk). Clock speed affects kernel performance: the problem shape of 1024 got faster as clock speed increased, while the problem shape of 384 became slower._
From this [forum post](https://forums.developer.nvidia.com/t/nsight-compute-clock-speed-during-profiling/208646/3), we realised that part of the discrepancy arose because `ncu` locks the clock speed to the GPU's base clock by default. We investigated by locking the clock to the base clock speed, and also tried locking it to the max clock speed using `nvidia-smi -ac <memClk>,<smClk>`. According to the GPU Mode lecture, neither is a proper solution.
This is due to the following reasons:
- Locking to the max clock speed doesn't help, as it only sets a ceiling on GPU performance; the GPU can still fall back to its base clock speed of ~2287 MHz instead of the boosted clock speed of 2617 MHz.
- Locking to the base clock speed is also not meaningful, as it does not reflect the performance and experience users will actually get from our kernels, which will at best run at boosted clock speeds.
However, we did find that we should set `ncu`'s `--clock-control` option to `none`, so that it does not pin itself to base clock speeds. This brought the latency reported by `ncu` from 676.64 μs down to 575 μs when profiling the same problem shape of [2048, 19456, 2560].
#### 6.2 Discrepancies after `clock-control`
At the time of writing, we have observed that `ncu` sometimes gives different latency results for the same benchmarking code and problem shapes. This is because with `--clock-control none` the GPU clock speed is left uncontrolled and varies between runs, which in turn affects the measured kernel latency. A more holistic approach would be to also benchmark kernels across several fixed clock speeds.
![image](./_assets/ncu-compare.png)
_Figure 8: On the same benchmarking code and problem shapes, we see large deviations in duration, caused by differences in SM frequency. This is consistent with the graphs shown in Figure 7._
As a result, there can be some discrepancy between `ncu`'s timings and our own. To check whether a discrepancy is caused by SM frequency, use the fact that throughput is directly proportional to the SM clock, so kernel durations are inversely proportional to it.
In our case, scaling the 544 μs run (measured at an SM frequency of 2.28 GHz) to the 2.14 GHz clock of the slower run predicts `544 × 2.28 / 2.14 ≈ 579 μs`, close to the observed 575 μs, so most of the discrepancy came from the difference in SM frequency.
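This sanity check is easy to script; a small sketch using the numbers from our runs above:
```python
def scale_duration(duration_us: float, freq_measured_ghz: float,
                   freq_target_ghz: float) -> float:
    # Duration is inversely proportional to SM clock (for compute-bound
    # kernels), so predicted duration = measured * f_measured / f_target.
    return duration_us * freq_measured_ghz / freq_target_ghz

# Scale the 544 us run (at 2.28 GHz) to the 2.14 GHz clock of the slower run:
print(scale_duration(544, 2.28, 2.14))  # ~579 us, close to the observed 575 us
```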
The final command we used was:
`ncu -s 5 -k $kernel_name --clock-control none python3 benchmarks/bench_mm.py --profile 2048 19456 2560`
Explanation of arguments:
- `-s`: number of kernel launches to skip before profiling
- `-k`: name of the kernel to profile
- `--clock-control`: whether `ncu` controls the clock speed (here `none`, i.e. uncontrolled)
Below is a side-by-side comparison of `ncu`'s benchmarked latency and our script's, after all the adjustments were made.
![image](./_assets/ncu-bench.png)
_Figure 9: Side-by-side comparison of the above `ncu` command (left, measuring shape [2048, 19456, 2560]) with our own Python benchmarking script (right). There is at most a 10 μs difference between `Duration` in `ncu` and our script's `Latency (us)` measurement._
## Conclusion and TL;DR
TL;DR, when benchmarking:
1. Use the hardware you intend to deploy on
2. Warm up your kernels before benchmarking
3. Use CUDA events
4. Flush your L2 cache
5. Use a dummy matmul to make timings more accurate for short-lived kernels
6. Ensure your clock speed doesn't cause inconsistent readings
We hope this helps anyone interested in benchmarking their own kernels, or in how GPU kernels are benchmarked. Happy benchmarking!
### Acknowledgements and Related Resources
We would like to thank and credit the many resources and guides we used on our journey to work out how best to benchmark kernels on our GPUs; much of our work would not have been possible without these amazing guides.
- GPU Mode Lecture 56, presented by Georgii Evtushenko: https://www.youtube.com/watch?v=CtrqBmYtSEk
- https://www.spatters.ca/mma-matmul (Benchmarking using ncu for matrix multiplications)
- https://www.speechmatics.com/company/articles-and-news/timing-operations-in-pytorch (CUDA Events)
- https://guillesanbri.com/CUDA-Benchmarks/ (Good resource for introduction to benchmarking)
- https://modal.com/gpu-glossary/device-hardware/cuda-device-architecture (Glossary of Architecture in general)
- https://docs.nvidia.com/cuda/cuda-c-programming-guide/#global-memory-5-x (L2 cache explanation)
- https://cvw.cac.cornell.edu/gpu-architecture/gpu-memory/memory_types (L1 vs L2 cache)

View File

@ -22,7 +22,7 @@ export default class JanProviderWeb extends AIEngine {
override async onLoad() {
console.log('Loading Jan Provider Extension...')
try {
// Initialize authentication and fetch models
await janApiClient.initialize()
@ -37,20 +37,43 @@ export default class JanProviderWeb extends AIEngine {
override async onUnload() {
console.log('Unloading Jan Provider Extension...')
// Clear all sessions
for (const sessionId of this.activeSessions.keys()) {
await this.unload(sessionId)
}
janProviderStore.reset()
console.log('Jan Provider Extension unloaded')
}
async get(modelId: string): Promise<modelInfo | undefined> {
return janApiClient
.getModels()
.then((list) => list.find((e) => e.id === modelId))
.then((model) =>
model
? {
id: model.id,
name: model.id, // Use ID as name for now
quant_type: undefined,
providerId: this.provider,
port: 443, // HTTPS port for API
sizeBytes: 0, // Size not provided by Jan API
tags: [],
path: undefined, // Remote model, no local path
owned_by: model.owned_by,
object: model.object,
capabilities: ['tools'], // Jan models support both tools via MCP
}
: undefined
)
}
async list(): Promise<modelInfo[]> {
try {
const janModels = await janApiClient.getModels()
return janModels.map((model) => ({
id: model.id,
name: model.id, // Use ID as name for now
@ -75,7 +98,7 @@ export default class JanProviderWeb extends AIEngine {
// For Jan API, we don't actually "load" models in the traditional sense
// We just create a session reference for tracking
const sessionId = `jan-${modelId}-${Date.now()}`
const sessionInfo: SessionInfo = {
pid: Date.now(), // Use timestamp as pseudo-PID
port: 443, // HTTPS port
@ -85,8 +108,10 @@ export default class JanProviderWeb extends AIEngine {
}
this.activeSessions.set(sessionId, sessionInfo)
console.log(`Jan model session created: ${sessionId} for model ${modelId}`)
console.log(
`Jan model session created: ${sessionId} for model ${modelId}`
)
return sessionInfo
} catch (error) {
console.error(`Failed to load Jan model ${modelId}:`, error)
@ -97,23 +122,23 @@ export default class JanProviderWeb extends AIEngine {
async unload(sessionId: string): Promise<UnloadResult> {
try {
const session = this.activeSessions.get(sessionId)
if (!session) {
return {
success: false,
error: `Session ${sessionId} not found`
error: `Session ${sessionId} not found`,
}
}
this.activeSessions.delete(sessionId)
console.log(`Jan model session unloaded: ${sessionId}`)
return { success: true }
} catch (error) {
console.error(`Failed to unload Jan session ${sessionId}:`, error)
return {
success: false,
error: error instanceof Error ? error.message : 'Unknown error'
error: error instanceof Error ? error.message : 'Unknown error',
}
}
}
@ -136,9 +161,12 @@ export default class JanProviderWeb extends AIEngine {
}
// Convert core chat completion request to Jan API format
const janMessages: JanChatMessage[] = opts.messages.map(msg => ({
const janMessages: JanChatMessage[] = opts.messages.map((msg) => ({
role: msg.role as 'system' | 'user' | 'assistant',
content: typeof msg.content === 'string' ? msg.content : JSON.stringify(msg.content)
content:
typeof msg.content === 'string'
? msg.content
: JSON.stringify(msg.content),
}))
const janRequest = {
@ -162,18 +190,18 @@ export default class JanProviderWeb extends AIEngine {
} else {
// Return single response
const response = await janApiClient.createChatCompletion(janRequest)
// Check if aborted after completion
if (abortController?.signal?.aborted) {
throw new Error('Request was aborted')
}
return {
id: response.id,
object: 'chat.completion' as const,
created: response.created,
model: response.model,
choices: response.choices.map(choice => ({
choices: response.choices.map((choice) => ({
index: choice.index,
message: {
role: choice.message.role,
@ -182,7 +210,12 @@ export default class JanProviderWeb extends AIEngine {
reasoning_content: choice.message.reasoning_content,
tool_calls: choice.message.tool_calls,
},
finish_reason: (choice.finish_reason || 'stop') as 'stop' | 'length' | 'tool_calls' | 'content_filter' | 'function_call',
finish_reason: (choice.finish_reason || 'stop') as
| 'stop'
| 'length'
| 'tool_calls'
| 'content_filter'
| 'function_call',
})),
usage: response.usage,
}
@ -193,7 +226,10 @@ export default class JanProviderWeb extends AIEngine {
}
}
private async *createStreamingGenerator(janRequest: any, abortController?: AbortController) {
private async *createStreamingGenerator(
janRequest: any,
abortController?: AbortController
) {
let resolve: () => void
let reject: (error: Error) => void
const chunks: any[] = []
@ -231,7 +267,7 @@ export default class JanProviderWeb extends AIEngine {
object: chunk.object,
created: chunk.created,
model: chunk.model,
choices: chunk.choices.map(choice => ({
choices: chunk.choices.map((choice) => ({
index: choice.index,
delta: {
role: choice.delta.role,
@ -261,14 +297,14 @@ export default class JanProviderWeb extends AIEngine {
if (abortController?.signal?.aborted) {
throw new Error('Request was aborted')
}
while (yieldedIndex < chunks.length) {
yield chunks[yieldedIndex]
yieldedIndex++
}
// Wait a bit before checking again
await new Promise(resolve => setTimeout(resolve, 10))
await new Promise((resolve) => setTimeout(resolve, 10))
}
// Yield any remaining chunks
@ -291,24 +327,32 @@ export default class JanProviderWeb extends AIEngine {
}
async delete(modelId: string): Promise<void> {
throw new Error(`Delete operation not supported for remote Jan API model: ${modelId}`)
throw new Error(
`Delete operation not supported for remote Jan API model: ${modelId}`
)
}
async import(modelId: string, _opts: ImportOptions): Promise<void> {
throw new Error(`Import operation not supported for remote Jan API model: ${modelId}`)
throw new Error(
`Import operation not supported for remote Jan API model: ${modelId}`
)
}
async abortImport(modelId: string): Promise<void> {
throw new Error(`Abort import operation not supported for remote Jan API model: ${modelId}`)
throw new Error(
`Abort import operation not supported for remote Jan API model: ${modelId}`
)
}
async getLoadedModels(): Promise<string[]> {
return Array.from(this.activeSessions.values()).map(session => session.model_id)
return Array.from(this.activeSessions.values()).map(
(session) => session.model_id
)
}
async isToolSupported(modelId: string): Promise<boolean> {
// Jan models support tool calls via MCP
console.log(`Checking tool support for Jan model ${modelId}: supported`);
return true;
console.log(`Checking tool support for Jan model ${modelId}: supported`)
return true
}
}
}

View File

@ -96,18 +96,6 @@
"textAlign": "right"
}
},
{
"key": "batch_size",
"title": "Batch Size",
"description": "Logical maximum batch size for processing prompts.",
"controllerType": "input",
"controllerProps": {
"value": 2048,
"placeholder": "2048",
"type": "number",
"textAlign": "right"
}
},
{
"key": "ubatch_size",
"title": "uBatch Size",

View File

@ -46,7 +46,6 @@ export async function getLocalInstalledBackends(): Promise<
}
}
}
console.debug(local)
return local
}
@ -319,7 +318,10 @@ export async function downloadBackend(
events.emit('onFileDownloadSuccess', { modelId: taskId, downloadType })
} catch (error) {
// Fallback: if GitHub fails, retry once with CDN
if (source === 'github') {
if (
source === 'github' &&
error?.toString() !== 'Error: Download cancelled'
) {
console.warn(`GitHub download failed, falling back to CDN:`, error)
return await downloadBackend(backend, version, 'cdn')
}

View File

@ -37,7 +37,13 @@ import {
import { invoke } from '@tauri-apps/api/core'
import { getProxyConfig } from './util'
import { basename } from '@tauri-apps/api/path'
import { readGgufMetadata } from '@janhq/tauri-plugin-llamacpp-api'
import {
readGgufMetadata,
estimateKVCacheSize,
getModelSize,
isModelSupported,
planModelLoadInternal,
} from '@janhq/tauri-plugin-llamacpp-api'
import { getSystemUsage, getSystemInfo } from '@janhq/tauri-plugin-hardware-api'
// Error message constant - matches web-app/src/utils/error.ts
@ -82,6 +88,7 @@ type ModelPlan = {
maxContextLength: number
noOffloadKVCache: boolean
offloadMmproj?: boolean
batchSize: number
mode: 'GPU' | 'Hybrid' | 'CPU' | 'Unsupported'
}
@ -922,6 +929,30 @@ export default class llamacpp_extension extends AIEngine {
return hash
}
override async get(modelId: string): Promise<modelInfo | undefined> {
const modelPath = await joinPath([
await this.getProviderPath(),
'models',
modelId,
])
const path = await joinPath([modelPath, 'model.yml'])
if (!(await fs.existsSync(path))) return undefined
const modelConfig = await invoke<ModelConfig>('read_yaml', {
path,
})
return {
id: modelId,
name: modelConfig.name ?? modelId,
quant_type: undefined, // TODO: parse quantization type from model.yml or model.gguf
providerId: this.provider,
port: 0, // port is not known until the model is loaded
sizeBytes: modelConfig.size_bytes ?? 0,
} as modelInfo
}
// Implement the required LocalProvider interface methods
override async list(): Promise<modelInfo[]> {
const modelsDir = await joinPath([await this.getProviderPath(), 'models'])
@ -1085,7 +1116,10 @@ export default class llamacpp_extension extends AIEngine {
const archiveName = await basename(path)
logger.info(`Installing backend from path: ${path}`)
if (!(await fs.existsSync(path)) || (!path.endsWith('tar.gz') && !path.endsWith('zip'))) {
if (
!(await fs.existsSync(path)) ||
(!path.endsWith('tar.gz') && !path.endsWith('zip'))
) {
logger.error(`Invalid path or file ${path}`)
throw new Error(`Invalid path or file ${path}`)
}
@ -1979,11 +2013,6 @@ export default class llamacpp_extension extends AIEngine {
return responseData as EmbeddingResponse
}
// Optional method for direct client access
override getChatClient(sessionId: string): any {
throw new Error('method not implemented yet')
}
/**
* Check if a tool is supported by the model
* Currently read from GGUF chat_template
@ -2046,7 +2075,7 @@ export default class llamacpp_extension extends AIEngine {
path: string,
meta: Record<string, string>
): Promise<{ layerSize: number; totalLayers: number }> {
const modelSize = await this.getModelSize(path)
const modelSize = await getModelSize(path)
const arch = meta['general.architecture']
const totalLayers = Number(meta[`${arch}.block_count`]) + 2 // 1 for lm_head layer and 1 for embedding layer
if (!totalLayers) throw new Error('Invalid metadata: block_count not found')
@ -2062,335 +2091,27 @@ export default class llamacpp_extension extends AIEngine {
/^\/\/[^/]+/.test(norm) // UNC path //server/share
)
}
/*
* if (!this.isAbsolutePath(path))
path = await joinPath([await getJanDataFolderPath(), path])
if (mmprojPath && !this.isAbsolutePath(mmprojPath))
mmprojPath = await joinPath([await getJanDataFolderPath(), path])
*/
async planModelLoad(
path: string,
mmprojPath?: string,
requestedCtx?: number
): Promise<ModelPlan> {
if (!this.isAbsolutePath(path))
if (!this.isAbsolutePath(path)) {
path = await joinPath([await getJanDataFolderPath(), path])
}
if (mmprojPath && !this.isAbsolutePath(mmprojPath))
mmprojPath = await joinPath([await getJanDataFolderPath(), path])
const modelSize = await this.getModelSize(path)
const memoryInfo = await this.getTotalSystemMemory()
const gguf = await readGgufMetadata(path)
// Get mmproj size if provided
let mmprojSize = 0
if (mmprojPath) {
mmprojSize = await this.getModelSize(mmprojPath)
}
const { layerSize, totalLayers } = await this.getLayerSize(
path,
gguf.metadata
)
const kvCachePerToken = (await this.estimateKVCache(gguf.metadata))
.perTokenSize
logger.info(
`Model size: ${modelSize}, Layer size: ${layerSize}, Total layers: ${totalLayers}, KV cache per token: ${kvCachePerToken}`
)
// Validate critical values
if (!modelSize || modelSize <= 0) {
throw new Error(`Invalid model size: ${modelSize}`)
}
if (!kvCachePerToken || kvCachePerToken <= 0) {
throw new Error(`Invalid KV cache per token: ${kvCachePerToken}`)
}
if (!layerSize || layerSize <= 0) {
throw new Error(`Invalid layer size: ${layerSize}`)
}
// Reserve memory for OS, other applications, and fixed engine overhead.
const VRAM_RESERVE_GB = 0.5
const VRAM_RESERVE_BYTES = VRAM_RESERVE_GB * 1024 * 1024 * 1024
const ENGINE_FIXED_OVERHEAD_BYTES = 0.2 * 1024 * 1024 * 1024 // For scratch buffers etc.
// Get model's maximum context length
const arch = gguf.metadata['general.architecture']
const modelMaxContextLength =
Number(gguf.metadata[`${arch}.context_length`]) || 8192
const MIN_CONTEXT_LENGTH = 1024
// Memory percentages applied to both VRAM and RAM
const memoryPercentages = { high: 0.7, medium: 0.5, low: 0.4 }
logger.info(
`Memory info - Total (VRAM + RAM): ${memoryInfo.totalMemory}, Total VRAM: ${memoryInfo.totalVRAM}, Mode: ${this.memoryMode}`
)
if (!memoryInfo.totalMemory || isNaN(memoryInfo.totalMemory)) {
throw new Error(`Invalid total memory: ${memoryInfo.totalMemory}`)
}
if (!memoryInfo.totalVRAM || isNaN(memoryInfo.totalVRAM)) {
throw new Error(`Invalid total VRAM: ${memoryInfo.totalVRAM}`)
}
if (!this.memoryMode || !(this.memoryMode in memoryPercentages)) {
throw new Error(
`Invalid memory mode: ${this.memoryMode}. Must be 'high', 'medium', or 'low'`
)
}
// Apply memory mode to both VRAM and RAM separately
const memoryModeMultiplier = memoryPercentages[this.memoryMode]
const usableVRAM = Math.max(
0,
memoryInfo.totalVRAM * memoryModeMultiplier -
VRAM_RESERVE_BYTES -
ENGINE_FIXED_OVERHEAD_BYTES
)
const actualSystemRAM = Math.max(0, memoryInfo.totalRAM)
const usableSystemMemory = actualSystemRAM * memoryModeMultiplier
logger.info(
`Actual System RAM: ${actualSystemRAM}, Usable VRAM for plan: ${usableVRAM}, Usable System Memory: ${usableSystemMemory}`
)
let gpuLayers = 0
let maxContextLength = 0
let noOffloadKVCache = false
let mode: ModelPlan['mode'] = 'Unsupported'
let offloadMmproj = false
let remainingVRAM = usableVRAM
if (mmprojSize > 0 && mmprojSize <= remainingVRAM) {
offloadMmproj = true
remainingVRAM -= mmprojSize
}
const vramForMinContext = (
await this.estimateKVCache(gguf.metadata, MIN_CONTEXT_LENGTH)
).size
const ramForModel = modelSize + (offloadMmproj ? 0 : mmprojSize)
if (ramForModel + vramForMinContext > usableSystemMemory + usableVRAM) {
logger.error(
`Model unsupported. Not enough resources for model and min context.`
)
return {
gpuLayers: 0,
maxContextLength: 0,
noOffloadKVCache: true,
mode: 'Unsupported',
offloadMmproj: false,
}
}
const targetContext = Math.min(
requestedCtx || modelMaxContextLength,
modelMaxContextLength
)
let targetContextSize = (
await this.estimateKVCache(gguf.metadata, targetContext)
).size
// Use `kvCachePerToken` for all VRAM calculations
if (modelSize + targetContextSize <= remainingVRAM) {
mode = 'GPU'
gpuLayers = totalLayers
maxContextLength = targetContext
noOffloadKVCache = false
logger.info(
'Planning: Ideal case fits. All layers and target context in VRAM.'
)
} else if (modelSize <= remainingVRAM) {
mode = 'GPU'
gpuLayers = totalLayers
noOffloadKVCache = false
const vramLeftForContext = remainingVRAM - modelSize
maxContextLength = Math.floor(vramLeftForContext / kvCachePerToken)
// Add safety check to prevent OOM
const safetyBuffer = 0.9 // Use 90% of calculated context to be safe
maxContextLength = Math.floor(maxContextLength * safetyBuffer)
logger.info(
`Planning: All layers fit in VRAM, but context must be reduced. VRAM left: ${vramLeftForContext}, kvCachePerToken: ${kvCachePerToken}, calculated context: ${maxContextLength}`
)
} else {
const vramAvailableForLayers = remainingVRAM - vramForMinContext
if (vramAvailableForLayers >= layerSize) {
mode = 'Hybrid'
gpuLayers = Math.min(
Math.floor(vramAvailableForLayers / layerSize),
totalLayers
)
noOffloadKVCache = false
const vramUsedByLayers = gpuLayers * layerSize
const vramLeftForContext = remainingVRAM - vramUsedByLayers
maxContextLength = Math.floor(vramLeftForContext / kvCachePerToken)
logger.info(
'Planning: Hybrid mode. Offloading layers to fit context in VRAM.'
)
}
}
// Fallback logic: try different configurations if no VRAM-based plan worked
if (mode === 'Unsupported') {
logger.info('Planning: Trying fallback configurations...')
// Try putting some layers on GPU with KV cache in RAM
const possibleGpuLayers = Math.floor(remainingVRAM / layerSize)
if (possibleGpuLayers > 0) {
gpuLayers = Math.min(possibleGpuLayers, totalLayers)
const ramUsedByCpuLayers = (totalLayers - gpuLayers) * layerSize
const ramUsedByMmproj = !offloadMmproj ? mmprojSize : 0
const availableRamForKv =
usableSystemMemory - (ramUsedByCpuLayers + ramUsedByMmproj)
// Note: Use `kvCachePerToken` for RAM calculation, as the overhead is GPU-specific
const contextInRam = Math.floor(availableRamForKv / kvCachePerToken)
if (contextInRam >= MIN_CONTEXT_LENGTH) {
mode = 'Hybrid'
maxContextLength = contextInRam
noOffloadKVCache = true
logger.info(
`Planning: Fallback hybrid - GPU layers: ${gpuLayers}, Context in RAM: ${maxContextLength}`
)
}
}
// If still unsupported, try pure CPU mode
if (mode === 'Unsupported') {
gpuLayers = 0
noOffloadKVCache = true
offloadMmproj = false
const ramUsedByModel = modelSize + mmprojSize
const availableRamForKv = usableSystemMemory - ramUsedByModel
maxContextLength = Math.floor(availableRamForKv / kvCachePerToken)
if (maxContextLength >= MIN_CONTEXT_LENGTH) {
mode = 'CPU'
logger.info(`Planning: CPU mode - Context: ${maxContextLength}`)
}
}
}
if (mode === 'CPU' || noOffloadKVCache) {
offloadMmproj = false
}
if (requestedCtx && requestedCtx > 0) {
maxContextLength = Math.min(maxContextLength, requestedCtx)
}
maxContextLength = Math.min(maxContextLength, modelMaxContextLength)
if (maxContextLength < MIN_CONTEXT_LENGTH) {
mode = 'Unsupported'
}
if (mode === 'Unsupported') {
gpuLayers = 0
maxContextLength = 0
}
maxContextLength = isNaN(maxContextLength)
? 0
: Math.floor(maxContextLength)
const mmprojInfo = mmprojPath
? `, mmprojSize=${(mmprojSize / (1024 * 1024)).toFixed(
2
)}MB, offloadMmproj=${offloadMmproj}`
: ''
logger.info(
`Final plan for ${path}: gpuLayers=${gpuLayers}/${totalLayers}, ` +
`maxContextLength=${maxContextLength}, noOffloadKVCache=${noOffloadKVCache}, ` +
`mode=${mode}${mmprojInfo}`
)
return {
gpuLayers,
maxContextLength,
noOffloadKVCache,
mode,
offloadMmproj,
}
}
/**
* Estimate KV cache size from the given metadata
*/
private async estimateKVCache(
meta: Record<string, string>,
ctx_size?: number
): Promise<{ size: number; perTokenSize: number }> {
const arch = meta['general.architecture']
if (!arch) throw new Error('Invalid metadata: architecture not found')
const nLayer = Number(meta[`${arch}.block_count`])
if (!nLayer) throw new Error('Invalid metadata: block_count not found')
const nHead = Number(meta[`${arch}.attention.head_count`])
if (!nHead) throw new Error('Invalid metadata: head_count not found')
// Try to get key/value lengths first (more accurate)
const keyLen = Number(meta[`${arch}.attention.key_length`])
const valLen = Number(meta[`${arch}.attention.value_length`])
let headDim: number
if (keyLen && valLen) {
// Use explicit key/value lengths if available
logger.info(
`Using explicit key_length: ${keyLen}, value_length: ${valLen}`
)
headDim = keyLen + valLen
} else {
// Fall back to embedding_length estimation
const embeddingLen = Number(meta[`${arch}.embedding_length`])
if (!embeddingLen)
throw new Error('Invalid metadata: embedding_length not found')
// Standard transformer: head_dim = embedding_dim / num_heads
// For KV cache: we need both K and V, so 2 * head_dim per head
headDim = (embeddingLen / nHead) * 2
logger.info(
`Using embedding_length estimation: ${embeddingLen}, calculated head_dim: ${headDim}`
)
}
const maxCtx = Number(meta[`${arch}.context_length`])
if (!maxCtx) throw new Error('Invalid metadata: context_length not found')
// If the user supplied a context size, clamp it to the model's max
let ctxLen = ctx_size ? Math.min(ctx_size, maxCtx) : maxCtx
logger.info(`Final context length used for KV size: ${ctxLen}`)
logger.info(`nLayer: ${nLayer}, nHead: ${nHead}, headDim (K+V): ${headDim}`)
logger.info(`ctxLen: ${ctxLen}`)
// Consider f16 by default
// Can be extended by checking cache-type-v and cache-type-k
// but we are checking overall compatibility with the default settings
// fp16 = 16 bits = 2 bytes per element
const bytesPerElement = 2
// Total KV cache size per token = nHead * headDim * bytesPerElement * nLayer
const kvPerToken = nHead * headDim * bytesPerElement * nLayer
return { size: ctxLen * kvPerToken, perTokenSize: kvPerToken }
}
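// Worked example (hypothetical numbers): a 32-layer model with 32 KV heads and
// key_length = value_length = 128 using an fp16 cache gives
//   kvPerToken = 32 * (128 + 128) * 2 * 32 = 524,288 bytes (~0.5 MB per token),
// so an 8192-token context needs 8192 * 524,288 ≈ 4.3 GB of KV cache.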
private async getModelSize(path: string): Promise<number> {
if (path.startsWith('https://')) {
const res = await fetch(path, { method: 'HEAD' })
const len = res.headers.get('content-length')
return len ? parseInt(len, 10) : 0
} else {
return (await fs.fileStat(path)).size
try {
const result = await planModelLoadInternal(path, this.memoryMode, mmprojPath, requestedCtx)
return result
} catch (e) {
throw new Error(String(e))
}
}
@ -2404,50 +2125,11 @@ export default class llamacpp_extension extends AIEngine {
*/
async isModelSupported(
path: string,
ctx_size?: number
ctxSize?: number
): Promise<'RED' | 'YELLOW' | 'GREEN'> {
try {
const modelSize = await this.getModelSize(path)
const memoryInfo = await this.getTotalSystemMemory()
logger.info(`modelSize: ${modelSize}`)
const gguf = await readGgufMetadata(path)
let kvCacheSize: number
if (ctx_size) {
kvCacheSize = (await this.estimateKVCache(gguf.metadata, ctx_size)).size
} else {
kvCacheSize = (await this.estimateKVCache(gguf.metadata)).size
}
// Total memory consumption = model weights + kvcache
const totalRequired = modelSize + kvCacheSize
logger.info(
`isModelSupported: Total memory requirement: ${totalRequired} for ${path}`
)
// Use 90% of total memory as the usable limit
const USABLE_MEMORY_PERCENTAGE = 0.9
const usableTotalMemory =
memoryInfo.totalRAM * USABLE_MEMORY_PERCENTAGE +
memoryInfo.totalVRAM * USABLE_MEMORY_PERCENTAGE
const usableVRAM = memoryInfo.totalVRAM * USABLE_MEMORY_PERCENTAGE
// Check if model fits in total memory at all (this is the hard limit)
if (totalRequired > usableTotalMemory) {
return 'RED' // Truly impossible to run
}
// Check if everything fits in VRAM (ideal case)
if (totalRequired <= usableVRAM) {
return 'GREEN'
}
// If we get here, it means:
// - Total requirement fits in combined memory
// - But doesn't fit entirely in VRAM
// This is the CPU-GPU hybrid scenario
return 'YELLOW'
const result = await isModelSupported(path, Number(ctxSize))
return result
} catch (e) {
throw new Error(String(e))
}
@ -2601,7 +2283,8 @@ export default class llamacpp_extension extends AIEngine {
metadata: Record<string, string>
): Promise<number> {
// Extract vision parameters from metadata
const projectionDim = Math.floor(Number(metadata['clip.vision.projection_dim']) / 10) || 256
const projectionDim =
Math.floor(Number(metadata['clip.vision.projection_dim']) / 10) || 256
// Count images in messages
let imageCount = 0

View File

@ -35,7 +35,8 @@
"copy:assets:mobile": "cpx \"pre-install/*.tgz\" \"src-tauri/resources/pre-install/\" && cpx \"LICENSE\" \"src-tauri/resources/\"",
"download:lib": "node ./scripts/download-lib.mjs",
"download:bin": "node ./scripts/download-bin.mjs",
"build:tauri:win32": "yarn download:bin && yarn download:lib && yarn tauri build",
"download:windows-installer": "node ./scripts/download-win-installer-deps.mjs",
"build:tauri:win32": "yarn download:bin && yarn download:lib && yarn download:windows-installer && yarn tauri build",
"build:tauri:linux": "yarn download:bin && yarn download:lib && NO_STRIP=1 ./src-tauri/build-utils/shim-linuxdeploy.sh yarn tauri build && ./src-tauri/build-utils/buildAppImage.sh",
"build:tauri:darwin": "yarn download:bin && yarn tauri build --target universal-apple-darwin",
"build:tauri": "yarn build:icon && yarn copy:assets:tauri && run-script-os",

View File

@ -77,25 +77,6 @@ async function main() {
// Expect EEXIST error
}
// Download VC++ Redistributable 17
if (platform == 'win32') {
const vcFilename = 'vc_redist.x64.exe'
const vcUrl = 'https://aka.ms/vs/17/release/vc_redist.x64.exe'
console.log(`Downloading VC++ Redistributable...`)
const vcSavePath = path.join(tempDir, vcFilename)
if (!fs.existsSync(vcSavePath)) {
await download(vcUrl, vcSavePath)
}
// copy to tauri resources
try {
copySync(vcSavePath, libDir)
} catch (err) {
// Expect EEXIST error
}
}
console.log('Downloads completed.')
}

View File

@ -0,0 +1,83 @@
// scripts/download-win-installer-deps.mjs
import https from 'https'
import fs, { mkdirSync } from 'fs'
import os from 'os'
import path from 'path'
import { copySync } from 'cpx'
function download(url, dest) {
return new Promise((resolve, reject) => {
console.log(`Downloading ${url} to ${dest}`)
const file = fs.createWriteStream(dest)
https
.get(url, (response) => {
console.log(`Response status code: ${response.statusCode}`)
if (
response.statusCode >= 300 &&
response.statusCode < 400 &&
response.headers.location
) {
// Handle redirect
const redirectURL = response.headers.location
console.log(`Redirecting to ${redirectURL}`)
download(redirectURL, dest).then(resolve, reject) // Recursive call
return
} else if (response.statusCode !== 200) {
reject(`Failed to get '${url}' (${response.statusCode})`)
return
}
response.pipe(file)
file.on('finish', () => {
file.close(resolve)
})
})
.on('error', (err) => {
fs.unlink(dest, () => reject(err.message))
})
})
}
async function main() {
console.log('Starting Windows installer dependencies download')
const platform = os.platform() // 'darwin', 'linux', 'win32'
const arch = os.arch() // 'x64', 'arm64', etc.
if (arch !== 'x64') return
const libDir = 'src-tauri/resources/lib'
const tempDir = 'scripts/dist'
try {
mkdirSync('scripts/dist')
} catch (err) {
// Expect EEXIST error if the directory already exists
}
// Download VC++ Redistributable 17
if (platform === 'win32') {
const vcFilename = 'vc_redist.x64.exe'
const vcUrl = 'https://aka.ms/vs/17/release/vc_redist.x64.exe'
console.log(`Downloading VC++ Redistributable...`)
const vcSavePath = path.join(tempDir, vcFilename)
if (!fs.existsSync(vcSavePath)) {
await download(vcUrl, vcSavePath)
}
// copy to tauri resources
try {
copySync(vcSavePath, libDir)
} catch (err) {
// Expect EEXIST error
}
}
console.log('Windows installer dependencies downloads completed.')
}
main().catch((err) => {
console.error('Error:', err)
process.exit(1)
})

View File

@ -15,6 +15,8 @@ use tauri::Runtime;
static SYSTEM_INFO: OnceLock<SystemInfo> = OnceLock::new();
pub use commands::get_system_info;
/// Initialize the hardware plugin
pub fn init<R: Runtime>() -> tauri::plugin::TauriPlugin<R> {
tauri::plugin::Builder::new("hardware")

View File

@ -24,6 +24,7 @@ tauri = { version = "2.5.0", default-features = false, features = [] }
thiserror = "2.0.12"
tokio = { version = "1", features = ["full"] }
reqwest = { version = "0.11", features = ["json", "blocking", "stream"] }
tauri-plugin-hardware = { path = "../tauri-plugin-hardware" }
# Unix-specific dependencies
[target.'cfg(unix)'.dependencies]

View File

@ -14,6 +14,10 @@ const COMMANDS: &[&str] = &[
"get_session_by_model",
// GGUF commands
"read_gguf_metadata",
"estimate_kv_cache_size",
"get_model_size",
"is_model_supported",
"plan_model_load"
];
fn main() {

View File

@ -2,28 +2,28 @@ import { invoke } from '@tauri-apps/api/core'
// Types
export interface SessionInfo {
pid: number;
port: number;
model_id: string;
model_path: string;
api_key: string;
pid: number
port: number
model_id: string
model_path: string
api_key: string
}
export interface DeviceInfo {
id: string;
name: string;
memory: number;
id: string
name: string
memory: number
}
export interface GgufMetadata {
version: number;
tensor_count: number;
metadata: Record<string, string>;
version: number
tensor_count: number
metadata: Record<string, string>
}
// Cleanup commands
export async function cleanupLlamaProcesses(): Promise<void> {
return await invoke('plugin:llamacpp|cleanup_llama_processes');
return await invoke('plugin:llamacpp|cleanup_llama_processes')
}
// LlamaCpp server commands
@ -35,12 +35,12 @@ export async function loadLlamaModel(
return await invoke('plugin:llamacpp|load_llama_model', {
backendPath,
libraryPath,
args
});
args,
})
}
export async function unloadLlamaModel(pid: number): Promise<void> {
return await invoke('plugin:llamacpp|unload_llama_model', { pid });
return await invoke('plugin:llamacpp|unload_llama_model', { pid })
}
export async function getDevices(
@ -49,8 +49,8 @@ export async function getDevices(
): Promise<DeviceInfo[]> {
return await invoke('plugin:llamacpp|get_devices', {
backendPath,
libraryPath
});
libraryPath,
})
}
export async function generateApiKey(
@ -59,35 +59,84 @@ export async function generateApiKey(
): Promise<string> {
return await invoke('plugin:llamacpp|generate_api_key', {
modelId,
apiSecret
});
apiSecret,
})
}
export async function isProcessRunning(pid: number): Promise<boolean> {
return await invoke('plugin:llamacpp|is_process_running', { pid });
return await invoke('plugin:llamacpp|is_process_running', { pid })
}
export async function getRandomPort(): Promise<number> {
return await invoke('plugin:llamacpp|get_random_port');
return await invoke('plugin:llamacpp|get_random_port')
}
export async function findSessionByModel(modelId: string): Promise<SessionInfo | null> {
return await invoke('plugin:llamacpp|find_session_by_model', { modelId });
export async function findSessionByModel(
modelId: string
): Promise<SessionInfo | null> {
return await invoke('plugin:llamacpp|find_session_by_model', { modelId })
}
export async function getLoadedModels(): Promise<string[]> {
return await invoke('plugin:llamacpp|get_loaded_models');
return await invoke('plugin:llamacpp|get_loaded_models')
}
export async function getAllSessions(): Promise<SessionInfo[]> {
return await invoke('plugin:llamacpp|get_all_sessions');
return await invoke('plugin:llamacpp|get_all_sessions')
}
export async function getSessionByModel(modelId: string): Promise<SessionInfo | null> {
return await invoke('plugin:llamacpp|get_session_by_model', { modelId });
export async function getSessionByModel(
modelId: string
): Promise<SessionInfo | null> {
return await invoke('plugin:llamacpp|get_session_by_model', { modelId })
}
// GGUF commands
export async function readGgufMetadata(path: string): Promise<GgufMetadata> {
return await invoke('plugin:llamacpp|read_gguf_metadata', { path });
return await invoke('plugin:llamacpp|read_gguf_metadata', { path })
}
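// Note: the Rust KVCacheEstimate struct is serialized without a camelCase rename,
// so `per_token_size` stays snake_case here (unlike planModelLoadInternal's result).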
export async function estimateKVCacheSize(
meta: Record<string, string>,
ctxSize?: number
): Promise<{ size: number; per_token_size: number }> {
return await invoke('plugin:llamacpp|estimate_kv_cache_size', {
meta,
ctxSize,
})
}
export async function getModelSize(path: string): Promise<number> {
return await invoke('plugin:llamacpp|get_model_size', { path })
}
export async function isModelSupported(
path: string,
ctxSize?: number
): Promise<'RED' | 'YELLOW' | 'GREEN'> {
return await invoke('plugin:llamacpp|is_model_supported', {
path,
ctxSize,
})
}
export async function planModelLoadInternal(
path: string,
memoryMode: string,
mmprojPath?: string,
requestedContext?: number
): Promise<{
gpuLayers: number
maxContextLength: number
noOffloadKVCache: boolean
offloadMmproj?: boolean
batchSize: number
mode: 'GPU' | 'Hybrid' | 'CPU' | 'Unsupported'
}> {
return await invoke('plugin:llamacpp|plan_model_load', {
path,
memoryMode,
mmprojPath,
requestedContext,
})
}
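// Usage sketch (illustrative only; `loadWithPlan` is not part of this plugin's API,
// and the memoryMode values assume the Rust side's 'high' | 'medium' | 'low'):
async function loadWithPlan(path: string) {
  const status = await isModelSupported(path)
  if (status === 'RED') {
    throw new Error('Model does not fit in available memory')
  }
  const plan = await planModelLoadInternal(path, 'high')
  console.log(`gpuLayers=${plan.gpuLayers}, ctx=${plan.maxContextLength}, mode=${plan.mode}`)
  return plan
}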

View File

@ -0,0 +1,13 @@
# Automatically generated - DO NOT EDIT!
"$schema" = "../../schemas/schema.json"
[[permission]]
identifier = "allow-estimate-kv-cache-size"
description = "Enables the estimate_kv_cache_size command without any pre-configured scope."
commands.allow = ["estimate_kv_cache_size"]
[[permission]]
identifier = "deny-estimate-kv-cache-size"
description = "Denies the estimate_kv_cache_size command without any pre-configured scope."
commands.deny = ["estimate_kv_cache_size"]

View File

@ -0,0 +1,13 @@
# Automatically generated - DO NOT EDIT!
"$schema" = "../../schemas/schema.json"
[[permission]]
identifier = "allow-get-model-size"
description = "Enables the get_model_size command without any pre-configured scope."
commands.allow = ["get_model_size"]
[[permission]]
identifier = "deny-get-model-size"
description = "Denies the get_model_size command without any pre-configured scope."
commands.deny = ["get_model_size"]

View File

@ -0,0 +1,13 @@
# Automatically generated - DO NOT EDIT!
"$schema" = "../../schemas/schema.json"
[[permission]]
identifier = "allow-is-model-supported"
description = "Enables the is_model_supported command without any pre-configured scope."
commands.allow = ["is_model_supported"]
[[permission]]
identifier = "deny-is-model-supported"
description = "Denies the is_model_supported command without any pre-configured scope."
commands.deny = ["is_model_supported"]

View File

@ -0,0 +1,13 @@
# Automatically generated - DO NOT EDIT!
"$schema" = "../../schemas/schema.json"
[[permission]]
identifier = "allow-plan-model-load"
description = "Enables the plan_model_load command without any pre-configured scope."
commands.allow = ["plan_model_load"]
[[permission]]
identifier = "deny-plan-model-load"
description = "Denies the plan_model_load command without any pre-configured scope."
commands.deny = ["plan_model_load"]

View File

@ -16,6 +16,10 @@ Default permissions for the llamacpp plugin
- `allow-get-all-sessions`
- `allow-get-session-by-model`
- `allow-read-gguf-metadata`
- `allow-estimate-kv-cache-size`
- `allow-get-model-size`
- `allow-is-model-supported`
- `allow-plan-model-load`
## Permission Table
@ -55,6 +59,32 @@ Denies the cleanup_llama_processes command without any pre-configured scope.
<tr>
<td>
`llamacpp:allow-estimate-kv-cache-size`
</td>
<td>
Enables the estimate_kv_cache_size command without any pre-configured scope.
</td>
</tr>
<tr>
<td>
`llamacpp:deny-estimate-kv-cache-size`
</td>
<td>
Denies the estimate_kv_cache_size command without any pre-configured scope.
</td>
</tr>
<tr>
<td>
`llamacpp:allow-find-session-by-model`
</td>
@ -185,6 +215,32 @@ Denies the get_loaded_models command without any pre-configured scope.
<tr>
<td>
`llamacpp:allow-get-model-size`
</td>
<td>
Enables the get_model_size command without any pre-configured scope.
</td>
</tr>
<tr>
<td>
`llamacpp:deny-get-model-size`
</td>
<td>
Denies the get_model_size command without any pre-configured scope.
</td>
</tr>
<tr>
<td>
`llamacpp:allow-get-random-port`
</td>
@ -237,6 +293,32 @@ Denies the get_session_by_model command without any pre-configured scope.
<tr>
<td>
`llamacpp:allow-is-model-supported`
</td>
<td>
Enables the is_model_supported command without any pre-configured scope.
</td>
</tr>
<tr>
<td>
`llamacpp:deny-is-model-supported`
</td>
<td>
Denies the is_model_supported command without any pre-configured scope.
</td>
</tr>
<tr>
<td>
`llamacpp:allow-is-process-running`
</td>
@ -289,6 +371,32 @@ Denies the load_llama_model command without any pre-configured scope.
<tr>
<td>
`llamacpp:allow-plan-model-load`
</td>
<td>
Enables the plan_model_load command without any pre-configured scope.
</td>
</tr>
<tr>
<td>
`llamacpp:deny-plan-model-load`
</td>
<td>
Denies the plan_model_load command without any pre-configured scope.
</td>
</tr>
<tr>
<td>
`llamacpp:allow-read-gguf-metadata`
</td>

View File

@ -3,10 +3,10 @@ description = "Default permissions for the llamacpp plugin"
permissions = [
# Cleanup commands
"allow-cleanup-llama-processes",
# LlamaCpp server commands
"allow-load-llama-model",
"allow-unload-llama-model",
"allow-unload-llama-model",
"allow-get-devices",
"allow-generate-api-key",
"allow-is-process-running",
@ -15,7 +15,11 @@ permissions = [
"allow-get-loaded-models",
"allow-get-all-sessions",
"allow-get-session-by-model",
# GGUF commands
"allow-read-gguf-metadata"
"allow-read-gguf-metadata",
"allow-estimate-kv-cache-size",
"allow-get-model-size",
"allow-is-model-supported",
"allow-plan-model-load"
]

View File

@ -306,6 +306,18 @@
"const": "deny-cleanup-llama-processes",
"markdownDescription": "Denies the cleanup_llama_processes command without any pre-configured scope."
},
{
"description": "Enables the estimate_kv_cache_size command without any pre-configured scope.",
"type": "string",
"const": "allow-estimate-kv-cache-size",
"markdownDescription": "Enables the estimate_kv_cache_size command without any pre-configured scope."
},
{
"description": "Denies the estimate_kv_cache_size command without any pre-configured scope.",
"type": "string",
"const": "deny-estimate-kv-cache-size",
"markdownDescription": "Denies the estimate_kv_cache_size command without any pre-configured scope."
},
{
"description": "Enables the find_session_by_model command without any pre-configured scope.",
"type": "string",
@ -366,6 +378,18 @@
"const": "deny-get-loaded-models",
"markdownDescription": "Denies the get_loaded_models command without any pre-configured scope."
},
{
"description": "Enables the get_model_size command without any pre-configured scope.",
"type": "string",
"const": "allow-get-model-size",
"markdownDescription": "Enables the get_model_size command without any pre-configured scope."
},
{
"description": "Denies the get_model_size command without any pre-configured scope.",
"type": "string",
"const": "deny-get-model-size",
"markdownDescription": "Denies the get_model_size command without any pre-configured scope."
},
{
"description": "Enables the get_random_port command without any pre-configured scope.",
"type": "string",
@ -390,6 +414,18 @@
"const": "deny-get-session-by-model",
"markdownDescription": "Denies the get_session_by_model command without any pre-configured scope."
},
{
"description": "Enables the is_model_supported command without any pre-configured scope.",
"type": "string",
"const": "allow-is-model-supported",
"markdownDescription": "Enables the is_model_supported command without any pre-configured scope."
},
{
"description": "Denies the is_model_supported command without any pre-configured scope.",
"type": "string",
"const": "deny-is-model-supported",
"markdownDescription": "Denies the is_model_supported command without any pre-configured scope."
},
{
"description": "Enables the is_process_running command without any pre-configured scope.",
"type": "string",
@ -414,6 +450,18 @@
"const": "deny-load-llama-model",
"markdownDescription": "Denies the load_llama_model command without any pre-configured scope."
},
{
"description": "Enables the plan_model_load command without any pre-configured scope.",
"type": "string",
"const": "allow-plan-model-load",
"markdownDescription": "Enables the plan_model_load command without any pre-configured scope."
},
{
"description": "Denies the plan_model_load command without any pre-configured scope.",
"type": "string",
"const": "deny-plan-model-load",
"markdownDescription": "Denies the plan_model_load command without any pre-configured scope."
},
{
"description": "Enables the read_gguf_metadata command without any pre-configured scope.",
"type": "string",
@ -439,10 +487,10 @@
"markdownDescription": "Denies the unload_llama_model command without any pre-configured scope."
},
{
"description": "Default permissions for the llamacpp plugin\n#### This default permission set includes:\n\n- `allow-cleanup-llama-processes`\n- `allow-load-llama-model`\n- `allow-unload-llama-model`\n- `allow-get-devices`\n- `allow-generate-api-key`\n- `allow-is-process-running`\n- `allow-get-random-port`\n- `allow-find-session-by-model`\n- `allow-get-loaded-models`\n- `allow-get-all-sessions`\n- `allow-get-session-by-model`\n- `allow-read-gguf-metadata`",
"description": "Default permissions for the llamacpp plugin\n#### This default permission set includes:\n\n- `allow-cleanup-llama-processes`\n- `allow-load-llama-model`\n- `allow-unload-llama-model`\n- `allow-get-devices`\n- `allow-generate-api-key`\n- `allow-is-process-running`\n- `allow-get-random-port`\n- `allow-find-session-by-model`\n- `allow-get-loaded-models`\n- `allow-get-all-sessions`\n- `allow-get-session-by-model`\n- `allow-read-gguf-metadata`\n- `allow-estimate-kv-cache-size`\n- `allow-get-model-size`\n- `allow-is-model-supported`\n- `allow-plan-model-load`",
"type": "string",
"const": "default",
"markdownDescription": "Default permissions for the llamacpp plugin\n#### This default permission set includes:\n\n- `allow-cleanup-llama-processes`\n- `allow-load-llama-model`\n- `allow-unload-llama-model`\n- `allow-get-devices`\n- `allow-generate-api-key`\n- `allow-is-process-running`\n- `allow-get-random-port`\n- `allow-find-session-by-model`\n- `allow-get-loaded-models`\n- `allow-get-all-sessions`\n- `allow-get-session-by-model`\n- `allow-read-gguf-metadata`"
"markdownDescription": "Default permissions for the llamacpp plugin\n#### This default permission set includes:\n\n- `allow-cleanup-llama-processes`\n- `allow-load-llama-model`\n- `allow-unload-llama-model`\n- `allow-get-devices`\n- `allow-generate-api-key`\n- `allow-is-process-running`\n- `allow-get-random-port`\n- `allow-find-session-by-model`\n- `allow-get-loaded-models`\n- `allow-get-all-sessions`\n- `allow-get-session-by-model`\n- `allow-read-gguf-metadata`\n- `allow-estimate-kv-cache-size`\n- `allow-get-model-size`\n- `allow-is-model-supported`\n- `allow-plan-model-load`"
}
]
}

View File

@ -1,58 +1,141 @@
use super::helpers;
use super::types::GgufMetadata;
use reqwest;
use std::fs::File;
use std::io::BufReader;
use super::utils::{estimate_kv_cache_internal, read_gguf_metadata_internal};
use crate::gguf::types::{KVCacheError, KVCacheEstimate, ModelSupportStatus};
use std::collections::HashMap;
use std::fs;
use tauri::Runtime;
use tauri_plugin_hardware::get_system_info;
/// Read GGUF metadata from a model file
#[tauri::command]
pub async fn read_gguf_metadata(path: String) -> Result<GgufMetadata, String> {
if path.starts_with("http://") || path.starts_with("https://") {
// Remote: read in 2MB chunks until successful
return read_gguf_metadata_internal(path).await;
}
#[tauri::command]
pub async fn estimate_kv_cache_size(
meta: HashMap<String, String>,
ctx_size: Option<u64>,
) -> Result<KVCacheEstimate, KVCacheError> {
estimate_kv_cache_internal(meta, ctx_size).await
}
#[tauri::command]
pub async fn get_model_size(path: String) -> Result<u64, String> {
if path.starts_with("https://") {
// Handle remote URL
let client = reqwest::Client::new();
let chunk_size = 2 * 1024 * 1024; // Fixed 2MB chunks
let max_total_size = 120 * 1024 * 1024; // Don't exceed 120MB total
let mut total_downloaded = 0;
let mut accumulated_data = Vec::new();
let response = client
.head(&path)
.send()
.await
.map_err(|e| format!("Failed to fetch HEAD request: {}", e))?;
while total_downloaded < max_total_size {
let start = total_downloaded;
let end = std::cmp::min(start + chunk_size - 1, max_total_size - 1);
let resp = client
.get(&path)
.header("Range", format!("bytes={}-{}", start, end))
.send()
.await
.map_err(|e| format!("Failed to fetch chunk {}-{}: {}", start, end, e))?;
let chunk_data = resp
.bytes()
.await
.map_err(|e| format!("Failed to read chunk response: {}", e))?;
accumulated_data.extend_from_slice(&chunk_data);
total_downloaded += chunk_data.len();
// Try parsing after each chunk
let cursor = std::io::Cursor::new(&accumulated_data);
if let Ok(metadata) = helpers::read_gguf_metadata(cursor) {
return Ok(metadata);
}
// If we got less data than expected, we've reached EOF
if chunk_data.len() < chunk_size {
break;
}
if let Some(content_length) = response.headers().get("content-length") {
let content_length_str = content_length
.to_str()
.map_err(|e| format!("Invalid content-length header: {}", e))?;
content_length_str
.parse::<u64>()
.map_err(|e| format!("Failed to parse content-length: {}", e))
} else {
Ok(0)
}
Err("Could not parse GGUF metadata from downloaded data".to_string())
} else {
// Local: use streaming file reader
let file =
File::open(&path).map_err(|e| format!("Failed to open local file {}: {}", path, e))?;
let reader = BufReader::new(file);
helpers::read_gguf_metadata(reader)
.map_err(|e| format!("Failed to parse GGUF metadata: {}", e))
// Handle local file using standard fs
let metadata =
fs::metadata(&path).map_err(|e| format!("Failed to get file metadata: {}", e))?;
Ok(metadata.len())
}
}
#[tauri::command]
pub async fn is_model_supported<R: Runtime>(
path: String,
ctx_size: Option<u32>,
app_handle: tauri::AppHandle<R>,
) -> Result<ModelSupportStatus, String> {
// Get model size
let model_size = get_model_size(path.clone()).await?;
// Get system info
let system_info = get_system_info(app_handle.clone());
log::info!("modelSize: {}", model_size);
// Read GGUF metadata
let gguf = read_gguf_metadata(path.clone()).await?;
// Calculate KV cache size
let kv_cache_size = if let Some(ctx_size) = ctx_size {
log::info!("Using ctx_size: {}", ctx_size);
estimate_kv_cache_internal(gguf.metadata, Some(ctx_size as u64))
.await
.map_err(|e| e.to_string())?
.size
} else {
estimate_kv_cache_internal(gguf.metadata, None)
.await
.map_err(|e| e.to_string())?
.size
};
// Total memory consumption = model weights + kvcache
let total_required = model_size + kv_cache_size;
log::info!(
"isModelSupported: Total memory requirement: {} for {}; Got kvCacheSize: {} from BE",
total_required,
path,
kv_cache_size
);
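// 2288490189 bytes ≈ 2.13 GiB, kept free for the OS and GPU driver (assumed headroom).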
const RESERVE_BYTES: u64 = 2288490189;
let total_system_memory = system_info.total_memory * 1024 * 1024;
// Calculate total VRAM from all GPUs
let total_vram: u64 = if system_info.gpus.is_empty() {
// On macOS with unified memory, GPU info may be empty
// Use total RAM as VRAM since memory is shared
log::info!("No GPUs detected (likely unified memory system), using total RAM as VRAM");
total_system_memory
} else {
system_info
.gpus
.iter()
.map(|g| g.total_memory * 1024 * 1024)
.sum::<u64>()
};
log::info!("Total VRAM reported/calculated (in bytes): {}", &total_vram);
let usable_vram = if total_vram > RESERVE_BYTES {
total_vram - RESERVE_BYTES
} else {
0
};
let usable_total_memory = if total_system_memory > RESERVE_BYTES {
(total_system_memory - RESERVE_BYTES) + usable_vram
} else {
0
};
log::info!("System RAM: {} bytes", &total_system_memory);
log::info!("Total VRAM: {} bytes", &total_vram);
log::info!("Usable total memory: {} bytes", &usable_total_memory);
log::info!("Usable VRAM: {} bytes", &usable_vram);
log::info!("Required: {} bytes", &total_required);
// Check if model fits in total memory at all (this is the hard limit)
if total_required > usable_total_memory {
return Ok(ModelSupportStatus::Red); // Truly impossible to run
}
// Check if everything fits in VRAM (ideal case)
if total_required <= usable_vram {
return Ok(ModelSupportStatus::Green);
}
// If we get here, it means:
// - Total requirement fits in combined memory
// - But doesn't fit entirely in VRAM
// This is the CPU-GPU hybrid scenario
Ok(ModelSupportStatus::Yellow)
}

View File

@ -1,3 +1,5 @@
pub mod commands;
pub mod helpers;
pub mod types;
pub mod utils;
pub mod model_planner;

View File

@ -0,0 +1,318 @@
use crate::gguf::commands::get_model_size;
use crate::gguf::utils::estimate_kv_cache_internal;
use crate::gguf::utils::read_gguf_metadata_internal;
use serde::{Deserialize, Serialize};
use std::collections::HashMap;
use tauri::Runtime;
use tauri_plugin_hardware::get_system_info;
#[derive(Serialize, Deserialize, Clone, Debug)]
#[serde(rename_all = "camelCase")]
pub struct ModelPlan {
pub gpu_layers: u64,
pub max_context_length: u64,
pub no_offload_kv_cache: bool,
pub offload_mmproj: bool,
pub batch_size: u64,
pub mode: ModelMode,
}
#[derive(Serialize, Deserialize, Clone, Debug, PartialEq)]
#[serde(rename_all = "UPPERCASE")]
pub enum ModelMode {
GPU,
Hybrid,
CPU,
Unsupported,
}
#[tauri::command]
pub async fn plan_model_load<R: Runtime>(
path: String,
memory_mode: String,
mmproj_path: Option<String>,
requested_ctx: Option<u64>,
app: tauri::AppHandle<R>,
) -> Result<ModelPlan, String> {
let model_size = get_model_size(path.clone()).await?;
let sys_info = get_system_info(app.clone());
let gguf = read_gguf_metadata_internal(path.clone()).await?;
let mut mmproj_size: u64 = 0;
if let Some(ref mmproj) = mmproj_path {
mmproj_size = get_model_size(mmproj.clone()).await?;
}
let arch = gguf
.metadata
.get("general.architecture")
.ok_or("Missing architecture")?;
let repeating_layers: u64 = gguf
.metadata
.get(&format!("{arch}.block_count"))
.ok_or("Missing block_count")?
.parse()
.map_err(|_| "Invalid block_count")?;
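// +1 approximates the non-repeating output/embedding weights as one extra layer.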
let total_layers = repeating_layers + 1;
let layer_size = model_size / total_layers;
let kv_cache = estimate_kv_cache_internal(gguf.metadata.clone(), None)
.await
.map_err(|e| e.to_string())?;
let kv_cache_per_token = kv_cache.per_token_size;
if model_size == 0 || layer_size == 0 || kv_cache_per_token == 0 {
return Err("Invalid model/layer/cache sizes".into());
}
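// Same ≈2.13 GiB reserve as in is_model_supported, so the plan never budgets the last bytes of RAM/VRAM.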
const RESERVE_BYTES: u64 = 2288490189;
const MIN_CONTEXT_LENGTH: u64 = 2048;
let model_max_ctx: u64 = gguf
.metadata
.get(&format!("{arch}.context_length"))
.and_then(|s| s.parse().ok())
.unwrap_or(8192);
let memory_percentages = HashMap::from([("high", 0.7), ("medium", 0.5), ("low", 0.4)]);
let multiplier = *memory_percentages
.get(memory_mode.as_str())
.ok_or("Invalid memory mode")?;
log::info!("Got GPUs:\n{:?}", &sys_info.gpus);
let total_ram: u64 = sys_info.total_memory * 1024 * 1024;
log::info!(
"Total system memory reported from tauri_plugin_hardware(in bytes): {}",
&total_ram
);
let total_vram: u64 = if sys_info.gpus.is_empty() {
// On macOS with unified memory, GPU info may be empty
// Use total RAM as VRAM since memory is shared
log::info!("No GPUs detected (likely unified memory system), using total RAM as VRAM");
total_ram
} else {
sys_info
.gpus
.iter()
.map(|g| g.total_memory * 1024 * 1024)
.sum::<u64>()
};
log::info!("Total VRAM reported/calculated (in bytes): {}", &total_vram);
let usable_vram: u64 = if total_vram > RESERVE_BYTES {
(((total_vram - RESERVE_BYTES) as f64) * multiplier) as u64
} else {
0
};
log::info!("Usable vram calculated: {}", &usable_vram);
let usable_ram: u64 = if total_ram > RESERVE_BYTES {
(((total_ram - RESERVE_BYTES) as f64) * multiplier).max(0.0) as u64
} else {
0
};
log::info!("Usable ram calculated (in bytes): {}", &usable_ram);
let mut gpu_layers = 0;
let mut max_ctx_len = 0;
let mut no_offload_kv_cache = false;
let mut mode = ModelMode::Unsupported;
let mut offload_mmproj = false;
let mut batch_size = 2048;
let total_available_mem = usable_vram.saturating_add(usable_ram);
if model_size + mmproj_size > total_available_mem {
log::info!("Model not supported in this system!");
return Ok(ModelPlan {
gpu_layers: 0,
max_context_length: 0,
no_offload_kv_cache: true,
batch_size: 64,
mode: ModelMode::Unsupported,
offload_mmproj: false,
});
}
if mmproj_size > 0 {
offload_mmproj = true;
}
let kv_min_size = estimate_kv_cache_internal(gguf.metadata.clone(), Some(MIN_CONTEXT_LENGTH))
.await
.map_err(|e| e.to_string())?
.size;
if model_size + kv_min_size + mmproj_size <= usable_vram {
log::info!("Planning mode: Full GPU offload is possible.");
mode = ModelMode::GPU;
gpu_layers = total_layers;
let vram_left_for_ctx = usable_vram.saturating_sub(model_size);
let max_ctx_by_vram = (vram_left_for_ctx / kv_cache_per_token) as u64;
let requested_target = requested_ctx.unwrap_or(model_max_ctx).min(model_max_ctx);
max_ctx_len = requested_target.min(max_ctx_by_vram);
no_offload_kv_cache = false;
offload_mmproj = true;
} else {
let mut found_plan = false;
log::info!("Attempting VRAM-Maximized Hybrid plan (KV cache in VRAM only).");
for candidate_gpu_layers in (0..=total_layers).rev() {
let vram_used_by_layers = candidate_gpu_layers.saturating_mul(layer_size);
if vram_used_by_layers > usable_vram {
continue;
}
let ram_used_by_cpu_layers =
(total_layers.saturating_sub(candidate_gpu_layers)).saturating_mul(layer_size);
let ram_used_by_mmproj = if offload_mmproj { 0 } else { mmproj_size };
let required_ram_for_model = ram_used_by_cpu_layers.saturating_add(ram_used_by_mmproj);
if required_ram_for_model > usable_ram {
continue;
}
let vram_left_for_kv = usable_vram.saturating_sub(vram_used_by_layers);
let ctx_in_vram_only = (vram_left_for_kv / kv_cache_per_token) as u64;
if ctx_in_vram_only >= MIN_CONTEXT_LENGTH {
log::info!(
"Found VRAM-Maximized Hybrid plan with {} GPU layers.",
candidate_gpu_layers
);
mode = ModelMode::Hybrid;
gpu_layers = candidate_gpu_layers;
let requested_target = requested_ctx.unwrap_or(model_max_ctx).min(model_max_ctx);
max_ctx_len = requested_target.min(ctx_in_vram_only);
no_offload_kv_cache = false;
found_plan = true;
break;
}
}
if !found_plan {
log::info!("VRAM-Maximized plan not feasible. Falling back to Standard Hybrid (KV cache in VRAM+RAM).");
for candidate_gpu_layers in (0..=total_layers).rev() {
let vram_used_by_layers = candidate_gpu_layers.saturating_mul(layer_size);
if vram_used_by_layers > usable_vram {
continue;
}
let vram_left_for_kv = usable_vram.saturating_sub(vram_used_by_layers);
let kv_in_vram = (vram_left_for_kv / kv_cache_per_token) as u64;
let ram_used_by_cpu_layers =
(total_layers.saturating_sub(candidate_gpu_layers)).saturating_mul(layer_size);
let ram_used_by_mmproj = if offload_mmproj { 0 } else { mmproj_size };
let required_ram_for_model =
ram_used_by_cpu_layers.saturating_add(ram_used_by_mmproj);
if required_ram_for_model > usable_ram {
continue;
}
let available_ram_for_kv = usable_ram.saturating_sub(required_ram_for_model);
let kv_in_ram = (available_ram_for_kv / kv_cache_per_token) as u64;
let total_kv_tokens = kv_in_vram.saturating_add(kv_in_ram);
if total_kv_tokens >= MIN_CONTEXT_LENGTH {
log::info!(
"Found Standard Hybrid plan with {} GPU layers.",
candidate_gpu_layers
);
mode = if candidate_gpu_layers > 0 {
ModelMode::Hybrid
} else {
ModelMode::CPU
};
gpu_layers = candidate_gpu_layers;
let requested_target =
requested_ctx.unwrap_or(model_max_ctx).min(model_max_ctx);
let max_possible_ctx = total_kv_tokens.min(model_max_ctx);
max_ctx_len = requested_target.min(max_possible_ctx);
no_offload_kv_cache = kv_in_ram > 0 && kv_in_vram == 0;
found_plan = true;
break;
}
}
}
if !found_plan {
log::info!("No hybrid plan found. Attempting CPU-only plan.");
if model_size + mmproj_size <= usable_ram {
let available_ram_for_kv = usable_ram.saturating_sub(model_size + mmproj_size);
let kv_tokens = (available_ram_for_kv / kv_cache_per_token) as u64;
if kv_tokens >= MIN_CONTEXT_LENGTH {
mode = ModelMode::CPU;
gpu_layers = 0;
max_ctx_len = kv_tokens
.min(requested_ctx.unwrap_or(model_max_ctx))
.min(model_max_ctx);
no_offload_kv_cache = true;
offload_mmproj = false;
}
}
}
}
if let Some(req) = requested_ctx {
if req > 0 {
max_ctx_len = max_ctx_len.min(req);
}
}
max_ctx_len = max_ctx_len.min(model_max_ctx);
if max_ctx_len > 0 {
log::info!("Max context before power-of-2 adjustment: {}", max_ctx_len);
max_ctx_len = 1u64 << (63 - max_ctx_len.leading_zeros());
log::info!("Adjusted max context to power of 2: {}", max_ctx_len);
}
if mode == ModelMode::Unsupported {
if max_ctx_len >= MIN_CONTEXT_LENGTH {
// do nothing, plan is viable but wasn't assigned a mode
} else {
gpu_layers = 0;
max_ctx_len = 0;
offload_mmproj = false;
}
} else if max_ctx_len < MIN_CONTEXT_LENGTH {
log::info!(
"Final context length {} is less than minimum required {}. Marking as unsupported.",
max_ctx_len,
MIN_CONTEXT_LENGTH
);
mode = ModelMode::Unsupported;
gpu_layers = 0;
max_ctx_len = 0;
offload_mmproj = false;
}
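// Batch-size heuristic (assumed rationale): large batches only pay off with full
// GPU offload; hybrid and CPU paths use smaller batches to limit memory spikes.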
if mode == ModelMode::Hybrid {
batch_size = 256;
} else if mode == ModelMode::CPU || no_offload_kv_cache || mode == ModelMode::Unsupported {
batch_size = 64;
}
if max_ctx_len > 0 {
batch_size = batch_size.min(max_ctx_len);
} else {
batch_size = 64;
}
if mode == ModelMode::CPU || no_offload_kv_cache {
offload_mmproj = false;
}
log::info!("Planned model load params: GPU Layers: {}, max_ctx_len: {}, kv_cache offload: {}, offload mmproj: {}, batch_size: {}",
gpu_layers, max_ctx_len, !no_offload_kv_cache, offload_mmproj, batch_size);
Ok(ModelPlan {
gpu_layers,
max_context_length: max_ctx_len,
no_offload_kv_cache,
offload_mmproj,
batch_size,
mode,
})
}

View File

@ -1,4 +1,4 @@
use serde::Serialize;
use serde::{Deserialize, Serialize};
use std::collections::HashMap;
use std::convert::TryFrom;
use std::io;
@ -52,3 +52,42 @@ pub struct GgufMetadata {
pub tensor_count: u64,
pub metadata: HashMap<String, String>,
}
#[derive(Debug, Serialize, Deserialize)]
pub struct KVCacheEstimate {
pub size: u64,
pub per_token_size: u64,
}
#[derive(Debug, thiserror::Error)]
pub enum KVCacheError {
#[error("Invalid metadata: architecture not found")]
ArchitectureNotFound,
#[error("Invalid metadata: block_count not found or invalid")]
BlockCountInvalid,
#[error("Invalid metadata: head_count not found or invalid")]
HeadCountInvalid,
#[error("Invalid metadata: embedding_length not found or invalid")]
EmbeddingLengthInvalid,
#[error("Invalid metadata: context_length not found or invalid")]
ContextLengthInvalid,
}
impl serde::Serialize for KVCacheError {
fn serialize<S>(&self, serializer: S) -> Result<S::Ok, S::Error>
where
S: serde::Serializer,
{
serializer.serialize_str(&self.to_string())
}
}
#[derive(Debug, Clone, Copy, PartialEq, serde::Serialize)]
pub enum ModelSupportStatus {
#[serde(rename = "RED")]
Red,
#[serde(rename = "YELLOW")]
Yellow,
#[serde(rename = "GREEN")]
Green,
}

View File

@ -0,0 +1,164 @@
use crate::gguf::helpers;
use crate::gguf::types::{GgufMetadata, KVCacheError, KVCacheEstimate};
use std::collections::HashMap;
use std::fs::File;
use std::io::BufReader;
// read gguf metadata
pub async fn read_gguf_metadata_internal(path: String) -> Result<GgufMetadata, String> {
if path.starts_with("http://") || path.starts_with("https://") {
// Remote: read in 2MB chunks until successful
let client = reqwest::Client::new();
let chunk_size = 2 * 1024 * 1024; // Fixed 2MB chunks
let max_total_size = 120 * 1024 * 1024; // Don't exceed 120MB total
let mut total_downloaded = 0;
let mut accumulated_data = Vec::new();
while total_downloaded < max_total_size {
let start = total_downloaded;
let end = std::cmp::min(start + chunk_size - 1, max_total_size - 1);
let resp = client
.get(&path)
.header("Range", format!("bytes={}-{}", start, end))
.send()
.await
.map_err(|e| format!("Failed to fetch chunk {}-{}: {}", start, end, e))?;
let chunk_data = resp
.bytes()
.await
.map_err(|e| format!("Failed to read chunk response: {}", e))?;
accumulated_data.extend_from_slice(&chunk_data);
total_downloaded += chunk_data.len();
// Try parsing after each chunk
let cursor = std::io::Cursor::new(&accumulated_data);
if let Ok(metadata) = helpers::read_gguf_metadata(cursor) {
return Ok(metadata);
}
// If we got less data than expected, we've reached EOF
if chunk_data.len() < chunk_size {
break;
}
}
Err("Could not parse GGUF metadata from downloaded data".to_string())
} else {
// Local: use streaming file reader
let file =
File::open(&path).map_err(|e| format!("Failed to open local file {}: {}", path, e))?;
let reader = BufReader::new(file);
helpers::read_gguf_metadata(reader)
.map_err(|e| format!("Failed to parse GGUF metadata: {}", e))
}
}
/// Estimate KV cache size from the given metadata
pub async fn estimate_kv_cache_internal(
meta: HashMap<String, String>,
ctx_size: Option<u64>,
) -> Result<KVCacheEstimate, KVCacheError> {
log::info!("Received ctx_size parameter: {:?}", ctx_size);
let arch = meta
.get("general.architecture")
.ok_or(KVCacheError::ArchitectureNotFound)?;
// Number of layers
let n_layer_key = format!("{}.block_count", arch);
let n_layer = meta
.get(&n_layer_key)
.and_then(|s| s.parse::<u64>().ok())
.filter(|&n| n > 0)
.ok_or(KVCacheError::BlockCountInvalid)?;
// Attention heads (use kv heads if present, else full heads)
let n_head_key = format!("{}.attention.head_count", arch);
let n_head_kv_key = format!("{}.attention.head_count_kv", arch);
let n_head = meta
.get(&n_head_kv_key)
.and_then(|s| s.parse::<u64>().ok())
.filter(|&n| n > 0)
.unwrap_or_else(|| {
meta.get(&n_head_key)
.and_then(|s| s.parse::<u64>().ok())
.unwrap_or(0)
});
if n_head == 0 {
return Err(KVCacheError::HeadCountInvalid);
}
// Key/value dimensions
let key_len_key = format!("{}.attention.key_length", arch);
let val_len_key = format!("{}.attention.value_length", arch);
let key_len = meta
.get(&key_len_key)
.and_then(|s| s.parse::<u64>().ok())
.unwrap_or(0);
let val_len = meta
.get(&val_len_key)
.and_then(|s| s.parse::<u64>().ok())
.unwrap_or(0);
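// Note: key/value lengths are required here; the embedding-length error variant
// is reused when they are missing.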
if key_len == 0 || val_len == 0 {
return Err(KVCacheError::EmbeddingLengthInvalid);
}
// Context length
let max_ctx_key = format!("{}.context_length", arch);
let max_ctx = meta
.get(&max_ctx_key)
.and_then(|s| s.parse::<u64>().ok())
.filter(|&n| n > 0)
.ok_or(KVCacheError::ContextLengthInvalid)?;
let ctx_len = ctx_size.map(|size| size.min(max_ctx)).unwrap_or(max_ctx);
// Sliding window if present
let sliding_key = format!("{}.attention.sliding_window", arch);
let sliding_window = meta
.get(&sliding_key)
.and_then(|s| s.parse::<u64>().ok())
.filter(|&n| n > 0);
// Assume fp16
const BYTES_PER_ELEMENT: u64 = 2;
// Per-token KV size
let kv_per_token = n_layer * n_head * (key_len + val_len) * BYTES_PER_ELEMENT;
// Pure full-attention cost
let full_cost = ctx_len * kv_per_token;
// Pure sliding-window cost (tiny, only keeps last W tokens)
let sliding_cost = sliding_window.map(|w| w * kv_per_token);
// Middle estimate: average of sliding + full if sliding_window is present
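// e.g. ctx 8192 with a 4096-token window: full = 8192 * kv, sliding = 4096 * kv,
// middle = 6144 * kv -- a heuristic midpoint for mixed SWA/full-attention layers.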
let chosen_size = if let Some(slide) = sliding_cost {
let middle = (full_cost + slide) / 2;
log::info!(
"KV estimates -> sliding: {} bytes (~{:.2} MB), full: {} bytes (~{:.2} MB), middle: {} bytes (~{:.2} MB)",
slide,
slide as f64 / (1024.0 * 1024.0),
full_cost,
full_cost as f64 / (1024.0 * 1024.0),
middle,
middle as f64 / (1024.0 * 1024.0)
);
middle
} else {
log::info!(
"KV estimate (no SWA detected) -> full: {} bytes (~{:.2} MB)",
full_cost,
full_cost as f64 / (1024.0 * 1024.0)
);
full_cost
};
Ok(KVCacheEstimate {
size: chosen_size,
per_token_size: kv_per_token,
})
}

View File

@ -33,6 +33,10 @@ pub fn init<R: Runtime>() -> TauriPlugin<R> {
commands::get_session_by_model,
// GGUF commands
gguf::commands::read_gguf_metadata,
gguf::commands::estimate_kv_cache_size,
gguf::commands::get_model_size,
gguf::commands::is_model_supported,
gguf::model_planner::plan_model_load
])
.setup(|app, _api| {
// Initialize and manage the plugin state

View File

@ -193,7 +193,7 @@ pub fn decompress<R: Runtime>(app: tauri::AppHandle<R>, path: &str, output_dir:
fs::File::open(&path_buf).map_err(|e| e.to_string())?
}
};
#[cfg(not(windows))]
let file = fs::File::open(&path_buf).map_err(|e| e.to_string())?;
if path.ends_with(".tar.gz") {
@ -222,7 +222,10 @@ pub fn decompress<R: Runtime>(app: tauri::AppHandle<R>, path: &str, output_dir:
{
use std::os::unix::fs::PermissionsExt;
if let Some(mode) = entry.unix_mode() {
let _ = std::fs::set_permissions(&outpath, std::fs::Permissions::from_mode(mode));
let _ = std::fs::set_permissions(
&outpath,
std::fs::Permissions::from_mode(mode),
);
}
}
}

View File

@ -42,6 +42,11 @@
${If} ${FileExists} "$INSTDIR\resources\LICENSE"
CopyFiles /SILENT "$INSTDIR\resources\LICENSE" "$INSTDIR\LICENSE"
DetailPrint "Copied LICENSE to install root"
; Optional cleanup - remove from resources folder
Delete "$INSTDIR\resources\LICENSE"
${Else}
DetailPrint "LICENSE not found at expected location: $INSTDIR\resources\LICENSE"
${EndIf}
; ---- Copy vulkan-1.dll to install root ----
@ -51,6 +56,7 @@
; Optional cleanup - remove from resources folder
Delete "$INSTDIR\resources\lib\vulkan-1.dll"
; Only remove the lib directory if it's empty after removing both files
RMDir "$INSTDIR\resources\lib"
${Else}

View File

@ -2,6 +2,7 @@ import { render, screen, fireEvent } from '@testing-library/react'
import { describe, it, expect, vi } from 'vitest'
import userEvent from '@testing-library/user-event'
import React from 'react'
import '@testing-library/jest-dom'
import {
Dialog,
DialogTrigger,
@ -117,7 +118,7 @@ describe('Dialog Components', () => {
it('applies proper classes to dialog content', async () => {
const user = userEvent.setup()
render(
<Dialog>
<DialogTrigger>Open Dialog</DialogTrigger>
@ -128,27 +129,38 @@ describe('Dialog Components', () => {
</DialogContent>
</Dialog>
)
await user.click(screen.getByText('Open Dialog'))
const dialogContent = screen.getByRole('dialog')
expect(dialogContent).toHaveClass(
'bg-main-view',
'max-h-[calc(100%-80px)]',
'overflow-auto',
'border-main-view-fg/10',
'text-main-view-fg',
'fixed',
'top-[50%]',
'left-[50%]',
'z-50',
'z-[90]',
'grid',
'w-full',
'max-w-[calc(100%-2rem)]',
'translate-x-[-50%]',
'translate-y-[-50%]',
'border',
'gap-4',
'rounded-lg',
'shadow-lg'
'border',
'p-6',
'shadow-lg',
'duration-200',
'sm:max-w-lg'
)
})
it('applies proper classes to dialog header', async () => {
const user = userEvent.setup()
render(
<Dialog>
<DialogTrigger>Open Dialog</DialogTrigger>
@ -159,11 +171,11 @@ describe('Dialog Components', () => {
</DialogContent>
</Dialog>
)
await user.click(screen.getByText('Open Dialog'))
const dialogHeader = screen.getByText('Dialog Title').closest('div')
expect(dialogHeader).toHaveClass('flex', 'flex-col', 'gap-2', 'text-center')
expect(dialogHeader).toHaveClass('flex', 'flex-col', 'gap-2', 'text-center', 'sm:text-left')
})
it('applies proper classes to dialog title', async () => {
@ -299,7 +311,7 @@ describe('Dialog Components', () => {
it('supports onOpenChange callback', async () => {
const onOpenChange = vi.fn()
const user = userEvent.setup()
render(
<Dialog onOpenChange={onOpenChange}>
<DialogTrigger>Open Dialog</DialogTrigger>
@ -310,9 +322,98 @@ describe('Dialog Components', () => {
</DialogContent>
</Dialog>
)
await user.click(screen.getByText('Open Dialog'))
expect(onOpenChange).toHaveBeenCalledWith(true)
})
it('can hide close button when showCloseButton is false', async () => {
const user = userEvent.setup()
render(
<Dialog>
<DialogTrigger>Open Dialog</DialogTrigger>
<DialogContent showCloseButton={false}>
<DialogHeader>
<DialogTitle>Dialog Title</DialogTitle>
</DialogHeader>
</DialogContent>
</Dialog>
)
await user.click(screen.getByText('Open Dialog'))
expect(screen.queryByRole('button', { name: /close/i })).not.toBeInTheDocument()
})
it('shows close button by default', async () => {
const user = userEvent.setup()
render(
<Dialog>
<DialogTrigger>Open Dialog</DialogTrigger>
<DialogContent>
<DialogHeader>
<DialogTitle>Dialog Title</DialogTitle>
</DialogHeader>
</DialogContent>
</Dialog>
)
await user.click(screen.getByText('Open Dialog'))
expect(screen.getByRole('button', { name: /close/i })).toBeInTheDocument()
})
it('accepts aria-describedby prop', async () => {
const user = userEvent.setup()
render(
<Dialog>
<DialogTrigger>Open Dialog</DialogTrigger>
<DialogContent aria-describedby="custom-description">
<DialogHeader>
<DialogTitle>Dialog Title</DialogTitle>
</DialogHeader>
<p id="custom-description">Custom description text</p>
</DialogContent>
</Dialog>
)
await user.click(screen.getByText('Open Dialog'))
const dialogContent = screen.getByRole('dialog')
expect(dialogContent).toHaveAttribute('aria-describedby', 'custom-description')
})
it('applies data-slot attributes to components', async () => {
const user = userEvent.setup()
render(
<Dialog>
<DialogTrigger>Open Dialog</DialogTrigger>
<DialogContent>
<DialogHeader>
<DialogTitle>Dialog Title</DialogTitle>
<DialogDescription>Dialog description</DialogDescription>
</DialogHeader>
<div>Dialog body content</div>
<DialogFooter>
<button>Footer button</button>
</DialogFooter>
</DialogContent>
</Dialog>
)
expect(screen.getByText('Open Dialog')).toHaveAttribute('data-slot', 'dialog-trigger')
await user.click(screen.getByText('Open Dialog'))
expect(screen.getByRole('dialog')).toHaveAttribute('data-slot', 'dialog-content')
expect(screen.getByText('Dialog Title').closest('div')).toHaveAttribute('data-slot', 'dialog-header')
expect(screen.getByText('Dialog Title')).toHaveAttribute('data-slot', 'dialog-title')
expect(screen.getByText('Dialog description')).toHaveAttribute('data-slot', 'dialog-description')
expect(screen.getByText('Footer button').closest('div')).toHaveAttribute('data-slot', 'dialog-footer')
})
})

View File

@ -37,7 +37,7 @@ function DialogOverlay({
<DialogPrimitive.Overlay
data-slot="dialog-overlay"
className={cn(
'data-[state=open]:animate-in data-[state=closed]:animate-out data-[state=closed]:fade-out-0 data-[state=open]:fade-in-0 fixed inset-0 z-50 bg-main-view/80 backdrop-blur-sm',
'data-[state=open]:animate-in data-[state=closed]:animate-out data-[state=closed]:fade-out-0 data-[state=open]:fade-in-0 fixed inset-0 z-[80] bg-main-view/80 backdrop-blur-sm',
className
)}
{...props}
@ -67,7 +67,7 @@ function DialogContent({
data-slot="dialog-content"
aria-describedby={ariaDescribedBy}
className={cn(
'bg-main-view max-h-[calc(100%-80px)] overflow-auto border-main-view-fg/10 text-main-view-fg data-[state=open]:animate-in data-[state=closed]:animate-out data-[state=closed]:fade-out-0 data-[state=open]:fade-in-0 data-[state=closed]:zoom-out-95 data-[state=open]:zoom-in-95 fixed top-[50%] left-[50%] z-50 grid w-full max-w-[calc(100%-2rem)] translate-x-[-50%] translate-y-[-50%] gap-4 rounded-lg border p-6 shadow-lg duration-200 sm:max-w-lg',
'bg-main-view max-h-[calc(100%-80px)] overflow-auto border-main-view-fg/10 text-main-view-fg data-[state=open]:animate-in data-[state=closed]:animate-out data-[state=closed]:fade-out-0 data-[state=open]:fade-in-0 data-[state=closed]:zoom-out-95 data-[state=open]:zoom-in-95 fixed top-[50%] left-[50%] z-[90] grid w-full max-w-[calc(100%-2rem)] translate-x-[-50%] translate-y-[-50%] gap-4 rounded-lg border p-6 shadow-lg duration-200 sm:max-w-lg',
className
)}
{...props}

View File

@ -21,4 +21,5 @@ export const localStorageKey = {
lastUsedAssistant: 'last-used-assistant',
favoriteModels: 'favorite-models',
setupCompleted: 'setup-completed',
threadManagement: 'thread-management',
}

View File

@ -3,6 +3,8 @@ export const route = {
home: '/',
appLogs: '/logs',
assistant: '/assistant',
project: '/project',
projectDetail: '/project/$projectId',
settings: {
index: '/settings',
model_providers: '/settings/providers',

View File

@ -4,6 +4,7 @@ import TextareaAutosize from 'react-textarea-autosize'
import { cn } from '@/lib/utils'
import { usePrompt } from '@/hooks/usePrompt'
import { useThreads } from '@/hooks/useThreads'
import { useThreadManagement } from '@/hooks/useThreadManagement'
import { useCallback, useEffect, useRef, useState } from 'react'
import { Button } from '@/components/ui/button'
import {
@ -43,9 +44,15 @@ type ChatInputProps = {
showSpeedToken?: boolean
model?: ThreadModel
initialMessage?: boolean
projectId?: string
}
const ChatInput = ({ model, className, initialMessage }: ChatInputProps) => {
const ChatInput = ({
model,
className,
initialMessage,
projectId,
}: ChatInputProps) => {
const textareaRef = useRef<HTMLTextAreaElement>(null)
const [isFocused, setIsFocused] = useState(false)
const [rows, setRows] = useState(1)
@ -58,6 +65,8 @@ const ChatInput = ({ model, className, initialMessage }: ChatInputProps) => {
const prompt = usePrompt((state) => state.prompt)
const setPrompt = usePrompt((state) => state.setPrompt)
const currentThreadId = useThreads((state) => state.currentThreadId)
const updateThread = useThreads((state) => state.updateThread)
const { getFolderById } = useThreadManagement()
const { t } = useTranslation()
const spellCheckChatInput = useGeneralSetting(
(state) => state.spellCheckChatInput
@ -177,6 +186,28 @@ const ChatInput = ({ model, className, initialMessage }: ChatInputProps) => {
uploadedFiles.length > 0 ? uploadedFiles : undefined
)
setUploadedFiles([])
// Handle project assignment for new threads
if (projectId && !currentThreadId) {
const project = getFolderById(projectId)
if (project) {
// Use setTimeout to ensure the thread is created first
setTimeout(() => {
const newCurrentThreadId = useThreads.getState().currentThreadId
if (newCurrentThreadId) {
updateThread(newCurrentThreadId, {
metadata: {
project: {
id: project.id,
name: project.name,
updated_at: project.updated_at,
},
},
})
}
}, 100)
}
}
}
useEffect(() => {

View File

@ -0,0 +1,142 @@
import { Button } from '@/components/ui/button'
import { Progress } from '@/components/ui/progress'
import { useDownloadStore } from '@/hooks/useDownloadStore'
import { useGeneralSetting } from '@/hooks/useGeneralSetting'
import { useModelProvider } from '@/hooks/useModelProvider'
import { useServiceHub } from '@/hooks/useServiceHub'
import { useTranslation } from '@/i18n'
import { extractModelName } from '@/lib/models'
import { cn, sanitizeModelId } from '@/lib/utils'
import { CatalogModel } from '@/services/models/types'
import { useCallback, useMemo } from 'react'
import { useShallow } from 'zustand/shallow'
type ModelProps = {
model: CatalogModel
handleUseModel: (modelId: string) => void
}
const defaultModelQuantizations = ['iq4_xs', 'q4_k_m']
export function DownloadButtonPlaceholder({
model,
handleUseModel,
}: ModelProps) {
const { downloads, localDownloadingModels, addLocalDownloadingModel } =
useDownloadStore(
useShallow((state) => ({
downloads: state.downloads,
localDownloadingModels: state.localDownloadingModels,
addLocalDownloadingModel: state.addLocalDownloadingModel,
}))
)
const { t } = useTranslation()
const getProviderByName = useModelProvider((state) => state.getProviderByName)
const llamaProvider = getProviderByName('llamacpp')
const serviceHub = useServiceHub()
const huggingfaceToken = useGeneralSetting((state) => state.huggingfaceToken)
const quant =
model.quants.find((e) =>
defaultModelQuantizations.some((m) =>
e.model_id.toLowerCase().includes(m)
)
) ?? model.quants[0]
const modelId = quant?.model_id || model.model_name
const downloadProcesses = useMemo(
() =>
Object.values(downloads).map((download) => ({
id: download.name,
name: download.name,
progress: download.progress,
current: download.current,
total: download.total,
})),
[downloads]
)
const isRecommendedModel = useCallback((modelId: string) => {
return (extractModelName(modelId)?.toLowerCase() ===
'jan-nano-gguf') as boolean
}, [])
if (model.quants.length === 0) {
return (
<div className="flex items-center gap-2">
<Button
size="sm"
onClick={() => {
window.open(`https://huggingface.co/${model.model_name}`, '_blank')
}}
>
View on HuggingFace
</Button>
</div>
)
}
const modelUrl = quant?.path || modelId
const isDownloading =
localDownloadingModels.has(modelId) ||
downloadProcesses.some((e) => e.id === modelId)
const downloadProgress =
downloadProcesses.find((e) => e.id === modelId)?.progress || 0
const isDownloaded = llamaProvider?.models.some(
(m: { id: string }) =>
m.id === modelId ||
m.id === `${model.developer}/${sanitizeModelId(modelId)}`
)
const isRecommended = isRecommendedModel(model.model_name)
const handleDownload = () => {
// Immediately set local downloading state
addLocalDownloadingModel(modelId)
const mmprojPath = (
model.mmproj_models?.find(
(e) => e.model_id.toLowerCase() === 'mmproj-f16'
) || model.mmproj_models?.[0]
)?.path
serviceHub
.models()
.pullModelWithMetadata(modelId, modelUrl, mmprojPath, huggingfaceToken)
}
return (
<div
className={cn(
'flex items-center',
isRecommended && 'hub-download-button-step'
)}
>
{isDownloading && !isDownloaded && (
<div className={cn('flex items-center gap-2 w-20')}>
<Progress value={downloadProgress * 100} />
<span className="text-xs text-center text-main-view-fg/70">
{Math.round(downloadProgress * 100)}%
</span>
</div>
)}
{isDownloaded ? (
<Button
size="sm"
onClick={() => handleUseModel(modelId)}
data-test-id={`hub-model-${modelId}`}
>
{t('hub:use')}
</Button>
) : (
<Button
data-test-id={`hub-model-${modelId}`}
size="sm"
onClick={handleDownload}
className={cn(isDownloading && 'hidden')}
>
{t('hub:download')}
</Button>
)}
</div>
)
}
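The component above resolves which artifact to download up front: it prefers a small default quantization and falls back to the first published quant. A minimal standalone sketch of that selection, assuming only the quant shape used above (model_id plus path):

type Quant = { model_id: string; path: string }

// Preferred quantizations, mirroring the defaults above.
const preferredQuantizations = ['iq4_xs', 'q4_k_m']

function pickDefaultQuant(quants: Quant[]): Quant | undefined {
  return (
    quants.find((q) =>
      preferredQuantizations.some((p) => q.model_id.toLowerCase().includes(p))
    ) ?? quants[0] // no preferred match: take the first published quant
  )
}

// pickDefaultQuant([
//   { model_id: 'x-Q8_0', path: 'a' },
//   { model_id: 'x-Q4_K_M', path: 'b' },
// ]) -> the Q4_K_M entry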

View File

@ -400,23 +400,33 @@ export function DownloadManagement() {
className="text-main-view-fg/70 cursor-pointer"
title="Cancel download"
onClick={() => {
serviceHub
.models()
.abortDownload(download.name)
.then(() => {
toast.info(
t('common:toast.downloadCancelled.title'),
{
id: 'cancel-download',
description: t(
'common:toast.downloadCancelled.description'
),
}
// TODO: Consolidate cancellation logic
if (download.id.startsWith('llamacpp')) {
const downloadManager =
window.core.extensionManager.getByName(
'@janhq/download-extension'
)
if (downloadProcesses.length === 0) {
setIsPopoverOpen(false)
}
})
downloadManager.cancelDownload(download.id)
} else {
serviceHub
.models()
.abortDownload(download.name)
.then(() => {
toast.info(
t('common:toast.downloadCancelled.title'),
{
id: 'cancel-download',
description: t(
'common:toast.downloadCancelled.description'
),
}
)
if (downloadProcesses.length === 0) {
setIsPopoverOpen(false)
}
})
}
}}
/>
</div>
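The TODO above notes that cancellation now branches on the download id: llama.cpp pulls are cancelled through the download extension, everything else through the models service. A hypothetical consolidation of the two paths; the extension name and calls mirror the handler above, but this helper itself is not in the codebase:

// Hypothetical helper, assuming the same globals the component already uses.
async function cancelAnyDownload(download: { id: string; name: string }) {
  if (download.id.startsWith('llamacpp')) {
    // llama.cpp downloads are owned by the download extension
    const downloadManager = window.core.extensionManager.getByName(
      '@janhq/download-extension'
    )
    downloadManager.cancelDownload(download.id)
    return
  }
  // everything else goes through the models service
  await serviceHub.models().abortDownload(download.name)
}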

View File

@ -4,14 +4,18 @@ import { cn } from '@/lib/utils'
import {
IconLayoutSidebar,
IconDots,
IconCirclePlusFilled,
IconSettingsFilled,
IconCirclePlus,
IconSettings,
IconStar,
IconMessageFilled,
IconAppsFilled,
IconFolderPlus,
IconMessage,
IconApps,
IconX,
IconSearch,
IconClipboardSmileFilled,
IconClipboardSmile,
IconFolder,
IconPencil,
IconTrash,
} from '@tabler/icons-react'
import { route } from '@/constants/routes'
import ThreadList from './ThreadList'
@ -28,6 +32,7 @@ import { UserProfileMenu } from '@/containers/auth/UserProfileMenu'
import { useAuth } from '@/hooks/useAuth'
import { useThreads } from '@/hooks/useThreads'
import { useThreadManagement } from '@/hooks/useThreadManagement'
import { useTranslation } from '@/i18n/react-i18next-compat'
import { useMemo, useState, useEffect, useRef } from 'react'
@ -37,38 +42,40 @@ import { useSmallScreen } from '@/hooks/useMediaQuery'
import { useClickOutside } from '@/hooks/useClickOutside'
import { DeleteAllThreadsDialog } from '@/containers/dialogs'
import AddProjectDialog from '@/containers/dialogs/AddProjectDialog'
import { DeleteProjectDialog } from '@/containers/dialogs/DeleteProjectDialog'
const mainMenus = [
{
title: 'common:newChat',
icon: IconCirclePlusFilled,
icon: IconCirclePlus,
route: route.home,
isEnabled: true,
},
{
title: 'common:projects.title',
icon: IconFolderPlus,
route: route.project,
isEnabled: true,
},
{
title: 'common:assistants',
icon: IconClipboardSmileFilled,
icon: IconClipboardSmile,
route: route.assistant,
isEnabled: PlatformFeatures[PlatformFeature.ASSISTANTS],
},
{
title: 'common:hub',
icon: IconAppsFilled,
icon: IconApps,
route: route.hub.index,
isEnabled: PlatformFeatures[PlatformFeature.MODEL_HUB],
},
{
title: 'common:settings',
icon: IconSettingsFilled,
icon: IconSettings,
route: route.settings.general,
isEnabled: true,
},
{
title: 'common:authentication',
icon: null,
route: null,
isEnabled: PlatformFeatures[PlatformFeature.AUTHENTICATION],
},
]
const LeftPanel = () => {
@ -153,20 +160,65 @@ const LeftPanel = () => {
const getFilteredThreads = useThreads((state) => state.getFilteredThreads)
const threads = useThreads((state) => state.threads)
const { folders, addFolder, updateFolder, deleteFolder, getFolderById } =
useThreadManagement()
// Project dialog states
const [projectDialogOpen, setProjectDialogOpen] = useState(false)
const [editingProjectKey, setEditingProjectKey] = useState<string | null>(
null
)
const [deleteProjectConfirmOpen, setDeleteProjectConfirmOpen] =
useState(false)
const [deletingProjectId, setDeletingProjectId] = useState<string | null>(
null
)
const filteredThreads = useMemo(() => {
return getFilteredThreads(searchTerm)
// eslint-disable-next-line react-hooks/exhaustive-deps
}, [getFilteredThreads, searchTerm, threads])
const filteredProjects = useMemo(() => {
if (!searchTerm) return folders
return folders.filter((folder) =>
folder.name.toLowerCase().includes(searchTerm.toLowerCase())
)
}, [folders, searchTerm])
// Memoize categorized threads based on filteredThreads
const favoritedThreads = useMemo(() => {
return filteredThreads.filter((t) => t.isFavorite)
}, [filteredThreads])
const unFavoritedThreads = useMemo(() => {
return filteredThreads.filter((t) => !t.isFavorite)
return filteredThreads.filter((t) => !t.isFavorite && !t.metadata?.project)
}, [filteredThreads])
// Project handlers
const handleProjectDelete = (id: string) => {
setDeletingProjectId(id)
setDeleteProjectConfirmOpen(true)
}
const confirmProjectDelete = () => {
if (deletingProjectId) {
deleteFolder(deletingProjectId)
setDeleteProjectConfirmOpen(false)
setDeletingProjectId(null)
}
}
const handleProjectSave = (name: string) => {
if (editingProjectKey) {
updateFolder(editingProjectKey, name)
} else {
addFolder(name)
}
setProjectDialogOpen(false)
setEditingProjectKey(null)
}
// Disable body scroll when panel is open on small screens
useEffect(() => {
if (isSmallScreen && open) {
@ -261,15 +313,12 @@ const LeftPanel = () => {
)}
</div>
<div className="flex flex-col justify-between overflow-hidden mt-0 !h-[calc(100%-42px)] ">
<div className={cn('flex flex-col !h-[calc(100%-200px)]')}>
<div className="flex flex-col gap-y-1 overflow-hidden mt-0 !h-[calc(100%-42px)]">
<div className="space-y-1 py-1">
{IS_MACOS && (
<div
ref={searchContainerMacRef}
className={cn(
'relative mb-4 mt-1',
isResizableContext ? 'mx-2' : 'mx-1'
)}
className={cn('relative mb-2 mt-1 mx-1')}
data-ignore-outside-clicks
>
<IconSearch className="absolute size-4 top-1/2 left-2 -translate-y-1/2 text-left-panel-fg/50" />
@ -295,7 +344,151 @@ const LeftPanel = () => {
)}
</div>
)}
<div className="flex flex-col w-full overflow-y-auto overflow-x-hidden">
{mainMenus.map((menu) => {
if (!menu.isEnabled) {
return null
}
// Handle authentication menu specially
if (menu.title === 'common:authentication') {
return (
<div key={menu.title}>
<div className="mx-1 my-2 border-t border-left-panel-fg/5" />
{isAuthenticated ? (
<UserProfileMenu />
) : (
<AuthLoginButton />
)}
</div>
)
}
// Regular menu items must have route and icon
if (!menu.route || !menu.icon) return null
const isActive = (() => {
// Settings routes
if (menu.route.includes(route.settings.index)) {
return currentPath.includes(route.settings.index)
}
// Default exact match for other routes
return currentPath === menu.route
})()
return (
<Link
key={menu.title}
to={menu.route}
onClick={() => isSmallScreen && setLeftPanel(false)}
data-test-id={`menu-${menu.title}`}
activeOptions={{ exact: true }}
className={cn(
'flex items-center gap-1.5 cursor-pointer hover:bg-left-panel-fg/10 py-1 px-1 rounded',
isActive && 'bg-left-panel-fg/10'
)}
>
<menu.icon size={18} className="text-left-panel-fg/70" />
<span className="font-medium text-left-panel-fg/90">
{t(menu.title)}
</span>
</Link>
)
})}
</div>
{filteredProjects.length > 0 && (
<div className="space-y-1 py-1">
<div className="flex items-center justify-between mb-2">
<span className="block text-xs text-left-panel-fg/50 px-1 font-semibold">
{t('common:projects.title')}
</span>
</div>
<div className="flex flex-col max-h-[140px] overflow-y-scroll">
{filteredProjects
.slice()
.sort((a, b) => b.updated_at - a.updated_at)
.map((folder) => {
const ProjectItem = () => {
const [openDropdown, setOpenDropdown] = useState(false)
const isProjectActive =
currentPath === `/project/${folder.id}`
return (
<div key={folder.id} className="mb-1">
<div
className={cn(
'rounded hover:bg-left-panel-fg/10 flex items-center justify-between gap-2 px-1.5 group/project-list transition-all cursor-pointer',
isProjectActive && 'bg-left-panel-fg/10'
)}
>
<Link
to="/project/$projectId"
params={{ projectId: folder.id }}
onClick={() =>
isSmallScreen && setLeftPanel(false)
}
className="py-1 pr-2 truncate flex items-center gap-2 flex-1"
>
<IconFolder
size={16}
className="text-left-panel-fg/70"
/>
<span className="text-sm text-left-panel-fg/90">
{folder.name}
</span>
</Link>
<div className="flex items-center">
<DropdownMenu
open={openDropdown}
onOpenChange={(open) => setOpenDropdown(open)}
>
<DropdownMenuTrigger asChild>
<IconDots
size={14}
className="text-left-panel-fg/60 shrink-0 cursor-pointer px-0.5 -mr-1 data-[state=open]:bg-left-panel-fg/10 rounded group-hover/project-list:data-[state=closed]:size-5 size-5 data-[state=closed]:size-0"
onClick={(e) => {
e.preventDefault()
e.stopPropagation()
}}
/>
</DropdownMenuTrigger>
<DropdownMenuContent side="bottom" align="end">
<DropdownMenuItem
onClick={(e) => {
e.stopPropagation()
setEditingProjectKey(folder.id)
setProjectDialogOpen(true)
}}
>
<IconPencil size={16} />
<span>Edit</span>
</DropdownMenuItem>
<DropdownMenuItem
onClick={(e) => {
e.stopPropagation()
handleProjectDelete(folder.id)
}}
>
<IconTrash size={16} />
<span>Delete</span>
</DropdownMenuItem>
</DropdownMenuContent>
</DropdownMenu>
</div>
</div>
</div>
)
}
return <ProjectItem key={folder.id} />
})}
</div>
</div>
)}
<div className="flex flex-col h-full overflow-y-scroll w-[calc(100%+6px)]">
<div className="flex flex-col w-full h-full overflow-y-auto overflow-x-hidden">
<div className="h-full w-full overflow-y-auto">
{favoritedThreads.length > 0 && (
<>
@ -398,7 +591,7 @@ const LeftPanel = () => {
<>
<div className="px-1 mt-2">
<div className="flex items-center gap-1 text-left-panel-fg/80">
<IconMessageFilled size={18} />
<IconMessage size={18} />
<h6 className="font-medium text-base">
{t('common:noThreadsYet')}
</h6>
@ -415,59 +608,38 @@ const LeftPanel = () => {
</div>
</div>
</div>
{PlatformFeatures[PlatformFeature.AUTHENTICATION] && (
<div className="space-y-1 shrink-0 py-1">
<div>
<div className="mx-1 my-2 border-t border-left-panel-fg/5" />
{isAuthenticated ? <UserProfileMenu /> : <AuthLoginButton />}
</div>
</div>
)}
<DownloadManagement />
</div>
<div className="space-y-1 shrink-0 py-1 mt-2">
{mainMenus.map((menu) => {
if (!menu.isEnabled) {
return null
}
// Handle authentication menu specially
if (menu.title === 'common:authentication') {
return (
<div key={menu.title}>
<div className="mx-1 my-2 border-t border-left-panel-fg/5" />
{isAuthenticated ? (
<UserProfileMenu />
) : (
<AuthLoginButton />
)}
</div>
)
}
// Regular menu items must have route and icon
if (!menu.route || !menu.icon) return null
const isActive =
currentPath.includes(route.settings.index) &&
menu.route.includes(route.settings.index)
return (
<Link
key={menu.title}
to={menu.route}
onClick={() => isSmallScreen && setLeftPanel(false)}
data-test-id={`menu-${menu.title}`}
className={cn(
'flex items-center gap-1.5 cursor-pointer hover:bg-left-panel-fg/10 py-1 px-1 rounded',
isActive
? 'bg-left-panel-fg/10'
: '[&.active]:bg-left-panel-fg/10'
)}
>
<menu.icon size={18} className="text-left-panel-fg/70" />
<span className="font-medium text-left-panel-fg/90">
{t(menu.title)}
</span>
</Link>
)
})}
</div>
<DownloadManagement />
</div>
</aside>
{/* Project Dialogs */}
<AddProjectDialog
open={projectDialogOpen}
onOpenChange={setProjectDialogOpen}
editingKey={editingProjectKey}
initialData={
editingProjectKey ? getFolderById(editingProjectKey) : undefined
}
onSave={handleProjectSave}
/>
<DeleteProjectDialog
open={deleteProjectConfirmOpen}
onOpenChange={setDeleteProjectConfirmOpen}
onConfirm={confirmProjectDelete}
projectName={
deletingProjectId ? getFolderById(deletingProjectId)?.name : undefined
}
/>
</>
)
}

View File

@ -103,6 +103,13 @@ export function ModelSetting({
})
}
if (model.settings?.batch_size && result.batchSize !== undefined) {
settingsToUpdate.push({
key: 'batch_size',
value: result.batchSize,
})
}
// Apply all settings in a single update to avoid race conditions
if (settingsToUpdate.length > 0) {
handleMultipleSettingsChange(settingsToUpdate)
@ -163,7 +170,8 @@ export function ModelSetting({
key === 'ctx_len' ||
key === 'ngl' ||
key === 'chat_template' ||
key === 'offload_mmproj'
key === 'offload_mmproj' ||
key === 'batch_size'
)
if (requiresRestart) {
@ -222,7 +230,8 @@ export function ModelSetting({
key === 'ctx_len' ||
key === 'ngl' ||
key === 'chat_template' ||
key === 'offload_mmproj'
key === 'offload_mmproj' ||
key === 'batch_size'
) {
// Check if model is running before stopping it
serviceHub

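Both hunks above extend the same hard-coded list, making batch_size a restart-requiring setting alongside ctx_len, ngl, chat_template and offload_mmproj. A small sketch of factoring the duplicated check into one predicate (a refactor suggestion, not code from this commit):

// Settings that only take effect after the model process restarts.
const RESTART_REQUIRED_KEYS = new Set([
  'ctx_len',
  'ngl',
  'chat_template',
  'offload_mmproj',
  'batch_size',
])

const requiresRestart = (key: string): boolean => RESTART_REQUIRED_KEYS.has(key)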
View File

@ -89,6 +89,7 @@ const CodeComponent = memo(
onCopy,
copiedId,
...props
// eslint-disable-next-line @typescript-eslint/no-explicit-any
}: any) => {
const { t } = useTranslation()
const match = /language-(\w+)/.exec(className || '')

View File

@ -20,7 +20,7 @@ function SetupScreen() {
localStorage.getItem(localStorageKey.setupCompleted) === 'true'
return (
<div className="flex h-full flex-col flex-justify-center">
<div className="flex h-full flex-col justify-center">
<HeaderPage></HeaderPage>
<div className="h-full px-8 overflow-y-auto flex flex-col gap-2 justify-center ">
<div className="w-full lg:w-4/6 mx-auto">

View File

@ -16,9 +16,13 @@ import {
IconDots,
IconStarFilled,
IconStar,
IconFolder,
IconX,
} from '@tabler/icons-react'
import { useThreads } from '@/hooks/useThreads'
import { useThreadManagement } from '@/hooks/useThreadManagement'
import { useLeftPanel } from '@/hooks/useLeftPanel'
import { useMessages } from '@/hooks/useMessages'
import { cn } from '@/lib/utils'
import { useSmallScreen } from '@/hooks/useMediaQuery'
@ -28,147 +32,274 @@ import {
DropdownMenuItem,
DropdownMenuSeparator,
DropdownMenuTrigger,
DropdownMenuSub,
DropdownMenuSubContent,
DropdownMenuSubTrigger,
} from '@/components/ui/dropdown-menu'
import { useTranslation } from '@/i18n/react-i18next-compat'
import { memo, useMemo, useState } from 'react'
import { memo, MouseEvent, useMemo, useState } from 'react'
import { useNavigate, useMatches } from '@tanstack/react-router'
import { RenameThreadDialog, DeleteThreadDialog } from '@/containers/dialogs'
import { route } from '@/constants/routes'
import { toast } from 'sonner'
const SortableItem = memo(({ thread }: { thread: Thread }) => {
const {
attributes,
listeners,
setNodeRef,
transform,
transition,
isDragging,
} = useSortable({ id: thread.id, disabled: true })
const SortableItem = memo(
({
thread,
variant,
}: {
thread: Thread
variant?: 'default' | 'project'
}) => {
const {
attributes,
listeners,
setNodeRef,
transform,
transition,
isDragging,
} = useSortable({ id: thread.id, disabled: true })
const isSmallScreen = useSmallScreen()
const setLeftPanel = useLeftPanel(state => state.setLeftPanel)
const isSmallScreen = useSmallScreen()
const setLeftPanel = useLeftPanel((state) => state.setLeftPanel)
const style = {
transform: CSS.Transform.toString(transform),
transition,
opacity: isDragging ? 0.5 : 1,
}
const toggleFavorite = useThreads((state) => state.toggleFavorite)
const deleteThread = useThreads((state) => state.deleteThread)
const renameThread = useThreads((state) => state.renameThread)
const { t } = useTranslation()
const [openDropdown, setOpenDropdown] = useState(false)
const navigate = useNavigate()
// Check if current route matches this thread's detail page
const matches = useMatches()
const isActive = matches.some(
(match) =>
match.routeId === '/threads/$threadId' &&
'threadId' in match.params &&
match.params.threadId === thread.id
)
const style = {
transform: CSS.Transform.toString(transform),
transition,
opacity: isDragging ? 0.5 : 1,
}
const toggleFavorite = useThreads((state) => state.toggleFavorite)
const deleteThread = useThreads((state) => state.deleteThread)
const renameThread = useThreads((state) => state.renameThread)
const updateThread = useThreads((state) => state.updateThread)
const getFolderById = useThreadManagement().getFolderById
const { folders } = useThreadManagement()
const getMessages = useMessages((state) => state.getMessages)
const { t } = useTranslation()
const [openDropdown, setOpenDropdown] = useState(false)
const navigate = useNavigate()
// Check if current route matches this thread's detail page
const matches = useMatches()
const isActive = matches.some(
(match) =>
match.routeId === '/threads/$threadId' &&
'threadId' in match.params &&
match.params.threadId === thread.id
)
const handleClick = () => {
if (!isDragging) {
// Only close panel and navigate if the thread is not already active
if (!isActive) {
if (isSmallScreen) setLeftPanel(false)
navigate({ to: route.threadsDetail, params: { threadId: thread.id } })
const handleClick = (e: MouseEvent<HTMLDivElement>) => {
if (openDropdown) {
e.stopPropagation()
e.preventDefault()
return
}
if (!isDragging) {
// Only close panel and navigate if the thread is not already active
if (!isActive) {
if (isSmallScreen) setLeftPanel(false)
navigate({ to: route.threadsDetail, params: { threadId: thread.id } })
}
}
}
}
const plainTitleForRename = useMemo(() => {
// Basic HTML stripping for simple span tags.
// If thread.title is undefined or null, treat as empty string before replace.
return (thread.title || '').replace(/<span[^>]*>|<\/span>/g, '')
}, [thread.title])
const plainTitleForRename = useMemo(() => {
// Basic HTML stripping for simple span tags.
// If thread.title is undefined or null, treat as empty string before replace.
return (thread.title || '').replace(/<span[^>]*>|<\/span>/g, '')
}, [thread.title])
const assignThreadToProject = (threadId: string, projectId: string) => {
const project = getFolderById(projectId)
if (project && updateThread) {
const projectMetadata = {
id: project.id,
name: project.name,
updated_at: project.updated_at,
}
return (
<div
ref={setNodeRef}
style={style}
{...attributes}
{...listeners}
onClick={handleClick}
onContextMenu={(e) => {
e.preventDefault()
e.stopPropagation()
setOpenDropdown(true)
}}
className={cn(
'mb-1 rounded hover:bg-left-panel-fg/10 flex items-center justify-between gap-2 px-1.5 group/thread-list transition-all',
isDragging ? 'cursor-move' : 'cursor-pointer',
isActive && 'bg-left-panel-fg/10'
)}
>
<div className="py-1 pr-2 truncate">
<span>{thread.title || t('common:newThread')}</span>
</div>
<div className="flex items-center">
<DropdownMenu
open={openDropdown}
onOpenChange={(open) => setOpenDropdown(open)}
updateThread(threadId, {
metadata: {
...thread.metadata,
project: projectMetadata,
},
})
        toast.success(
          t('common:toast.threadAssignedToProject.description', {
            projectName: project.name,
          })
        )
}
}
const getLastMessageInfo = useMemo(() => {
const messages = getMessages(thread.id)
if (messages.length === 0) return null
const lastMessage = messages[messages.length - 1]
return {
date: new Date(lastMessage.created_at || 0),
content: lastMessage.content?.[0]?.text?.value || '',
}
}, [getMessages, thread.id])
return (
<div
ref={setNodeRef}
style={style}
{...attributes}
{...listeners}
className={cn(
'rounded hover:bg-left-panel-fg/10 flex items-center justify-between gap-2 px-1.5 group/thread-list transition-all',
variant === 'project'
? 'mb-2 rounded-lg px-4 border border-main-view-fg/10 bg-main-view-fg/5'
: 'mb-1',
isDragging ? 'cursor-move' : 'cursor-pointer',
isActive && 'bg-left-panel-fg/10'
)}
onClick={(e) => handleClick(e)}
onContextMenu={(e) => {
e.preventDefault()
e.stopPropagation()
setOpenDropdown(true)
}}
>
<div
className={cn(
'pr-2 truncate flex-1',
variant === 'project' ? 'py-2 cursor-pointer' : 'py-1'
)}
>
<DropdownMenuTrigger asChild>
<IconDots
size={14}
className="text-left-panel-fg/60 shrink-0 cursor-pointer px-0.5 -mr-1 data-[state=open]:bg-left-panel-fg/10 rounded group-hover/thread-list:data-[state=closed]:size-5 size-5 data-[state=closed]:size-0"
onClick={(e) => {
e.preventDefault()
e.stopPropagation()
}}
/>
</DropdownMenuTrigger>
<DropdownMenuContent side="bottom" align="end">
{thread.isFavorite ? (
<DropdownMenuItem
<span>{thread.title || t('common:newThread')}</span>
            {variant === 'project' && getLastMessageInfo?.content && (
              <div className="text-sm text-main-view-fg/60 mt-0.5 line-clamp-2">
                {getLastMessageInfo.content}
              </div>
            )}
</div>
<div className="flex items-center">
<DropdownMenu
open={openDropdown}
onOpenChange={(open) => setOpenDropdown(open)}
>
<DropdownMenuTrigger asChild>
<IconDots
size={14}
className="text-left-panel-fg/60 shrink-0 cursor-pointer px-0.5 -mr-1 data-[state=open]:bg-left-panel-fg/10 rounded group-hover/thread-list:data-[state=closed]:size-5 size-5 data-[state=closed]:size-0"
onClick={(e) => {
e.preventDefault()
e.stopPropagation()
toggleFavorite(thread.id)
}}
>
<IconStarFilled />
<span>{t('common:unstar')}</span>
</DropdownMenuItem>
) : (
<DropdownMenuItem
onClick={(e) => {
e.stopPropagation()
toggleFavorite(thread.id)
}}
>
<IconStar />
<span>{t('common:star')}</span>
</DropdownMenuItem>
)}
<RenameThreadDialog
thread={thread}
plainTitleForRename={plainTitleForRename}
onRename={renameThread}
onDropdownClose={() => setOpenDropdown(false)}
/>
/>
</DropdownMenuTrigger>
<DropdownMenuContent side="bottom" align="end" className="w-44">
{thread.isFavorite ? (
<DropdownMenuItem
onClick={(e) => {
e.stopPropagation()
toggleFavorite(thread.id)
}}
>
<IconStarFilled />
<span>{t('common:unstar')}</span>
</DropdownMenuItem>
) : (
<DropdownMenuItem
onClick={(e) => {
e.stopPropagation()
toggleFavorite(thread.id)
}}
>
<IconStar />
<span>{t('common:star')}</span>
</DropdownMenuItem>
)}
<RenameThreadDialog
thread={thread}
plainTitleForRename={plainTitleForRename}
onRename={renameThread}
onDropdownClose={() => setOpenDropdown(false)}
/>
<DropdownMenuSeparator />
<DeleteThreadDialog
thread={thread}
onDelete={deleteThread}
onDropdownClose={() => setOpenDropdown(false)}
/>
</DropdownMenuContent>
</DropdownMenu>
<DropdownMenuSub>
<DropdownMenuSubTrigger className="gap-2">
<IconFolder size={16} />
                  <span>{t('common:projects.addToProject')}</span>
</DropdownMenuSubTrigger>
<DropdownMenuSubContent>
{folders.length === 0 ? (
<DropdownMenuItem disabled>
<span className="text-left-panel-fg/50">
No projects available
</span>
</DropdownMenuItem>
) : (
                    folders
                      .slice() // sort a copy; don't mutate store state during render
                      .sort((a, b) => b.updated_at - a.updated_at)
.map((folder) => (
<DropdownMenuItem
key={folder.id}
onClick={(e) => {
e.stopPropagation()
assignThreadToProject(thread.id, folder.id)
}}
>
<IconFolder size={16} />
<span className="truncate max-w-[200px]">
{folder.name}
</span>
</DropdownMenuItem>
))
)}
{thread.metadata?.project && (
<>
<DropdownMenuSeparator />
<DropdownMenuItem
onClick={(e) => {
e.stopPropagation()
// Remove project from metadata
const projectName = thread.metadata?.project?.name
updateThread(thread.id, {
metadata: {
...thread.metadata,
project: undefined,
},
})
                          toast.success(
                            t('common:toast.threadRemovedFromProject.description', {
                              projectName,
                            })
                          )
}}
>
<IconX size={16} />
                          <span>{t('common:projects.removeFromProject')}</span>
</DropdownMenuItem>
</>
)}
</DropdownMenuSubContent>
</DropdownMenuSub>
<DropdownMenuSeparator />
<DeleteThreadDialog
thread={thread}
onDelete={deleteThread}
onDropdownClose={() => setOpenDropdown(false)}
variant={variant}
/>
</DropdownMenuContent>
</DropdownMenu>
</div>
</div>
</div>
)
})
)
}
)
type ThreadListProps = {
threads: Thread[]
isFavoriteSection?: boolean
variant?: 'default' | 'project'
showDate?: boolean
}
function ThreadList({ threads }: ThreadListProps) {
function ThreadList({ threads, variant = 'default' }: ThreadListProps) {
const sortedThreads = useMemo(() => {
    // sort a copy so the threads prop is not mutated in place
    return [...threads].sort((a, b) => {
return (b.updated || 0) - (a.updated || 0)
@ -192,7 +323,7 @@ function ThreadList({ threads }: ThreadListProps) {
strategy={verticalListSortingStrategy}
>
{sortedThreads.map((thread, index) => (
<SortableItem key={index} thread={thread} />
          <SortableItem key={thread.id} thread={thread} variant={variant} />
))}
</SortableContext>
</DndContext>
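Assigning a thread to a project and removing it are symmetric metadata writes: both spread the existing metadata and either set or clear the project key, so unrelated metadata survives. A compact sketch of the pair, assuming the updateThread action and metadata shape used above:

type ProjectMeta = { id: string; name: string; updated_at: number }

const assignProject = (thread: Thread, project: ProjectMeta) =>
  updateThread(thread.id, {
    metadata: { ...thread.metadata, project },
  })

const removeProject = (thread: Thread) =>
  updateThread(thread.id, {
    // spreading first keeps unrelated metadata intact
    metadata: { ...thread.metadata, project: undefined },
  })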

View File

@ -0,0 +1,125 @@
import { useState, useEffect } from 'react'
import {
Dialog,
DialogContent,
DialogHeader,
DialogTitle,
DialogFooter,
} from '@/components/ui/dialog'
import { Button } from '@/components/ui/button'
import { Input } from '@/components/ui/input'
import { useThreadManagement } from '@/hooks/useThreadManagement'
import { toast } from 'sonner'
import { useTranslation } from '@/i18n/react-i18next-compat'
interface AddProjectDialogProps {
open: boolean
onOpenChange: (open: boolean) => void
editingKey: string | null
initialData?: {
id: string
name: string
updated_at: number
}
onSave: (name: string) => void
}
export default function AddProjectDialog({
open,
onOpenChange,
editingKey,
initialData,
onSave,
}: AddProjectDialogProps) {
const { t } = useTranslation()
const [name, setName] = useState(initialData?.name || '')
const { folders } = useThreadManagement()
useEffect(() => {
if (open) {
setName(initialData?.name || '')
}
}, [open, initialData])
const handleSave = () => {
if (!name.trim()) return
const trimmedName = name.trim()
// Check for duplicate names (excluding current project when editing)
const isDuplicate = folders.some(
(folder) =>
folder.name.toLowerCase() === trimmedName.toLowerCase() &&
folder.id !== editingKey
)
if (isDuplicate) {
toast.warning(t('projects.addProjectDialog.alreadyExists', { projectName: trimmedName }))
return
}
onSave(trimmedName)
// Show detailed success message
if (editingKey && initialData) {
toast.success(
t('projects.addProjectDialog.renameSuccess', {
oldName: initialData.name,
newName: trimmedName
})
)
} else {
toast.success(t('projects.addProjectDialog.createSuccess', { projectName: trimmedName }))
}
setName('')
}
const handleCancel = () => {
onOpenChange(false)
setName('')
}
// Check if the button should be disabled
const isButtonDisabled =
!name.trim() || (editingKey && name.trim() === initialData?.name)
return (
<Dialog open={open} onOpenChange={onOpenChange}>
<DialogContent className="sm:max-w-md">
<DialogHeader>
<DialogTitle>
{editingKey ? t('projects.addProjectDialog.editTitle') : t('projects.addProjectDialog.createTitle')}
</DialogTitle>
</DialogHeader>
<div className="space-y-4">
<div>
<label className="text-sm font-medium text-main-view-fg/80">
{t('projects.addProjectDialog.nameLabel')}
</label>
<Input
value={name}
onChange={(e) => setName(e.target.value)}
placeholder={t('projects.addProjectDialog.namePlaceholder')}
className="mt-1"
autoFocus
onKeyDown={(e) => {
if (e.key === 'Enter' && !isButtonDisabled) {
handleSave()
}
}}
/>
</div>
</div>
<DialogFooter>
<Button variant="link" onClick={handleCancel}>
{t('cancel')}
</Button>
<Button onClick={handleSave} disabled={Boolean(isButtonDisabled)}>
{editingKey ? t('projects.addProjectDialog.updateButton') : t('projects.addProjectDialog.createButton')}
</Button>
</DialogFooter>
</DialogContent>
</Dialog>
)
}

View File

@ -0,0 +1,85 @@
import { useRef } from 'react'
import {
Dialog,
DialogContent,
DialogHeader,
DialogTitle,
DialogFooter,
DialogDescription,
} from '@/components/ui/dialog'
import { Button } from '@/components/ui/button'
import { toast } from 'sonner'
import { useTranslation } from '@/i18n/react-i18next-compat'
interface DeleteProjectDialogProps {
open: boolean
onOpenChange: (open: boolean) => void
onConfirm: () => void
projectName?: string
}
export function DeleteProjectDialog({
open,
onOpenChange,
onConfirm,
projectName,
}: DeleteProjectDialogProps) {
const { t } = useTranslation()
const deleteButtonRef = useRef<HTMLButtonElement>(null)
const handleConfirm = () => {
try {
onConfirm()
toast.success(
projectName
? t('projects.deleteProjectDialog.successWithName', { projectName })
: t('projects.deleteProjectDialog.successWithoutName')
)
onOpenChange(false)
} catch (error) {
toast.error(t('projects.deleteProjectDialog.error'))
console.error('Delete project error:', error)
}
}
const handleKeyDown = (e: React.KeyboardEvent) => {
if (e.key === 'Enter') {
handleConfirm()
}
}
return (
<Dialog open={open} onOpenChange={onOpenChange}>
<DialogContent
className="sm:max-w-md"
onOpenAutoFocus={(e) => {
e.preventDefault()
deleteButtonRef.current?.focus()
}}
>
<DialogHeader>
<DialogTitle>{t('projects.deleteProjectDialog.title')}</DialogTitle>
<DialogDescription>
{t('projects.deleteProjectDialog.description')}
</DialogDescription>
</DialogHeader>
<DialogFooter>
<Button variant="link" onClick={() => onOpenChange(false)}>
{t('cancel')}
</Button>
<Button
ref={deleteButtonRef}
variant="destructive"
onClick={handleConfirm}
onKeyDown={handleKeyDown}
aria-label={t('projects.deleteProjectDialog.ariaLabel', {
projectName: projectName || t('projects.title').toLowerCase(),
})}
>
{t('projects.deleteProjectDialog.deleteButton')}
</Button>
</DialogFooter>
</DialogContent>
</Dialog>
)
}

View File

@ -21,12 +21,14 @@ interface DeleteThreadDialogProps {
thread: Thread
onDelete: (threadId: string) => void
onDropdownClose: () => void
variant?: 'default' | 'project'
}
export function DeleteThreadDialog({
thread,
onDelete,
onDropdownClose,
variant = 'default',
}: DeleteThreadDialogProps) {
const { t } = useTranslation()
const navigate = useNavigate()
@ -48,9 +50,11 @@ export function DeleteThreadDialog({
id: 'delete-thread',
description: t('common:toast.deleteThread.description'),
})
setTimeout(() => {
navigate({ to: route.home })
}, 0)
if (variant !== 'project') {
setTimeout(() => {
navigate({ to: route.home })
}, 0)
}
}
const handleKeyDown = (e: React.KeyboardEvent) => {

View File

@ -6,4 +6,5 @@ export { MessageMetadataDialog } from './MessageMetadataDialog'
export { DeleteMessageDialog } from './DeleteMessageDialog'
export { FactoryResetDialog } from './FactoryResetDialog'
export { DeleteAssistantDialog } from './DeleteAssistantDialog'
export { AddProviderDialog } from './AddProviderDialog'
export { DeleteProjectDialog } from './DeleteProjectDialog'
export { AddProviderDialog } from './AddProviderDialog'

View File

@ -288,9 +288,40 @@ export const useModelProvider = create<ModelProviderState>()(
})
}
if (version <= 2 && state?.providers) {
state.providers.forEach((provider) => {
// Update cont_batching description for llamacpp provider
if (provider.provider === 'llamacpp' && provider.settings) {
const contBatchingSetting = provider.settings.find(
(s) => s.key === 'cont_batching'
)
if (contBatchingSetting) {
contBatchingSetting.description =
'Enable continuous batching (a.k.a dynamic batching) for concurrent requests.'
}
}
// Migrate model settings
if (provider.models && provider.provider === 'llamacpp') {
provider.models.forEach((model) => {
if (!model.settings) model.settings = {}
if (!model.settings.batch_size) {
model.settings.batch_size = {
...modelSettings.batch_size,
controller_props: {
...modelSettings.batch_size.controller_props,
},
}
}
})
}
})
}
return state
},
version: 2,
version: 3,
}
)
)
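The hunk above is zustand's persist versioning at work: migrate receives the stored snapshot plus its version, patches anything older in place, and the bump to version: 3 makes the migration run once per client. A minimal standalone sketch of the pattern, with a deliberately simplified state shape:

import { create } from 'zustand'
import { persist } from 'zustand/middleware'

type ProviderState = {
  providers: { provider: string; settings?: { key: string }[] }[]
}

export const useExampleProvider = create<ProviderState>()(
  persist(
    () => ({ providers: [] }),
    {
      name: 'example-provider-store',
      version: 3, // bump whenever the persisted shape changes
      migrate: (persisted, version) => {
        const state = persisted as ProviderState
        if (version <= 2) {
          // patch old snapshots here, e.g. seed a missing default setting
        }
        return state
      },
    }
  )
)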

View File

@ -0,0 +1,82 @@
import { create } from 'zustand'
import { persist, createJSONStorage } from 'zustand/middleware'
import { ulid } from 'ulidx'
import { localStorageKey } from '@/constants/localStorage'
import { useThreads } from '@/hooks/useThreads'
type ThreadFolder = {
id: string
name: string
updated_at: number
}
type ThreadManagementState = {
folders: ThreadFolder[]
setFolders: (folders: ThreadFolder[]) => void
addFolder: (name: string) => void
updateFolder: (id: string, name: string) => void
deleteFolder: (id: string) => void
getFolderById: (id: string) => ThreadFolder | undefined
}
export const useThreadManagement = create<ThreadManagementState>()(
persist(
(set, get) => ({
folders: [],
setFolders: (folders) => {
set({ folders })
},
addFolder: (name) => {
const newFolder: ThreadFolder = {
id: ulid(),
name,
updated_at: Date.now(),
}
set((state) => ({
folders: [...state.folders, newFolder],
}))
},
updateFolder: (id, name) => {
set((state) => ({
folders: state.folders.map((folder) =>
folder.id === id
? { ...folder, name, updated_at: Date.now() }
: folder
),
}))
},
deleteFolder: (id) => {
// Remove project metadata from all threads that belong to this project
const threadsState = useThreads.getState()
const threadsToUpdate = Object.values(threadsState.threads).filter(
(thread) => thread.metadata?.project?.id === id
)
threadsToUpdate.forEach((thread) => {
threadsState.updateThread(thread.id, {
metadata: {
...thread.metadata,
project: undefined,
},
})
})
set((state) => ({
folders: state.folders.filter((folder) => folder.id !== id),
}))
},
getFolderById: (id) => {
return get().folders.find((folder) => folder.id === id)
},
}),
{
name: localStorageKey.threadManagement,
storage: createJSONStorage(() => localStorage),
}
)
)
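Since the store is built with zustand's create, it works both as a React hook and imperatively through getState, which is how deleteFolder above reaches into useThreads. A short usage sketch:

// Inside a component:
const { folders, addFolder } = useThreadManagement()

// Outside React, e.g. from another store or event handler:
useThreadManagement.getState().addFolder('Research')
const first = useThreadManagement.getState().folders[0]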

View File

@ -20,12 +20,14 @@ type ThreadState = {
createThread: (
model: ThreadModel,
title?: string,
assistant?: Assistant
assistant?: Assistant,
projectMetadata?: { id: string; name: string; updated_at: number }
) => Promise<Thread>
updateCurrentThreadModel: (model: ThreadModel) => void
getFilteredThreads: (searchTerm: string) => Thread[]
updateCurrentThreadAssistant: (assistant: Assistant) => void
updateThreadTimestamp: (threadId: string) => void
updateThread: (threadId: string, updates: Partial<Thread>) => void
searchIndex: Fzf<Thread[]> | null
}
@ -132,20 +134,28 @@ export const useThreads = create<ThreadState>()((set, get) => ({
deleteAllThreads: () => {
set((state) => {
const allThreadIds = Object.keys(state.threads)
const favoriteThreadIds = allThreadIds.filter(
(threadId) => state.threads[threadId].isFavorite
)
const nonFavoriteThreadIds = allThreadIds.filter(
(threadId) => !state.threads[threadId].isFavorite
// Identify threads to keep (favorites OR have project metadata)
const threadsToKeepIds = allThreadIds.filter(
(threadId) =>
state.threads[threadId].isFavorite ||
state.threads[threadId].metadata?.project
)
// Only delete non-favorite threads
nonFavoriteThreadIds.forEach((threadId) => {
// Identify threads to delete (non-favorites AND no project metadata)
const threadsToDeleteIds = allThreadIds.filter(
(threadId) =>
!state.threads[threadId].isFavorite &&
!state.threads[threadId].metadata?.project
)
// Delete threads that are not favorites and not in projects
threadsToDeleteIds.forEach((threadId) => {
getServiceHub().threads().deleteThread(threadId)
})
// Keep only favorite threads
const remainingThreads = favoriteThreadIds.reduce(
// Keep favorite threads and threads with project metadata
const remainingThreads = threadsToKeepIds.reduce(
(acc, threadId) => {
acc[threadId] = state.threads[threadId]
return acc
@ -208,13 +218,18 @@ export const useThreads = create<ThreadState>()((set, get) => ({
setCurrentThreadId: (threadId) => {
if (threadId !== get().currentThreadId) set({ currentThreadId: threadId })
},
createThread: async (model, title, assistant) => {
createThread: async (model, title, assistant, projectMetadata) => {
const newThread: Thread = {
id: ulid(),
title: title ?? 'New Thread',
model,
updated: Date.now() / 1000,
assistants: assistant ? [assistant] : [],
...(projectMetadata && {
metadata: {
project: projectMetadata,
},
}),
}
return await getServiceHub()
.threads()
@ -328,4 +343,26 @@ export const useThreads = create<ThreadState>()((set, get) => ({
}
})
},
updateThread: (threadId, updates) => {
set((state) => {
const thread = state.threads[threadId]
if (!thread) return state
const updatedThread = {
...thread,
...updates,
updated: Date.now() / 1000,
}
getServiceHub().threads().updateThread(updatedThread)
const newThreads = { ...state.threads, [threadId]: updatedThread }
return {
threads: newThreads,
searchIndex: new Fzf<Thread[]>(Object.values(newThreads), {
selector: (item: Thread) => item.title,
}),
}
})
},
}))
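With the widened createThread signature, callers can attach a project at creation time rather than patching metadata afterwards (which the 100 ms timeout earlier in this commit approximates). A hedged usage sketch inside an async handler; model and assistant stand in for values the caller already has:

const project = useThreadManagement.getState().getFolderById(projectId)

const thread = await useThreads.getState().createThread(
  model, // current ThreadModel
  undefined, // keep the default 'New Thread' title
  assistant, // optional Assistant
  project && {
    id: project.id,
    name: project.name,
    updated_at: project.updated_at,
  }
)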

View File

@ -153,4 +153,16 @@ export const modelSettings = {
value: false,
},
},
batch_size: {
key: 'batch_size',
title: 'Batch Size',
description: 'Logical maximum batch size for processing prompts.',
controller_type: 'input',
controller_props: {
value: 2048,
placeholder: '2048',
type: 'number',
textAlign: 'right',
},
},
}

View File

@ -117,6 +117,7 @@
"chatInput": "Frage mich etwas..."
},
"confirm": "Bestätige",
"continue": "Weiter",
"loading": "Lade...",
"error": "Fehler",
"success": "Erfolg",
@ -127,6 +128,7 @@
"createAssistant": "Assistenten anlegen",
"enterApiKey": "API Key eingeben",
"scrollToBottom": "Zum Ende scrollen",
"generateAiResponse": "KI-Antwort generieren",
"addModel": {
"title": "Modell hinzufügen",
"modelId": "Modell ID",
@ -154,12 +156,12 @@
"delete": "Löschen"
},
"editJson": {
"errorParse": "Failed to parse JSON",
"errorPaste": "Failed to paste JSON",
"errorFormat": "Invalid JSON format",
"titleAll": "Edit All Servers Configuration",
"placeholder": "Enter JSON configuration...",
"save": "Save"
"errorParse": "JSON-Parsing fehlgeschlagen",
"errorPaste": "JSON-Einfügen fehlgeschlagen",
"errorFormat": "Ungültiges JSON-Format",
"titleAll": "Alle Serverkonfigurationen bearbeiten",
"placeholder": "JSON-Konfiguration eingeben...",
"save": "Speichern"
},
"editModel": {
"title": "Modell bearbeiten: {{modelId}}",
@ -228,11 +230,85 @@
"title": "Nachricht Metadaten"
}
},
"projects": {
"title": "Projekte",
"addProject": "Projekt hinzufügen",
"addToProject": "Zum Projekt hinzufügen",
"removeFromProject": "Vom Projekt entfernen",
"createNewProject": "Neues Projekt erstellen",
"editProject": "Projekt bearbeiten",
"deleteProject": "Projekt löschen",
"projectName": "Projektname",
"enterProjectName": "Projektname eingeben...",
"noProjectsAvailable": "Keine Projekte verfügbar",
"noProjectsYet": "Noch keine Projekte",
"noProjectsYetDesc": "Starten Sie ein neues Projekt, indem Sie auf die Schaltfläche Projekt hinzufügen klicken.",
"projectNotFound": "Projekt nicht gefunden",
"projectNotFoundDesc": "Das gesuchte Projekt existiert nicht oder wurde gelöscht.",
"deleteProjectDialog": {
"title": "Projekt löschen",
"description": "Sind Sie sicher, dass Sie dieses Projekt löschen möchten? Diese Aktion kann nicht rückgängig gemacht werden.",
"deleteButton": "Löschen",
"successWithName": "Projekt \"{{projectName}}\" erfolgreich gelöscht",
"successWithoutName": "Projekt erfolgreich gelöscht",
"error": "Projekt konnte nicht gelöscht werden. Bitte versuchen Sie es erneut.",
"ariaLabel": "{{projectName}} löschen"
},
"addProjectDialog": {
"createTitle": "Neues Projekt erstellen",
"editTitle": "Projekt bearbeiten",
"nameLabel": "Projektname",
"namePlaceholder": "Projektname eingeben...",
"createButton": "Erstellen",
"updateButton": "Aktualisieren",
"alreadyExists": "Projekt \"{{projectName}}\" existiert bereits",
"createSuccess": "Projekt \"{{projectName}}\" erfolgreich erstellt",
"renameSuccess": "Projekt von \"{{oldName}}\" zu \"{{newName}}\" umbenannt"
},
"noConversationsIn": "Keine Gespräche in {{projectName}}",
"startNewConversation": "Starten Sie ein neues Gespräch mit {{projectName}} unten",
"conversationsIn": "Gespräche in {{projectName}}",
"conversationsDescription": "Klicken Sie auf ein Gespräch, um weiterzuchatten, oder starten Sie unten ein neues.",
"thread": "Thread",
"threads": "Threads",
"updated": "Aktualisiert:",
"collapseThreads": "Threads einklappen",
"expandThreads": "Threads ausklappen",
"update": "Aktualisieren"
},
"toast": {
"allThreadsUnfavorited": {
"title": "Alle Threads De-Favorisieren ",
"description": "Alle deine Threads wurden defavorisiert."
},
"projectCreated": {
"title": "Projekt erstellt",
"description": "Projekt \"{{projectName}}\" erfolgreich erstellt"
},
"projectRenamed": {
"title": "Projekt umbenannt",
"description": "Projekt von \"{{oldName}}\" zu \"{{newName}}\" umbenannt"
},
"projectDeleted": {
"title": "Projekt gelöscht",
"description": "Projekt \"{{projectName}}\" erfolgreich gelöscht"
},
"projectAlreadyExists": {
"title": "Projekt existiert bereits",
"description": "Projekt \"{{projectName}}\" existiert bereits"
},
"projectDeleteFailed": {
"title": "Löschen fehlgeschlagen",
"description": "Projekt konnte nicht gelöscht werden. Bitte versuchen Sie es erneut."
},
"threadAssignedToProject": {
"title": "Thread zugewiesen",
"description": "Thread erfolgreich zu \"{{projectName}}\" hinzugefügt"
},
"threadRemovedFromProject": {
"title": "Thread entfernt",
"description": "Thread erfolgreich von \"{{projectName}}\" entfernt"
},
"deleteAllThreads": {
"title": "Alle Threads löschen",
"description": "Alle deine Threads wurden permanent gelöscht."
@ -280,6 +356,80 @@
"downloadAndVerificationComplete": {
"title": "Download abgeschlossen",
"description": "Modell \"{{item}}\" erfolgreich heruntergeladen und verifiziert"
},
"projectCreated": {
"title": "Projekt erstellt",
"description": "Projekt \"{{projectName}}\" erfolgreich erstellt"
},
"projectRenamed": {
"title": "Projekt umbenannt",
"description": "Projekt von \"{{oldName}}\" zu \"{{newName}}\" umbenannt"
},
"projectDeleted": {
"title": "Projekt gelöscht",
"description": "Projekt \"{{projectName}}\" erfolgreich gelöscht"
},
"projectAlreadyExists": {
"title": "Projekt existiert bereits",
"description": "Projekt \"{{projectName}}\" existiert bereits"
},
"projectDeleteFailed": {
"title": "Löschen fehlgeschlagen",
"description": "Projekt konnte nicht gelöscht werden. Bitte versuchen Sie es erneut."
},
"threadAssignedToProject": {
"title": "Thread zugewiesen",
"description": "Thread erfolgreich zu \"{{projectName}}\" hinzugefügt"
},
"threadRemovedFromProject": {
"title": "Thread entfernt",
"description": "Thread erfolgreich von \"{{projectName}}\" entfernt"
}
  }
}

View File

@ -119,6 +119,7 @@
"chatInput": "Ask me anything..."
},
"confirm": "Confirm",
"continue": "Continue",
"loading": "Loading...",
"error": "Error",
"success": "Success",
@ -231,6 +232,52 @@
"title": "Message Metadata"
}
},
"projects": {
"title": "Projects",
"addProject": "Add Project",
"addToProject": "Add to project",
"removeFromProject": "Remove from project",
"createNewProject": "Create New Project",
"editProject": "Edit Project",
"deleteProject": "Delete Project",
"projectName": "Project Name",
"enterProjectName": "Enter project name...",
"noProjectsAvailable": "No projects available",
"noProjectsYet": "No Projects Yet",
"noProjectsYetDesc": "Start a new project by clicking the Add Project button.",
"projectNotFound": "Project Not Found",
"projectNotFoundDesc": "The project you're looking for doesn't exist or has been deleted.",
"deleteProjectDialog": {
"title": "Delete Project",
"description": "Are you sure you want to delete this project? This action cannot be undone.",
"deleteButton": "Delete",
"successWithName": "Project \"{{projectName}}\" deleted successfully",
"successWithoutName": "Project deleted successfully",
"error": "Failed to delete project. Please try again.",
"ariaLabel": "Delete {{projectName}}"
},
"addProjectDialog": {
"createTitle": "Create New Project",
"editTitle": "Edit Project",
"nameLabel": "Project Name",
"namePlaceholder": "Enter project name...",
"createButton": "Create",
"updateButton": "Update",
"alreadyExists": "Project \"{{projectName}}\" already exists",
"createSuccess": "Project \"{{projectName}}\" created successfully",
"renameSuccess": "Project renamed from \"{{oldName}}\" to \"{{newName}}\""
},
"noConversationsIn": "No Conversations in {{projectName}}",
"startNewConversation": "Start a new conversation with {{projectName}} below",
"conversationsIn": "Conversations in {{projectName}}",
"conversationsDescription": "Click on any conversation to continue chatting, or start a new one below.",
"thread": "thread",
"threads": "threads",
"updated": "Updated:",
"collapseThreads": "Collapse threads",
"expandThreads": "Expand threads",
"update": "Update"
},
"toast": {
"allThreadsUnfavorited": {
"title": "All Threads Unfavorited",
@ -283,6 +330,34 @@
"downloadAndVerificationComplete": {
"title": "Download Complete",
"description": "Model \"{{item}}\" downloaded and verified successfully"
},
"projectCreated": {
"title": "Project Created",
"description": "Project \"{{projectName}}\" created successfully"
},
"projectRenamed": {
"title": "Project Renamed",
"description": "Project renamed from \"{{oldName}}\" to \"{{newName}}\""
},
"projectDeleted": {
"title": "Project Deleted",
"description": "Project \"{{projectName}}\" deleted successfully"
},
"projectAlreadyExists": {
"title": "Project Already Exists",
"description": "Project \"{{projectName}}\" already exists"
},
"projectDeleteFailed": {
"title": "Delete Failed",
"description": "Failed to delete project. Please try again."
},
"threadAssignedToProject": {
"title": "Thread Assigned",
"description": "Thread assigned to \"{{projectName}}\" successfully"
},
"threadRemovedFromProject": {
"title": "Thread Removed",
"description": "Thread removed from \"{{projectName}}\" successfully"
}
}
}

View File

@ -117,6 +117,7 @@
"chatInput": "Tanyakan apa saja padaku..."
},
"confirm": "Konfirmasi",
"continue": "Lanjutkan",
"loading": "Memuat...",
"error": "Kesalahan",
"success": "Sukses",
@ -127,6 +128,7 @@
"createAssistant": "Buat Asisten",
"enterApiKey": "Masukkan Kunci API",
"scrollToBottom": "Gulir ke bawah",
"generateAiResponse": "Hasilkan Respons AI",
"addModel": {
"title": "Tambah Model",
"modelId": "ID Model",
@ -170,6 +172,13 @@
"embeddings": "Embedding",
"notAvailable": "Belum tersedia"
},
"outOfContextError": {
"truncateInput": "Potong Input",
"title": "Kesalahan konteks habis",
"description": "Obrolan ini mencapai batas memori AI, seperti papan tulis yang penuh. Kami dapat memperluas jendela memori (disebut ukuran konteks) agar lebih mengingat, tetapi mungkin akan menggunakan lebih banyak memori komputer Anda. Kami juga dapat memotong input, yang berarti akan melupakan sebagian riwayat obrolan untuk memberi ruang bagi pesan baru.",
"increaseContextSizeDescription": "Apakah Anda ingin meningkatkan ukuran konteks?",
"increaseContextSize": "Tingkatkan Ukuran Konteks"
},
"toolApproval": {
"title": "Permintaan Izin Alat",
"description": "Asisten ingin menggunakan <strong>{{toolName}}</strong>",
@ -273,6 +282,80 @@
"downloadAndVerificationComplete": {
"title": "Unduhan Selesai",
"description": "Model \"{{item}}\" berhasil diunduh dan diverifikasi"
},
"projectCreated": {
"title": "Proyek Dibuat",
"description": "Proyek \"{{projectName}}\" berhasil dibuat"
},
"projectRenamed": {
"title": "Proyek Diganti Nama",
"description": "Proyek diganti nama dari \"{{oldName}}\" ke \"{{newName}}\""
},
"projectDeleted": {
"title": "Proyek Dihapus",
"description": "Proyek \"{{projectName}}\" berhasil dihapus"
},
"projectAlreadyExists": {
"title": "Proyek Sudah Ada",
"description": "Proyek \"{{projectName}}\" sudah ada"
},
"projectDeleteFailed": {
"title": "Penghapusan Gagal",
"description": "Gagal menghapus proyek. Silakan coba lagi."
},
"threadAssignedToProject": {
"title": "Thread Ditugaskan",
"description": "Thread berhasil ditugaskan ke \"{{projectName}}\""
},
"threadRemovedFromProject": {
"title": "Thread Dihapus",
"description": "Thread berhasil dihapus dari \"{{projectName}}\""
}
},
"projects": {
"title": "Proyek",
"addProject": "Tambah Proyek",
"addToProject": "Tambahkan ke proyek",
"removeFromProject": "Hapus dari proyek",
"createNewProject": "Buat Proyek Baru",
"editProject": "Edit Proyek",
"deleteProject": "Hapus Proyek",
"projectName": "Nama Proyek",
"enterProjectName": "Masukkan nama proyek...",
"noProjectsAvailable": "Tidak ada proyek tersedia",
"noProjectsYet": "Belum Ada Proyek",
"noProjectsYetDesc": "Mulai proyek baru dengan mengklik tombol Tambah Proyek.",
"projectNotFound": "Proyek Tidak Ditemukan",
"projectNotFoundDesc": "Proyek yang Anda cari tidak ada atau telah dihapus.",
"deleteProjectDialog": {
"title": "Hapus Proyek",
"description": "Apakah Anda yakin ingin menghapus proyek ini? Tindakan ini tidak dapat dibatalkan.",
"deleteButton": "Hapus",
"successWithName": "Proyek \"{{projectName}}\" berhasil dihapus",
"successWithoutName": "Proyek berhasil dihapus",
"error": "Gagal menghapus proyek. Silakan coba lagi.",
"ariaLabel": "Hapus {{projectName}}"
},
"addProjectDialog": {
"createTitle": "Buat Proyek Baru",
"editTitle": "Edit Proyek",
"nameLabel": "Nama Proyek",
"namePlaceholder": "Masukkan nama proyek...",
"createButton": "Buat",
"updateButton": "Perbarui",
"alreadyExists": "Proyek \"{{projectName}}\" sudah ada",
"createSuccess": "Proyek \"{{projectName}}\" berhasil dibuat",
"renameSuccess": "Proyek diubah dari \"{{oldName}}\" menjadi \"{{newName}}\""
},
"noConversationsIn": "Tidak Ada Percakapan di {{projectName}}",
"startNewConversation": "Mulai percakapan baru dengan {{projectName}} di bawah",
"conversationsIn": "Percakapan di {{projectName}}",
"conversationsDescription": "Klik percakapan mana pun untuk melanjutkan chatting, atau mulai yang baru di bawah.",
"thread": "utas",
"threads": "utas",
"updated": "Diperbarui:",
"collapseThreads": "Tutup utas",
"expandThreads": "Buka utas",
"update": "Perbarui"
}
}

View File

@ -117,6 +117,7 @@
"chatInput": "Zapytaj mnie o cokolwiek…"
},
"confirm": "Potwierdź",
"continue": "Kontynuuj",
"loading": "Wczytywanie…",
"error": "Błąd",
"success": "Sukces",
@ -229,6 +230,52 @@
"title": "Metadane Wiadomości"
}
},
"projects": {
"title": "Projekty",
"addProject": "Dodaj Projekt",
"addToProject": "Dodaj do projektu",
"removeFromProject": "Usuń z projektu",
"createNewProject": "Utwórz Nowy Projekt",
"editProject": "Edytuj Projekt",
"deleteProject": "Usuń Projekt",
"projectName": "Nazwa Projektu",
"enterProjectName": "Wprowadź nazwę projektu...",
"noProjectsAvailable": "Brak dostępnych projektów",
"noProjectsYet": "Brak Projektów",
"noProjectsYetDesc": "Rozpocznij nowy projekt klikając przycisk Dodaj Projekt.",
"projectNotFound": "Projekt Nie Znaleziony",
"projectNotFoundDesc": "Projekt, którego szukasz nie istnieje lub został usunięty.",
"deleteProjectDialog": {
"title": "Usuń Projekt",
"description": "Na pewno chcesz usunąć ten projekt? Tej operacji nie można cofnąć.",
"deleteButton": "Usuń",
"successWithName": "Projekt \"{{projectName}}\" został pomyślnie usunięty",
"successWithoutName": "Projekt został pomyślnie usunięty",
"error": "Nie udało się usunąć projektu. Spróbuj ponownie.",
"ariaLabel": "Usuń {{projectName}}"
},
"addProjectDialog": {
"createTitle": "Utwórz Nowy Projekt",
"editTitle": "Edytuj Projekt",
"nameLabel": "Nazwa Projektu",
"namePlaceholder": "Wprowadź nazwę projektu...",
"createButton": "Utwórz",
"updateButton": "Aktualizuj",
"alreadyExists": "Projekt \"{{projectName}}\" już istnieje",
"createSuccess": "Projekt \"{{projectName}}\" został pomyślnie utworzony",
"renameSuccess": "Projekt zmieniono z \"{{oldName}}\" na \"{{newName}}\""
},
"noConversationsIn": "Brak Rozmów w {{projectName}}",
"startNewConversation": "Rozpocznij nową rozmowę z {{projectName}} poniżej",
"conversationsIn": "Rozmowy w {{projectName}}",
"conversationsDescription": "Kliknij na dowolną rozmowę aby kontynuować czat, lub rozpocznij nową poniżej.",
"thread": "wątek",
"threads": "wątki",
"updated": "Zaktualizowano:",
"collapseThreads": "Zwiń wątki",
"expandThreads": "Rozwiń wątki",
"update": "Aktualizuj"
},
"toast": {
"allThreadsUnfavorited": {
"title": "Wszystkie Wątki Usunięte z Ulubionych",

View File

@ -117,6 +117,7 @@
"chatInput": "Hỏi tôi bất cứ điều gì..."
},
"confirm": "Xác nhận",
"continue": "Tiếp tục",
"loading": "Đang tải...",
"error": "Lỗi",
"success": "Thành công",

View File

@ -117,6 +117,7 @@
"chatInput": "随便问我什么..."
},
"confirm": "确认",
"continue": "继续",
"loading": "加载中...",
"error": "错误",
"success": "成功",

View File

@ -117,6 +117,7 @@
"chatInput": "問我任何事..."
},
"confirm": "確認",
"continue": "繼續",
"loading": "載入中...",
"error": "錯誤",
"success": "成功",

View File

@ -8,130 +8,330 @@
// You should NOT make any changes in this file as it will be overwritten.
// Additionally, you should also exclude this file from your linter and/or formatter to prevent it from being checked or modified.
import { Route as rootRouteImport } from './routes/__root'
import { Route as SystemMonitorRouteImport } from './routes/system-monitor'
import { Route as LogsRouteImport } from './routes/logs'
import { Route as AssistantRouteImport } from './routes/assistant'
import { Route as IndexRouteImport } from './routes/index'
import { Route as HubIndexRouteImport } from './routes/hub/index'
import { Route as ThreadsThreadIdRouteImport } from './routes/threads/$threadId'
import { Route as SettingsShortcutsRouteImport } from './routes/settings/shortcuts'
import { Route as SettingsPrivacyRouteImport } from './routes/settings/privacy'
import { Route as SettingsMcpServersRouteImport } from './routes/settings/mcp-servers'
import { Route as SettingsLocalApiServerRouteImport } from './routes/settings/local-api-server'
import { Route as SettingsHttpsProxyRouteImport } from './routes/settings/https-proxy'
import { Route as SettingsHardwareRouteImport } from './routes/settings/hardware'
import { Route as SettingsGeneralRouteImport } from './routes/settings/general'
import { Route as SettingsExtensionsRouteImport } from './routes/settings/extensions'
import { Route as SettingsAppearanceRouteImport } from './routes/settings/appearance'
import { Route as LocalApiServerLogsRouteImport } from './routes/local-api-server/logs'
import { Route as HubModelIdRouteImport } from './routes/hub/$modelId'
import { Route as SettingsProvidersIndexRouteImport } from './routes/settings/providers/index'
import { Route as SettingsProvidersProviderNameRouteImport } from './routes/settings/providers/$providerName'
import { Route as AuthGoogleCallbackRouteImport } from './routes/auth.google.callback'
// Import Routes
const SystemMonitorRoute = SystemMonitorRouteImport.update({
import { Route as rootRoute } from './routes/__root'
import { Route as SystemMonitorImport } from './routes/system-monitor'
import { Route as LogsImport } from './routes/logs'
import { Route as AssistantImport } from './routes/assistant'
import { Route as IndexImport } from './routes/index'
import { Route as ProjectIndexImport } from './routes/project/index'
import { Route as HubIndexImport } from './routes/hub/index'
import { Route as ThreadsThreadIdImport } from './routes/threads/$threadId'
import { Route as SettingsShortcutsImport } from './routes/settings/shortcuts'
import { Route as SettingsPrivacyImport } from './routes/settings/privacy'
import { Route as SettingsMcpServersImport } from './routes/settings/mcp-servers'
import { Route as SettingsLocalApiServerImport } from './routes/settings/local-api-server'
import { Route as SettingsHttpsProxyImport } from './routes/settings/https-proxy'
import { Route as SettingsHardwareImport } from './routes/settings/hardware'
import { Route as SettingsGeneralImport } from './routes/settings/general'
import { Route as SettingsExtensionsImport } from './routes/settings/extensions'
import { Route as SettingsAppearanceImport } from './routes/settings/appearance'
import { Route as ProjectProjectIdImport } from './routes/project/$projectId'
import { Route as LocalApiServerLogsImport } from './routes/local-api-server/logs'
import { Route as HubModelIdImport } from './routes/hub/$modelId'
import { Route as SettingsProvidersIndexImport } from './routes/settings/providers/index'
import { Route as SettingsProvidersProviderNameImport } from './routes/settings/providers/$providerName'
import { Route as AuthGoogleCallbackImport } from './routes/auth.google.callback'
// Create/Update Routes
const SystemMonitorRoute = SystemMonitorImport.update({
id: '/system-monitor',
path: '/system-monitor',
getParentRoute: () => rootRouteImport,
getParentRoute: () => rootRoute,
} as any)
const LogsRoute = LogsRouteImport.update({
const LogsRoute = LogsImport.update({
id: '/logs',
path: '/logs',
getParentRoute: () => rootRouteImport,
getParentRoute: () => rootRoute,
} as any)
const AssistantRoute = AssistantRouteImport.update({
const AssistantRoute = AssistantImport.update({
id: '/assistant',
path: '/assistant',
getParentRoute: () => rootRouteImport,
getParentRoute: () => rootRoute,
} as any)
const IndexRoute = IndexRouteImport.update({
const IndexRoute = IndexImport.update({
id: '/',
path: '/',
getParentRoute: () => rootRouteImport,
getParentRoute: () => rootRoute,
} as any)
const HubIndexRoute = HubIndexRouteImport.update({
const ProjectIndexRoute = ProjectIndexImport.update({
id: '/project/',
path: '/project/',
getParentRoute: () => rootRoute,
} as any)
const HubIndexRoute = HubIndexImport.update({
id: '/hub/',
path: '/hub/',
getParentRoute: () => rootRouteImport,
getParentRoute: () => rootRoute,
} as any)
const ThreadsThreadIdRoute = ThreadsThreadIdRouteImport.update({
const ThreadsThreadIdRoute = ThreadsThreadIdImport.update({
id: '/threads/$threadId',
path: '/threads/$threadId',
getParentRoute: () => rootRouteImport,
getParentRoute: () => rootRoute,
} as any)
const SettingsShortcutsRoute = SettingsShortcutsRouteImport.update({
const SettingsShortcutsRoute = SettingsShortcutsImport.update({
id: '/settings/shortcuts',
path: '/settings/shortcuts',
getParentRoute: () => rootRouteImport,
getParentRoute: () => rootRoute,
} as any)
const SettingsPrivacyRoute = SettingsPrivacyRouteImport.update({
const SettingsPrivacyRoute = SettingsPrivacyImport.update({
id: '/settings/privacy',
path: '/settings/privacy',
getParentRoute: () => rootRouteImport,
getParentRoute: () => rootRoute,
} as any)
const SettingsMcpServersRoute = SettingsMcpServersRouteImport.update({
const SettingsMcpServersRoute = SettingsMcpServersImport.update({
id: '/settings/mcp-servers',
path: '/settings/mcp-servers',
getParentRoute: () => rootRouteImport,
getParentRoute: () => rootRoute,
} as any)
const SettingsLocalApiServerRoute = SettingsLocalApiServerRouteImport.update({
const SettingsLocalApiServerRoute = SettingsLocalApiServerImport.update({
id: '/settings/local-api-server',
path: '/settings/local-api-server',
getParentRoute: () => rootRouteImport,
getParentRoute: () => rootRoute,
} as any)
const SettingsHttpsProxyRoute = SettingsHttpsProxyRouteImport.update({
const SettingsHttpsProxyRoute = SettingsHttpsProxyImport.update({
id: '/settings/https-proxy',
path: '/settings/https-proxy',
getParentRoute: () => rootRouteImport,
getParentRoute: () => rootRoute,
} as any)
const SettingsHardwareRoute = SettingsHardwareRouteImport.update({
const SettingsHardwareRoute = SettingsHardwareImport.update({
id: '/settings/hardware',
path: '/settings/hardware',
getParentRoute: () => rootRouteImport,
getParentRoute: () => rootRoute,
} as any)
const SettingsGeneralRoute = SettingsGeneralRouteImport.update({
const SettingsGeneralRoute = SettingsGeneralImport.update({
id: '/settings/general',
path: '/settings/general',
getParentRoute: () => rootRouteImport,
getParentRoute: () => rootRoute,
} as any)
const SettingsExtensionsRoute = SettingsExtensionsRouteImport.update({
const SettingsExtensionsRoute = SettingsExtensionsImport.update({
id: '/settings/extensions',
path: '/settings/extensions',
getParentRoute: () => rootRouteImport,
getParentRoute: () => rootRoute,
} as any)
const SettingsAppearanceRoute = SettingsAppearanceRouteImport.update({
const SettingsAppearanceRoute = SettingsAppearanceImport.update({
id: '/settings/appearance',
path: '/settings/appearance',
getParentRoute: () => rootRouteImport,
getParentRoute: () => rootRoute,
} as any)
const LocalApiServerLogsRoute = LocalApiServerLogsRouteImport.update({
const ProjectProjectIdRoute = ProjectProjectIdImport.update({
id: '/project/$projectId',
path: '/project/$projectId',
getParentRoute: () => rootRoute,
} as any)
const LocalApiServerLogsRoute = LocalApiServerLogsImport.update({
id: '/local-api-server/logs',
path: '/local-api-server/logs',
getParentRoute: () => rootRouteImport,
getParentRoute: () => rootRoute,
} as any)
const HubModelIdRoute = HubModelIdRouteImport.update({
const HubModelIdRoute = HubModelIdImport.update({
id: '/hub/$modelId',
path: '/hub/$modelId',
getParentRoute: () => rootRouteImport,
getParentRoute: () => rootRoute,
} as any)
const SettingsProvidersIndexRoute = SettingsProvidersIndexRouteImport.update({
const SettingsProvidersIndexRoute = SettingsProvidersIndexImport.update({
id: '/settings/providers/',
path: '/settings/providers/',
getParentRoute: () => rootRouteImport,
getParentRoute: () => rootRoute,
} as any)
const SettingsProvidersProviderNameRoute =
SettingsProvidersProviderNameRouteImport.update({
SettingsProvidersProviderNameImport.update({
id: '/settings/providers/$providerName',
path: '/settings/providers/$providerName',
getParentRoute: () => rootRouteImport,
getParentRoute: () => rootRoute,
} as any)
const AuthGoogleCallbackRoute = AuthGoogleCallbackRouteImport.update({
const AuthGoogleCallbackRoute = AuthGoogleCallbackImport.update({
id: '/auth/google/callback',
path: '/auth/google/callback',
getParentRoute: () => rootRouteImport,
getParentRoute: () => rootRoute,
} as any)
// Populate the FileRoutesByPath interface
declare module '@tanstack/react-router' {
interface FileRoutesByPath {
'/': {
id: '/'
path: '/'
fullPath: '/'
preLoaderRoute: typeof IndexImport
parentRoute: typeof rootRoute
}
'/assistant': {
id: '/assistant'
path: '/assistant'
fullPath: '/assistant'
preLoaderRoute: typeof AssistantImport
parentRoute: typeof rootRoute
}
'/logs': {
id: '/logs'
path: '/logs'
fullPath: '/logs'
preLoaderRoute: typeof LogsImport
parentRoute: typeof rootRoute
}
'/system-monitor': {
id: '/system-monitor'
path: '/system-monitor'
fullPath: '/system-monitor'
preLoaderRoute: typeof SystemMonitorImport
parentRoute: typeof rootRoute
}
'/hub/$modelId': {
id: '/hub/$modelId'
path: '/hub/$modelId'
fullPath: '/hub/$modelId'
preLoaderRoute: typeof HubModelIdImport
parentRoute: typeof rootRoute
}
'/local-api-server/logs': {
id: '/local-api-server/logs'
path: '/local-api-server/logs'
fullPath: '/local-api-server/logs'
preLoaderRoute: typeof LocalApiServerLogsImport
parentRoute: typeof rootRoute
}
'/project/$projectId': {
id: '/project/$projectId'
path: '/project/$projectId'
fullPath: '/project/$projectId'
preLoaderRoute: typeof ProjectProjectIdImport
parentRoute: typeof rootRoute
}
'/settings/appearance': {
id: '/settings/appearance'
path: '/settings/appearance'
fullPath: '/settings/appearance'
preLoaderRoute: typeof SettingsAppearanceImport
parentRoute: typeof rootRoute
}
'/settings/extensions': {
id: '/settings/extensions'
path: '/settings/extensions'
fullPath: '/settings/extensions'
preLoaderRoute: typeof SettingsExtensionsImport
parentRoute: typeof rootRoute
}
'/settings/general': {
id: '/settings/general'
path: '/settings/general'
fullPath: '/settings/general'
preLoaderRoute: typeof SettingsGeneralImport
parentRoute: typeof rootRoute
}
'/settings/hardware': {
id: '/settings/hardware'
path: '/settings/hardware'
fullPath: '/settings/hardware'
preLoaderRoute: typeof SettingsHardwareImport
parentRoute: typeof rootRoute
}
'/settings/https-proxy': {
id: '/settings/https-proxy'
path: '/settings/https-proxy'
fullPath: '/settings/https-proxy'
preLoaderRoute: typeof SettingsHttpsProxyImport
parentRoute: typeof rootRoute
}
'/settings/local-api-server': {
id: '/settings/local-api-server'
path: '/settings/local-api-server'
fullPath: '/settings/local-api-server'
preLoaderRoute: typeof SettingsLocalApiServerImport
parentRoute: typeof rootRoute
}
'/settings/mcp-servers': {
id: '/settings/mcp-servers'
path: '/settings/mcp-servers'
fullPath: '/settings/mcp-servers'
preLoaderRoute: typeof SettingsMcpServersImport
parentRoute: typeof rootRoute
}
'/settings/privacy': {
id: '/settings/privacy'
path: '/settings/privacy'
fullPath: '/settings/privacy'
preLoaderRoute: typeof SettingsPrivacyImport
parentRoute: typeof rootRoute
}
'/settings/shortcuts': {
id: '/settings/shortcuts'
path: '/settings/shortcuts'
fullPath: '/settings/shortcuts'
preLoaderRoute: typeof SettingsShortcutsImport
parentRoute: typeof rootRoute
}
'/threads/$threadId': {
id: '/threads/$threadId'
path: '/threads/$threadId'
fullPath: '/threads/$threadId'
preLoaderRoute: typeof ThreadsThreadIdImport
parentRoute: typeof rootRoute
}
'/hub/': {
id: '/hub/'
path: '/hub'
fullPath: '/hub'
preLoaderRoute: typeof HubIndexImport
parentRoute: typeof rootRoute
}
'/project/': {
id: '/project/'
path: '/project'
fullPath: '/project'
preLoaderRoute: typeof ProjectIndexImport
parentRoute: typeof rootRoute
}
'/auth/google/callback': {
id: '/auth/google/callback'
path: '/auth/google/callback'
fullPath: '/auth/google/callback'
preLoaderRoute: typeof AuthGoogleCallbackImport
parentRoute: typeof rootRoute
}
'/settings/providers/$providerName': {
id: '/settings/providers/$providerName'
path: '/settings/providers/$providerName'
fullPath: '/settings/providers/$providerName'
preLoaderRoute: typeof SettingsProvidersProviderNameImport
parentRoute: typeof rootRoute
}
'/settings/providers/': {
id: '/settings/providers/'
path: '/settings/providers'
fullPath: '/settings/providers'
preLoaderRoute: typeof SettingsProvidersIndexImport
parentRoute: typeof rootRoute
}
}
}
// Create and export the route tree
export interface FileRoutesByFullPath {
'/': typeof IndexRoute
'/assistant': typeof AssistantRoute
@ -139,6 +339,7 @@ export interface FileRoutesByFullPath {
'/system-monitor': typeof SystemMonitorRoute
'/hub/$modelId': typeof HubModelIdRoute
'/local-api-server/logs': typeof LocalApiServerLogsRoute
'/project/$projectId': typeof ProjectProjectIdRoute
'/settings/appearance': typeof SettingsAppearanceRoute
'/settings/extensions': typeof SettingsExtensionsRoute
'/settings/general': typeof SettingsGeneralRoute
@ -150,10 +351,12 @@ export interface FileRoutesByFullPath {
'/settings/shortcuts': typeof SettingsShortcutsRoute
'/threads/$threadId': typeof ThreadsThreadIdRoute
'/hub': typeof HubIndexRoute
'/project': typeof ProjectIndexRoute
'/auth/google/callback': typeof AuthGoogleCallbackRoute
'/settings/providers/$providerName': typeof SettingsProvidersProviderNameRoute
'/settings/providers': typeof SettingsProvidersIndexRoute
}
export interface FileRoutesByTo {
'/': typeof IndexRoute
'/assistant': typeof AssistantRoute
@ -161,6 +364,7 @@ export interface FileRoutesByTo {
'/system-monitor': typeof SystemMonitorRoute
'/hub/$modelId': typeof HubModelIdRoute
'/local-api-server/logs': typeof LocalApiServerLogsRoute
'/project/$projectId': typeof ProjectProjectIdRoute
'/settings/appearance': typeof SettingsAppearanceRoute
'/settings/extensions': typeof SettingsExtensionsRoute
'/settings/general': typeof SettingsGeneralRoute
@ -172,18 +376,21 @@ export interface FileRoutesByTo {
'/settings/shortcuts': typeof SettingsShortcutsRoute
'/threads/$threadId': typeof ThreadsThreadIdRoute
'/hub': typeof HubIndexRoute
'/project': typeof ProjectIndexRoute
'/auth/google/callback': typeof AuthGoogleCallbackRoute
'/settings/providers/$providerName': typeof SettingsProvidersProviderNameRoute
'/settings/providers': typeof SettingsProvidersIndexRoute
}
export interface FileRoutesById {
__root__: typeof rootRouteImport
'__root__': typeof rootRoute
'/': typeof IndexRoute
'/assistant': typeof AssistantRoute
'/logs': typeof LogsRoute
'/system-monitor': typeof SystemMonitorRoute
'/hub/$modelId': typeof HubModelIdRoute
'/local-api-server/logs': typeof LocalApiServerLogsRoute
'/project/$projectId': typeof ProjectProjectIdRoute
'/settings/appearance': typeof SettingsAppearanceRoute
'/settings/extensions': typeof SettingsExtensionsRoute
'/settings/general': typeof SettingsGeneralRoute
@ -195,10 +402,12 @@ export interface FileRoutesById {
'/settings/shortcuts': typeof SettingsShortcutsRoute
'/threads/$threadId': typeof ThreadsThreadIdRoute
'/hub/': typeof HubIndexRoute
'/project/': typeof ProjectIndexRoute
'/auth/google/callback': typeof AuthGoogleCallbackRoute
'/settings/providers/$providerName': typeof SettingsProvidersProviderNameRoute
'/settings/providers/': typeof SettingsProvidersIndexRoute
}
export interface FileRouteTypes {
fileRoutesByFullPath: FileRoutesByFullPath
fullPaths:
@ -208,6 +417,7 @@ export interface FileRouteTypes {
| '/system-monitor'
| '/hub/$modelId'
| '/local-api-server/logs'
| '/project/$projectId'
| '/settings/appearance'
| '/settings/extensions'
| '/settings/general'
@ -219,6 +429,7 @@ export interface FileRouteTypes {
| '/settings/shortcuts'
| '/threads/$threadId'
| '/hub'
| '/project'
| '/auth/google/callback'
| '/settings/providers/$providerName'
| '/settings/providers'
@ -230,6 +441,7 @@ export interface FileRouteTypes {
| '/system-monitor'
| '/hub/$modelId'
| '/local-api-server/logs'
| '/project/$projectId'
| '/settings/appearance'
| '/settings/extensions'
| '/settings/general'
@ -241,6 +453,7 @@ export interface FileRouteTypes {
| '/settings/shortcuts'
| '/threads/$threadId'
| '/hub'
| '/project'
| '/auth/google/callback'
| '/settings/providers/$providerName'
| '/settings/providers'
@ -252,6 +465,7 @@ export interface FileRouteTypes {
| '/system-monitor'
| '/hub/$modelId'
| '/local-api-server/logs'
| '/project/$projectId'
| '/settings/appearance'
| '/settings/extensions'
| '/settings/general'
@ -263,11 +477,13 @@ export interface FileRouteTypes {
| '/settings/shortcuts'
| '/threads/$threadId'
| '/hub/'
| '/project/'
| '/auth/google/callback'
| '/settings/providers/$providerName'
| '/settings/providers/'
fileRoutesById: FileRoutesById
}
export interface RootRouteChildren {
IndexRoute: typeof IndexRoute
AssistantRoute: typeof AssistantRoute
@ -275,6 +491,7 @@ export interface RootRouteChildren {
SystemMonitorRoute: typeof SystemMonitorRoute
HubModelIdRoute: typeof HubModelIdRoute
LocalApiServerLogsRoute: typeof LocalApiServerLogsRoute
ProjectProjectIdRoute: typeof ProjectProjectIdRoute
SettingsAppearanceRoute: typeof SettingsAppearanceRoute
SettingsExtensionsRoute: typeof SettingsExtensionsRoute
SettingsGeneralRoute: typeof SettingsGeneralRoute
@ -286,156 +503,12 @@ export interface RootRouteChildren {
SettingsShortcutsRoute: typeof SettingsShortcutsRoute
ThreadsThreadIdRoute: typeof ThreadsThreadIdRoute
HubIndexRoute: typeof HubIndexRoute
ProjectIndexRoute: typeof ProjectIndexRoute
AuthGoogleCallbackRoute: typeof AuthGoogleCallbackRoute
SettingsProvidersProviderNameRoute: typeof SettingsProvidersProviderNameRoute
SettingsProvidersIndexRoute: typeof SettingsProvidersIndexRoute
}
declare module '@tanstack/react-router' {
interface FileRoutesByPath {
'/system-monitor': {
id: '/system-monitor'
path: '/system-monitor'
fullPath: '/system-monitor'
preLoaderRoute: typeof SystemMonitorRouteImport
parentRoute: typeof rootRouteImport
}
'/logs': {
id: '/logs'
path: '/logs'
fullPath: '/logs'
preLoaderRoute: typeof LogsRouteImport
parentRoute: typeof rootRouteImport
}
'/assistant': {
id: '/assistant'
path: '/assistant'
fullPath: '/assistant'
preLoaderRoute: typeof AssistantRouteImport
parentRoute: typeof rootRouteImport
}
'/': {
id: '/'
path: '/'
fullPath: '/'
preLoaderRoute: typeof IndexRouteImport
parentRoute: typeof rootRouteImport
}
'/hub/': {
id: '/hub/'
path: '/hub'
fullPath: '/hub'
preLoaderRoute: typeof HubIndexRouteImport
parentRoute: typeof rootRouteImport
}
'/threads/$threadId': {
id: '/threads/$threadId'
path: '/threads/$threadId'
fullPath: '/threads/$threadId'
preLoaderRoute: typeof ThreadsThreadIdRouteImport
parentRoute: typeof rootRouteImport
}
'/settings/shortcuts': {
id: '/settings/shortcuts'
path: '/settings/shortcuts'
fullPath: '/settings/shortcuts'
preLoaderRoute: typeof SettingsShortcutsRouteImport
parentRoute: typeof rootRouteImport
}
'/settings/privacy': {
id: '/settings/privacy'
path: '/settings/privacy'
fullPath: '/settings/privacy'
preLoaderRoute: typeof SettingsPrivacyRouteImport
parentRoute: typeof rootRouteImport
}
'/settings/mcp-servers': {
id: '/settings/mcp-servers'
path: '/settings/mcp-servers'
fullPath: '/settings/mcp-servers'
preLoaderRoute: typeof SettingsMcpServersRouteImport
parentRoute: typeof rootRouteImport
}
'/settings/local-api-server': {
id: '/settings/local-api-server'
path: '/settings/local-api-server'
fullPath: '/settings/local-api-server'
preLoaderRoute: typeof SettingsLocalApiServerRouteImport
parentRoute: typeof rootRouteImport
}
'/settings/https-proxy': {
id: '/settings/https-proxy'
path: '/settings/https-proxy'
fullPath: '/settings/https-proxy'
preLoaderRoute: typeof SettingsHttpsProxyRouteImport
parentRoute: typeof rootRouteImport
}
'/settings/hardware': {
id: '/settings/hardware'
path: '/settings/hardware'
fullPath: '/settings/hardware'
preLoaderRoute: typeof SettingsHardwareRouteImport
parentRoute: typeof rootRouteImport
}
'/settings/general': {
id: '/settings/general'
path: '/settings/general'
fullPath: '/settings/general'
preLoaderRoute: typeof SettingsGeneralRouteImport
parentRoute: typeof rootRouteImport
}
'/settings/extensions': {
id: '/settings/extensions'
path: '/settings/extensions'
fullPath: '/settings/extensions'
preLoaderRoute: typeof SettingsExtensionsRouteImport
parentRoute: typeof rootRouteImport
}
'/settings/appearance': {
id: '/settings/appearance'
path: '/settings/appearance'
fullPath: '/settings/appearance'
preLoaderRoute: typeof SettingsAppearanceRouteImport
parentRoute: typeof rootRouteImport
}
'/local-api-server/logs': {
id: '/local-api-server/logs'
path: '/local-api-server/logs'
fullPath: '/local-api-server/logs'
preLoaderRoute: typeof LocalApiServerLogsRouteImport
parentRoute: typeof rootRouteImport
}
'/hub/$modelId': {
id: '/hub/$modelId'
path: '/hub/$modelId'
fullPath: '/hub/$modelId'
preLoaderRoute: typeof HubModelIdRouteImport
parentRoute: typeof rootRouteImport
}
'/settings/providers/': {
id: '/settings/providers/'
path: '/settings/providers'
fullPath: '/settings/providers'
preLoaderRoute: typeof SettingsProvidersIndexRouteImport
parentRoute: typeof rootRouteImport
}
'/settings/providers/$providerName': {
id: '/settings/providers/$providerName'
path: '/settings/providers/$providerName'
fullPath: '/settings/providers/$providerName'
preLoaderRoute: typeof SettingsProvidersProviderNameRouteImport
parentRoute: typeof rootRouteImport
}
'/auth/google/callback': {
id: '/auth/google/callback'
path: '/auth/google/callback'
fullPath: '/auth/google/callback'
preLoaderRoute: typeof AuthGoogleCallbackRouteImport
parentRoute: typeof rootRouteImport
}
}
}
const rootRouteChildren: RootRouteChildren = {
IndexRoute: IndexRoute,
AssistantRoute: AssistantRoute,
@ -443,6 +516,7 @@ const rootRouteChildren: RootRouteChildren = {
SystemMonitorRoute: SystemMonitorRoute,
HubModelIdRoute: HubModelIdRoute,
LocalApiServerLogsRoute: LocalApiServerLogsRoute,
ProjectProjectIdRoute: ProjectProjectIdRoute,
SettingsAppearanceRoute: SettingsAppearanceRoute,
SettingsExtensionsRoute: SettingsExtensionsRoute,
SettingsGeneralRoute: SettingsGeneralRoute,
@ -454,10 +528,112 @@ const rootRouteChildren: RootRouteChildren = {
SettingsShortcutsRoute: SettingsShortcutsRoute,
ThreadsThreadIdRoute: ThreadsThreadIdRoute,
HubIndexRoute: HubIndexRoute,
ProjectIndexRoute: ProjectIndexRoute,
AuthGoogleCallbackRoute: AuthGoogleCallbackRoute,
SettingsProvidersProviderNameRoute: SettingsProvidersProviderNameRoute,
SettingsProvidersIndexRoute: SettingsProvidersIndexRoute,
}
export const routeTree = rootRouteImport
export const routeTree = rootRoute
._addFileChildren(rootRouteChildren)
._addFileTypes<FileRouteTypes>()
/* ROUTE_MANIFEST_START
{
"routes": {
"__root__": {
"filePath": "__root.tsx",
"children": [
"/",
"/assistant",
"/logs",
"/system-monitor",
"/hub/$modelId",
"/local-api-server/logs",
"/project/$projectId",
"/settings/appearance",
"/settings/extensions",
"/settings/general",
"/settings/hardware",
"/settings/https-proxy",
"/settings/local-api-server",
"/settings/mcp-servers",
"/settings/privacy",
"/settings/shortcuts",
"/threads/$threadId",
"/hub/",
"/project/",
"/auth/google/callback",
"/settings/providers/$providerName",
"/settings/providers/"
]
},
"/": {
"filePath": "index.tsx"
},
"/assistant": {
"filePath": "assistant.tsx"
},
"/logs": {
"filePath": "logs.tsx"
},
"/system-monitor": {
"filePath": "system-monitor.tsx"
},
"/hub/$modelId": {
"filePath": "hub/$modelId.tsx"
},
"/local-api-server/logs": {
"filePath": "local-api-server/logs.tsx"
},
"/project/$projectId": {
"filePath": "project/$projectId.tsx"
},
"/settings/appearance": {
"filePath": "settings/appearance.tsx"
},
"/settings/extensions": {
"filePath": "settings/extensions.tsx"
},
"/settings/general": {
"filePath": "settings/general.tsx"
},
"/settings/hardware": {
"filePath": "settings/hardware.tsx"
},
"/settings/https-proxy": {
"filePath": "settings/https-proxy.tsx"
},
"/settings/local-api-server": {
"filePath": "settings/local-api-server.tsx"
},
"/settings/mcp-servers": {
"filePath": "settings/mcp-servers.tsx"
},
"/settings/privacy": {
"filePath": "settings/privacy.tsx"
},
"/settings/shortcuts": {
"filePath": "settings/shortcuts.tsx"
},
"/threads/$threadId": {
"filePath": "threads/$threadId.tsx"
},
"/hub/": {
"filePath": "hub/index.tsx"
},
"/project/": {
"filePath": "project/index.tsx"
},
"/auth/google/callback": {
"filePath": "auth.google.callback.tsx"
},
"/settings/providers/$providerName": {
"filePath": "settings/providers/$providerName.tsx"
},
"/settings/providers/": {
"filePath": "settings/providers/index.tsx"
}
}
}
ROUTE_MANIFEST_END */
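The regenerated tree is mechanical, but the part worth noting is the declare module '@tanstack/react-router' augmentation: once FileRoutesByPath lists the new /project routes, route ids, paths, and params are all checked at compile time. A minimal sketch of what that buys in app code (the component below is illustrative, not part of the diff):

import { useNavigate } from '@tanstack/react-router'

// Illustrative only: '/project/$projectId' is now a key of FileRoutesByPath,
// so `to` is validated and the required projectId param cannot be omitted.
function OpenProjectButton({ projectId }: { projectId: string }) {
  const navigate = useNavigate()
  return (
    <button
      onClick={() =>
        navigate({ to: '/project/$projectId', params: { projectId } })
      }
    >
      Open project
    </button>
  )
}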

View File

@ -12,6 +12,7 @@ import { AvatarEmoji } from '@/containers/AvatarEmoji'
import { useTranslation } from '@/i18n/react-i18next-compat'
import { PlatformGuard } from '@/lib/platform/PlatformGuard'
import { PlatformFeature } from '@/lib/platform/types'
import { Button } from '@/components/ui/button'
// eslint-disable-next-line @typescript-eslint/no-explicit-any
export const Route = createFileRoute(route.assistant as any)({
@ -59,74 +60,73 @@ function AssistantContent() {
}
return (
<div className="flex h-full flex-col flex-justify-center">
<div className="flex h-full flex-col justify-center">
<HeaderPage>
<span>{t('assistants:title')}</span>
<div className="flex items-center justify-between w-full mr-2">
<span>{t('assistants:title')}</span>
<Button
onClick={() => {
setEditingKey(null)
setOpen(true)
}}
size="sm"
className="relative z-50"
>
<IconCirclePlus size={16} />
Add Assistant
</Button>
</div>
</HeaderPage>
<div className="h-full p-4 overflow-y-auto">
<div className="grid grid-cols-1 sm:grid-cols-2 md:grid-cols-3 gap-4">
<div className="h-full p-4 w-3/4 mx-auto overflow-y-auto mt-2">
<div className="space-y-3">
{assistants
.slice()
.sort((a, b) => a.created_at - b.created_at)
.map((assistant) => (
<div
className="bg-main-view-fg/3 p-3 rounded-md"
className="bg-main-view-fg/3 py-2 px-4 rounded-lg flex items-center gap-4"
key={assistant.id}
>
<div className="flex items-center justify-between gap-2">
<h3 className="text-base font-medium text-main-view-fg/80">
<div className="flex items-center gap-1">
{assistant?.avatar && (
<span className="shrink-0 w-4 h-4 relative flex items-center justify-center">
<AvatarEmoji
avatar={assistant?.avatar}
imageClassName="object-cover"
textClassName="text-sm"
/>
</span>
)}
<span className="line-clamp-1">{assistant.name}</span>
</div>
</h3>
<div className="flex items-center gap-0.5">
<div
className="size-6 cursor-pointer flex items-center justify-center rounded hover:bg-main-view-fg/10 transition-all duration-200 ease-in-out"
title={t('assistants:editAssistant')}
onClick={() => {
setEditingKey(assistant.id)
setOpen(true)
}}
>
<IconPencil size={18} className="text-main-view-fg/50" />
</div>
<div
className="size-6 cursor-pointer flex items-center justify-center rounded hover:bg-main-view-fg/10 transition-all duration-200 ease-in-out"
title={t('assistants:deleteAssistant')}
onClick={() => handleDelete(assistant.id)}
>
<IconTrash size={18} className="text-main-view-fg/50" />
<div className="flex items-start gap-3 flex-1">
{assistant?.avatar && (
<div className="shrink-0 w-8 h-8 relative flex items-center justify-center bg-main-view-fg/4 rounded-md">
<AvatarEmoji
avatar={assistant?.avatar}
imageClassName="w-5 h-5 object-contain"
textClassName="text-lg"
/>
</div>
)}
<div className="flex-1 min-w-0">
<h3 className="text-base font-medium text-main-view-fg/80 line-clamp-1">
{assistant.name}
</h3>
<p className="text-main-view-fg/50 text-sm line-clamp-2 mt-0.5">
{assistant.description}
</p>
</div>
</div>
<p
className="text-main-view-fg/50 mt-1 line-clamp-2"
title={assistant.description}
>
{assistant.description}
</p>
<div className="flex items-center">
<button
className="size-8 cursor-pointer flex items-center justify-center rounded-md hover:bg-main-view-fg/10 transition-all duration-200 ease-in-out"
title={t('assistants:editAssistant')}
onClick={() => {
setEditingKey(assistant.id)
setOpen(true)
}}
>
<IconPencil size={16} className="text-main-view-fg/50" />
</button>
<button
className="size-8 cursor-pointer flex items-center justify-center rounded-md hover:bg-main-view-fg/10 transition-all duration-200 ease-in-out"
title={t('assistants:deleteAssistant')}
onClick={() => handleDelete(assistant.id)}
>
<IconTrash size={16} className="text-main-view-fg/50" />
</button>
</div>
</div>
))}
<div
className="bg-main-view p-3 min-h-[88px] rounded-md border border-dashed border-main-view-fg/10 flex items-center justify-center cursor-pointer hover:bg-main-view-fg/1 transition-all duration-200 ease-in-out"
key="new-assistant"
onClick={() => {
setEditingKey(null)
setOpen(true)
}}
>
<IconCirclePlus className="text-main-view-fg/50" />
</div>
</div>
<AddEditAssistant
open={open}

View File

@ -21,10 +21,7 @@ import { useEffect, useMemo, useCallback, useState } from 'react'
import { useModelProvider } from '@/hooks/useModelProvider'
import { useDownloadStore } from '@/hooks/useDownloadStore'
import { useServiceHub } from '@/hooks/useServiceHub'
import type {
CatalogModel,
ModelQuant,
} from '@/services/models/types'
import type { CatalogModel, ModelQuant } from '@/services/models/types'
import { Progress } from '@/components/ui/progress'
import { Button } from '@/components/ui/button'
import { cn } from '@/lib/utils'
@ -80,12 +77,13 @@ function HubModelDetailContent() {
}, [fetchSources])
const fetchRepo = useCallback(async () => {
const repoInfo = await serviceHub.models().fetchHuggingFaceRepo(
search.repo || modelId,
huggingfaceToken
)
const repoInfo = await serviceHub
.models()
.fetchHuggingFaceRepo(search.repo || modelId, huggingfaceToken)
if (repoInfo) {
const repoDetail = serviceHub.models().convertHfRepoToCatalogModel(repoInfo)
const repoDetail = serviceHub
.models()
.convertHfRepoToCatalogModel(repoInfo)
setRepoData(repoDetail || undefined)
}
}, [serviceHub, modelId, search, huggingfaceToken])
@ -168,7 +166,9 @@ function HubModelDetailContent() {
try {
// Use the HuggingFace path for the model
const modelPath = variant.path
const supported = await serviceHub.models().isModelSupported(modelPath, 8192)
const supported = await serviceHub
.models()
.isModelSupported(modelPath, 8192)
setModelSupportStatus((prev) => ({
...prev,
[modelKey]: supported,
@ -473,12 +473,20 @@ function HubModelDetailContent() {
addLocalDownloadingModel(
variant.model_id
)
serviceHub.models().pullModelWithMetadata(
variant.model_id,
variant.path,
modelData.mmproj_models?.[0]?.path,
huggingfaceToken
)
serviceHub
.models()
.pullModelWithMetadata(
variant.model_id,
variant.path,
(
modelData.mmproj_models?.find(
(e) =>
e.model_id.toLowerCase() ===
'mmproj-f16'
) || modelData.mmproj_models?.[0]
)?.path,
huggingfaceToken
)
}}
className={cn(isDownloading && 'hidden')}
>
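The behavioral change in this hunk is easy to miss inside the prettier reflow: instead of always downloading mmproj_models?.[0], the pull now prefers the variant whose model_id is exactly 'mmproj-f16' and only falls back to the first entry. A hypothetical helper (not in the diff) expressing the same rule:

import type { ModelQuant } from '@/services/models/types'

// Prefer the f16 multimodal projector when the repo ships one; otherwise
// fall back to the first available projector, or undefined if there is none.
function pickMmprojPath(mmprojModels?: ModelQuant[]): string | undefined {
  const preferred = mmprojModels?.find(
    (e) => e.model_id.toLowerCase() === 'mmproj-f16'
  )
  return (preferred ?? mmprojModels?.[0])?.path
}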

View File

@ -1,6 +1,6 @@
/* eslint-disable @typescript-eslint/no-explicit-any */
import { useVirtualizer } from '@tanstack/react-virtual'
import { createFileRoute, useNavigate, useSearch } from '@tanstack/react-router'
import { createFileRoute, useNavigate } from '@tanstack/react-router'
import { route } from '@/constants/routes'
import { useModelSources } from '@/hooks/useModelSources'
import { cn } from '@/lib/utils'
@ -34,8 +34,6 @@ import {
TooltipTrigger,
} from '@/components/ui/tooltip'
import { ModelInfoHoverCard } from '@/containers/ModelInfoHoverCard'
import Joyride, { CallBackProps, STATUS } from 'react-joyride'
import { CustomTooltipJoyRide } from '@/containers/CustomeTooltipJoyRide'
import {
DropdownMenu,
DropdownMenuContent,
@ -51,10 +49,9 @@ import { Loader } from 'lucide-react'
import { useTranslation } from '@/i18n/react-i18next-compat'
import Fuse from 'fuse.js'
import { useGeneralSetting } from '@/hooks/useGeneralSetting'
import { DownloadButtonPlaceholder } from '@/containers/DownloadButton'
import { useShallow } from 'zustand/shallow'
type ModelProps = {
model: CatalogModel
}
type SearchParams = {
repo: string
}
@ -77,7 +74,7 @@ function Hub() {
function HubContent() {
const parentRef = useRef(null)
const { huggingfaceToken } = useGeneralSetting()
const huggingfaceToken = useGeneralSetting((state) => state.huggingfaceToken)
const serviceHub = useServiceHub()
const { t } = useTranslation()
@ -93,7 +90,13 @@ function HubContent() {
}
}, [])
const { sources, fetchSources, loading } = useModelSources()
const { sources, fetchSources, loading } = useModelSources(
useShallow((state) => ({
sources: state.sources,
fetchSources: state.fetchSources,
loading: state.loading,
}))
)
const [searchValue, setSearchValue] = useState('')
const [sortSelected, setSortSelected] = useState('newest')
@ -108,16 +111,9 @@ function HubContent() {
const [modelSupportStatus, setModelSupportStatus] = useState<
Record<string, 'RED' | 'YELLOW' | 'GREEN' | 'LOADING'>
>({})
const [joyrideReady, setJoyrideReady] = useState(false)
const [currentStepIndex, setCurrentStepIndex] = useState(0)
const addModelSourceTimeoutRef = useRef<ReturnType<typeof setTimeout> | null>(
null
)
const downloadButtonRef = useRef<HTMLButtonElement>(null)
const hasTriggeredDownload = useRef(false)
const { getProviderByName } = useModelProvider()
const llamaProvider = getProviderByName('llamacpp')
const toggleModelExpansion = (modelId: string) => {
setExpandedModels((prev) => ({
@ -168,9 +164,10 @@ function HubContent() {
?.map((model) => ({
...model,
quants: model.quants.filter((variant) =>
llamaProvider?.models.some(
(m: { id: string }) => m.id === variant.model_id
)
useModelProvider
.getState()
.getProviderByName('llamacpp')
?.models.some((m: { id: string }) => m.id === variant.model_id)
),
}))
.filter((model) => model.quants.length > 0)
@ -186,7 +183,6 @@ function HubContent() {
showOnlyDownloaded,
huggingFaceRepo,
searchOptions,
llamaProvider?.models,
])
// The virtualizer
@ -215,9 +211,13 @@ function HubContent() {
addModelSourceTimeoutRef.current = setTimeout(async () => {
try {
const repoInfo = await serviceHub.models().fetchHuggingFaceRepo(searchValue, huggingfaceToken)
const repoInfo = await serviceHub
.models()
.fetchHuggingFaceRepo(searchValue, huggingfaceToken)
if (repoInfo) {
const catalogModel = serviceHub.models().convertHfRepoToCatalogModel(repoInfo)
const catalogModel = serviceHub
.models()
.convertHfRepoToCatalogModel(repoInfo)
if (
!sources.some(
(s) =>
@ -303,7 +303,9 @@ function HubContent() {
try {
// Use the HuggingFace path for the model
const modelPath = variant.path
const supportStatus = await serviceHub.models().isModelSupported(modelPath, 8192)
const supportStatus = await serviceHub
.models()
.isModelSupported(modelPath, 8192)
setModelSupportStatus((prev) => ({
...prev,
@ -320,178 +322,7 @@ function HubContent() {
[modelSupportStatus, serviceHub]
)
const DownloadButtonPlaceholder = useMemo(() => {
return ({ model }: ModelProps) => {
// Check if this is a HuggingFace repository (no quants)
if (model.quants.length === 0) {
return (
<div className="flex items-center gap-2">
<Button
size="sm"
onClick={() => {
window.open(
`https://huggingface.co/${model.model_name}`,
'_blank'
)
}}
>
View on HuggingFace
</Button>
</div>
)
}
const quant =
model.quants.find((e) =>
defaultModelQuantizations.some((m) =>
e.model_id.toLowerCase().includes(m)
)
) ?? model.quants[0]
const modelId = quant?.model_id || model.model_name
const modelUrl = quant?.path || modelId
const isDownloading =
localDownloadingModels.has(modelId) ||
downloadProcesses.some((e) => e.id === modelId)
const downloadProgress =
downloadProcesses.find((e) => e.id === modelId)?.progress || 0
const isDownloaded = llamaProvider?.models.some(
(m: { id: string }) => m.id === modelId
)
const isRecommended = isRecommendedModel(model.model_name)
const handleDownload = () => {
// Immediately set local downloading state
addLocalDownloadingModel(modelId)
const mmprojPath = model.mmproj_models?.[0]?.path
serviceHub.models().pullModelWithMetadata(
modelId,
modelUrl,
mmprojPath,
huggingfaceToken
)
}
return (
<div
className={cn(
'flex items-center',
isRecommended && 'hub-download-button-step'
)}
>
{isDownloading && !isDownloaded && (
<div className={cn('flex items-center gap-2 w-20')}>
<Progress value={downloadProgress * 100} />
<span className="text-xs text-center text-main-view-fg/70">
{Math.round(downloadProgress * 100)}%
</span>
</div>
)}
{isDownloaded ? (
<Button
size="sm"
onClick={() => handleUseModel(modelId)}
data-test-id={`hub-model-${modelId}`}
>
{t('hub:use')}
</Button>
) : (
<Button
data-test-id={`hub-model-${modelId}`}
size="sm"
onClick={handleDownload}
className={cn(isDownloading && 'hidden')}
ref={isRecommended ? downloadButtonRef : undefined}
>
{t('hub:download')}
</Button>
)}
</div>
)
}
}, [
localDownloadingModels,
downloadProcesses,
llamaProvider?.models,
isRecommendedModel,
t,
addLocalDownloadingModel,
huggingfaceToken,
handleUseModel,
serviceHub,
])
const { step } = useSearch({ from: Route.id })
const isSetup = step === 'setup_local_provider'
// Wait for DOM to be ready before starting Joyride
useEffect(() => {
if (!loading && filteredModels.length > 0 && isSetup) {
const timer = setTimeout(() => {
setJoyrideReady(true)
}, 100)
return () => clearTimeout(timer)
} else {
setJoyrideReady(false)
}
}, [loading, filteredModels.length, isSetup])
const handleJoyrideCallback = (data: CallBackProps) => {
const { status, index } = data
if (
status === STATUS.FINISHED &&
!isDownloading &&
isLastStep &&
!hasTriggeredDownload.current
) {
const recommendedModel = filteredModels.find((model) =>
isRecommendedModel(model.model_name)
)
if (recommendedModel && recommendedModel.quants[0]?.model_id) {
if (downloadButtonRef.current) {
hasTriggeredDownload.current = true
downloadButtonRef.current.click()
}
return
}
}
if (status === STATUS.FINISHED) {
navigate({
to: route.hub.index,
})
}
// Track current step index
setCurrentStepIndex(index)
}
// Check if any model is currently downloading
const isDownloading =
localDownloadingModels.size > 0 || downloadProcesses.length > 0
const steps = [
{
target: '.hub-model-card-step',
title: t('hub:joyride.recommendedModelTitle'),
disableBeacon: true,
content: t('hub:joyride.recommendedModelContent'),
},
{
target: '.hub-download-button-step',
title: isDownloading
? t('hub:joyride.downloadInProgressTitle')
: t('hub:joyride.downloadModelTitle'),
disableBeacon: true,
content: isDownloading
? t('hub:joyride.downloadInProgressContent')
: t('hub:joyride.downloadModelContent'),
},
]
// Check if we're on the last step
const isLastStep = currentStepIndex === steps.length - 1
const renderFilter = () => {
return (
<>
@ -544,31 +375,6 @@ function HubContent() {
return (
<>
<Joyride
run={joyrideReady}
floaterProps={{
hideArrow: true,
}}
steps={steps}
tooltipComponent={CustomTooltipJoyRide}
spotlightPadding={0}
continuous={true}
showSkipButton={!isLastStep}
hideCloseButton={true}
spotlightClicks={true}
disableOverlay={IS_LINUX}
disableOverlayClose={true}
callback={handleJoyrideCallback}
locale={{
back: t('hub:joyride.back'),
close: t('hub:joyride.close'),
last: !isDownloading
? t('hub:joyride.lastWithDownload')
: t('hub:joyride.last'),
next: t('hub:joyride.next'),
skip: t('hub:joyride.skip'),
}}
/>
<div className="flex h-full w-full">
<div className="flex flex-col h-full w-full ">
<HeaderPage>
@ -698,6 +504,7 @@ function HubContent() {
/>
<DownloadButtonPlaceholder
model={filteredModels[virtualItem.index]}
handleUseModel={handleUseModel}
/>
</div>
</div>
@ -908,10 +715,13 @@ function HubContent() {
(e) => e.id === variant.model_id
)?.progress || 0
const isDownloaded =
llamaProvider?.models.some(
(m: { id: string }) =>
m.id === variant.model_id
)
useModelProvider
.getState()
.getProviderByName('llamacpp')
?.models.some(
(m: { id: string }) =>
m.id === variant.model_id
)
if (isDownloading) {
return (
@ -962,14 +772,26 @@ function HubContent() {
addLocalDownloadingModel(
variant.model_id
)
serviceHub.models().pullModelWithMetadata(
variant.model_id,
variant.path,
filteredModels[
virtualItem.index
].mmproj_models?.[0]?.path,
huggingfaceToken
)
serviceHub
.models()
.pullModelWithMetadata(
variant.model_id,
variant.path,
(
filteredModels[
virtualItem.index
].mmproj_models?.find(
(e) =>
e.model_id.toLowerCase() ===
'mmproj-f16'
) ||
filteredModels[
virtualItem.index
].mmproj_models?.[0]
)?.path,
huggingfaceToken
)
}}
>
<IconDownload
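Beyond deleting the Joyride onboarding and moving DownloadButtonPlaceholder out to a shared container (which now receives handleUseModel as a prop), this refactor switches the hub to two zustand patterns that cut re-renders and dependency-array churn: subscribing through selectors (wrapping multi-field picks in useShallow so an equal-but-fresh object does not re-render), and using getState() for one-off reads that should not subscribe at all. A condensed sketch of both, with store shapes assumed from the calls in this file:

import { useShallow } from 'zustand/shallow'
import { useGeneralSetting } from '@/hooks/useGeneralSetting'
import { useModelSources } from '@/hooks/useModelSources'
import { useModelProvider } from '@/hooks/useModelProvider'

function Sketch() {
  // Re-renders only when the token itself changes:
  const huggingfaceToken = useGeneralSetting((s) => s.huggingfaceToken)

  // Multi-field pick, compared shallowly by useShallow:
  const { sources, fetchSources, loading } = useModelSources(
    useShallow((s) => ({
      sources: s.sources,
      fetchSources: s.fetchSources,
      loading: s.loading,
    }))
  )

  // One-off read: no subscription, and no hook-dependency entry needed:
  const llamacpp = useModelProvider.getState().getProviderByName('llamacpp')
  return null
}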

View File

@ -0,0 +1,143 @@
import { createFileRoute, useParams } from '@tanstack/react-router'
import { useMemo } from 'react'
import { useThreadManagement } from '@/hooks/useThreadManagement'
import { useThreads } from '@/hooks/useThreads'
import { useModelProvider } from '@/hooks/useModelProvider'
import { useTranslation } from '@/i18n/react-i18next-compat'
import ChatInput from '@/containers/ChatInput'
import HeaderPage from '@/containers/HeaderPage'
import SetupScreen from '@/containers/SetupScreen'
import ThreadList from '@/containers/ThreadList'
import DropdownAssistant from '@/containers/DropdownAssistant'
import { PlatformFeatures } from '@/lib/platform/const'
import { PlatformFeature } from '@/lib/platform/types'
import { IconMessage } from '@tabler/icons-react'
import { cn } from '@/lib/utils'
import { useAppearance } from '@/hooks/useAppearance'
import { useSmallScreen } from '@/hooks/useMediaQuery'
export const Route = createFileRoute('/project/$projectId')({
component: ProjectPage,
})
function ProjectPage() {
const { t } = useTranslation()
const { projectId } = useParams({ from: '/project/$projectId' })
const { providers } = useModelProvider()
const { getFolderById } = useThreadManagement()
const threads = useThreads((state) => state.threads)
const chatWidth = useAppearance((state) => state.chatWidth)
const isSmallScreen = useSmallScreen()
// Find the project
const project = getFolderById(projectId)
// Get threads for this project
const projectThreads = useMemo(() => {
return Object.values(threads)
.filter((thread) => thread.metadata?.project?.id === projectId)
.sort((a, b) => (b.updated || 0) - (a.updated || 0))
}, [threads, projectId])
  // Check whether at least one provider is usable
const hasValidProviders = providers.some(
(provider) =>
provider.api_key?.length ||
(provider.provider === 'llamacpp' && provider.models.length) ||
(provider.provider === 'jan' && provider.models.length)
)
if (!hasValidProviders) {
return <SetupScreen />
}
if (!project) {
return (
<div className="flex h-full flex-col items-center justify-center">
<div className="text-center">
<h1 className="text-2xl font-semibold text-main-view-fg mb-2">
{t('projects.projectNotFound')}
</h1>
<p className="text-main-view-fg/70">
{t('projects.projectNotFoundDesc')}
</p>
</div>
</div>
)
}
return (
<div className="flex h-full flex-col">
<HeaderPage>
<div className="flex items-center justify-between w-full">
{PlatformFeatures[PlatformFeature.ASSISTANTS] && (
<DropdownAssistant />
)}
</div>
</HeaderPage>
<div className="h-full relative flex flex-col justify-between px-4 md:px-8 py-4 overflow-y-auto">
<div
className={cn(
'mx-auto flex h-full flex-col justify-between',
chatWidth === 'compact' ? 'w-full md:w-4/6' : 'w-full',
isSmallScreen && 'w-full'
)}
>
<div className="flex h-full flex-col">
<div className="mb-6 mt-2">
{projectThreads.length > 0 && (
<>
<h2 className="text-xl font-semibold text-main-view-fg mb-2">
{t('projects.conversationsIn', { projectName: project.name })}
</h2>
<p className="text-main-view-fg/70">
{t('projects.conversationsDescription')}
</p>
</>
)}
</div>
{/* Thread List or Empty State */}
<div className="mb-0">
{projectThreads.length > 0 ? (
<ThreadList threads={projectThreads} variant="project" />
) : (
<div className="flex flex-col items-center justify-center py-12 text-center">
<IconMessage
size={48}
className="text-main-view-fg/30 mb-4"
/>
<h3 className="text-lg font-medium text-main-view-fg/60 mb-2">
{t('projects.noConversationsIn', { projectName: project.name })}
</h3>
<p className="text-main-view-fg/50 text-sm">
{t('projects.startNewConversation', { projectName: project.name })}
</p>
</div>
)}
</div>
</div>
</div>
</div>
{/* New Chat Input */}
<div
className={cn(
'mx-auto pt-2 pb-3 shrink-0 relative px-2',
chatWidth === 'compact' ? 'w-full md:w-4/6' : 'w-full',
isSmallScreen && 'w-full'
)}
>
<ChatInput
showSpeedToken={false}
initialMessage={true}
projectId={projectId}
/>
</div>
</div>
)
}

View File

@ -0,0 +1,244 @@
import { createFileRoute } from '@tanstack/react-router'
import { useState, useMemo } from 'react'
import { useThreadManagement } from '@/hooks/useThreadManagement'
import { useThreads } from '@/hooks/useThreads'
import { useTranslation } from '@/i18n/react-i18next-compat'
import HeaderPage from '@/containers/HeaderPage'
import ThreadList from '@/containers/ThreadList'
import {
IconCirclePlus,
IconPencil,
IconTrash,
IconFolder,
IconChevronDown,
IconChevronRight,
} from '@tabler/icons-react'
import AddProjectDialog from '@/containers/dialogs/AddProjectDialog'
import { DeleteProjectDialog } from '@/containers/dialogs/DeleteProjectDialog'
import { Button } from '@/components/ui/button'
import { formatDate } from '@/utils/formatDate'
export const Route = createFileRoute('/project/')({
component: Project,
})
function Project() {
return <ProjectContent />
}
function ProjectContent() {
const { t } = useTranslation()
const { folders, addFolder, updateFolder, deleteFolder, getFolderById } =
useThreadManagement()
const threads = useThreads((state) => state.threads)
const [open, setOpen] = useState(false)
const [editingKey, setEditingKey] = useState<string | null>(null)
const [deleteConfirmOpen, setDeleteConfirmOpen] = useState(false)
const [deletingId, setDeletingId] = useState<string | null>(null)
const [expandedProjects, setExpandedProjects] = useState<Set<string>>(
new Set()
)
const handleDelete = (id: string) => {
setDeletingId(id)
setDeleteConfirmOpen(true)
}
const confirmDelete = () => {
if (deletingId) {
deleteFolder(deletingId)
setDeleteConfirmOpen(false)
setDeletingId(null)
}
}
const handleSave = (name: string) => {
if (editingKey) {
updateFolder(editingKey, name)
} else {
addFolder(name)
}
setOpen(false)
setEditingKey(null)
}
const formatProjectDate = (timestamp: number) => {
return formatDate(new Date(timestamp), { includeTime: false })
}
// Get threads for a specific project
const getThreadsForProject = useMemo(() => {
return (projectId: string) => {
return Object.values(threads)
.filter((thread) => thread.metadata?.project?.id === projectId)
.sort((a, b) => (b.updated || 0) - (a.updated || 0))
}
}, [threads])
const toggleProjectExpansion = (projectId: string) => {
setExpandedProjects((prev) => {
const newSet = new Set(prev)
if (newSet.has(projectId)) {
newSet.delete(projectId)
} else {
newSet.add(projectId)
}
return newSet
})
}
return (
<div className="flex h-full flex-col justify-center">
<HeaderPage>
<div className="flex items-center justify-between w-full mr-2">
<span>{t('projects.title')}</span>
<Button
onClick={() => {
setEditingKey(null)
setOpen(true)
}}
size="sm"
className="relative z-50"
>
<IconCirclePlus size={16} />
{t('projects.addProject')}
</Button>
</div>
</HeaderPage>
<div className="h-full overflow-y-auto flex flex-col">
<div className="p-4 w-full md:w-3/4 mx-auto mt-2">
{folders.length === 0 ? (
<div className="flex flex-col items-center justify-center py-12 text-center">
<IconFolder size={48} className="text-main-view-fg/30 mb-4" />
<h3 className="text-lg font-medium text-main-view-fg/60 mb-2">
{t('projects.noProjectsYet')}
</h3>
<p className="text-main-view-fg/50 text-sm">
{t('projects.noProjectsYetDesc')}
</p>
</div>
) : (
<div className="space-y-3">
{folders
.slice()
.sort((a, b) => b.updated_at - a.updated_at)
.map((folder) => {
const projectThreads = getThreadsForProject(folder.id)
const isExpanded = expandedProjects.has(folder.id)
return (
<div
className="bg-main-view-fg/3 py-2 px-4 rounded-lg"
key={folder.id}
>
<div className="flex items-center gap-4">
<div className="flex items-start gap-3 flex-1">
<div className="shrink-0 w-8 h-8 relative flex items-center justify-center bg-main-view-fg/4 rounded-md">
<IconFolder
size={16}
className="text-main-view-fg/50"
/>
</div>
<div className="flex-1 min-w-0">
<div className="flex items-center gap-2">
<h3 className="text-base font-medium text-main-view-fg/80 line-clamp-1">
{folder.name}
</h3>
<span className="text-xs bg-main-view-fg/10 text-main-view-fg/60 px-2 py-0.5 rounded-full">
{projectThreads.length}{' '}
{projectThreads.length === 1
? t('projects.thread')
: t('projects.threads')}
</span>
</div>
<p className="text-main-view-fg/50 text-xs line-clamp-2 mt-0.5">
{t('projects.updated')}{' '}
{formatProjectDate(folder.updated_at)}
</p>
</div>
</div>
<div className="flex items-center">
{projectThreads.length > 0 && (
<button
className="size-8 cursor-pointer flex items-center justify-center rounded-md hover:bg-main-view-fg/10 transition-all duration-200 ease-in-out mr-1"
title={
isExpanded
? t('projects.collapseThreads')
: t('projects.expandThreads')
}
onClick={() => toggleProjectExpansion(folder.id)}
>
{isExpanded ? (
<IconChevronDown
size={16}
className="text-main-view-fg/50"
/>
) : (
<IconChevronRight
size={16}
className="text-main-view-fg/50"
/>
)}
</button>
)}
<button
className="size-8 cursor-pointer flex items-center justify-center rounded-md hover:bg-main-view-fg/10 transition-all duration-200 ease-in-out"
title={t('projects.editProject')}
onClick={() => {
setEditingKey(folder.id)
setOpen(true)
}}
>
<IconPencil
size={16}
className="text-main-view-fg/50"
/>
</button>
<button
className="size-8 cursor-pointer flex items-center justify-center rounded-md hover:bg-main-view-fg/10 transition-all duration-200 ease-in-out"
title={t('projects.deleteProject')}
onClick={() => handleDelete(folder.id)}
>
<IconTrash
size={16}
className="text-main-view-fg/50"
/>
</button>
</div>
</div>
{/* Thread List */}
{isExpanded && projectThreads.length > 0 && (
<div className="mt-3 pl-2">
<ThreadList
threads={projectThreads}
variant="project"
/>
</div>
)}
</div>
)
})}
</div>
)}
</div>
</div>
<AddProjectDialog
open={open}
onOpenChange={setOpen}
editingKey={editingKey}
initialData={editingKey ? getFolderById(editingKey) : undefined}
onSave={handleSave}
/>
<DeleteProjectDialog
open={deleteConfirmOpen}
onOpenChange={setDeleteConfirmOpen}
onConfirm={confirmDelete}
projectName={deletingId ? getFolderById(deletingId)?.name : undefined}
/>
</div>
)
}

View File

@ -22,7 +22,7 @@ Object.defineProperty(global, 'MODEL_CATALOG_URL', {
describe('DefaultModelsService', () => {
let modelsService: DefaultModelsService
const mockEngine = {
list: vi.fn(),
updateSettings: vi.fn(),
@ -246,7 +246,9 @@ describe('DefaultModelsService', () => {
})
mockEngine.load.mockRejectedValue(error)
await expect(modelsService.startModel(provider, model)).rejects.toThrow(error)
await expect(modelsService.startModel(provider, model)).rejects.toThrow(
error
)
})
it('should not load model again', async () => {
const mockSettings = {
@ -263,7 +265,9 @@ describe('DefaultModelsService', () => {
includes: () => true,
})
expect(mockEngine.load).toBeCalledTimes(0)
await expect(modelsService.startModel(provider, model)).resolves.toBe(undefined)
await expect(modelsService.startModel(provider, model)).resolves.toBe(
undefined
)
})
})
@ -312,7 +316,9 @@ describe('DefaultModelsService', () => {
json: vi.fn().mockResolvedValue(mockRepoData),
})
const result = await modelsService.fetchHuggingFaceRepo('microsoft/DialoGPT-medium')
const result = await modelsService.fetchHuggingFaceRepo(
'microsoft/DialoGPT-medium'
)
expect(result).toEqual(mockRepoData)
expect(fetch).toHaveBeenCalledWith(
@ -342,7 +348,9 @@ describe('DefaultModelsService', () => {
)
// Test with domain prefix
await modelsService.fetchHuggingFaceRepo('huggingface.co/microsoft/DialoGPT-medium')
await modelsService.fetchHuggingFaceRepo(
'huggingface.co/microsoft/DialoGPT-medium'
)
expect(fetch).toHaveBeenCalledWith(
'https://huggingface.co/api/models/microsoft/DialoGPT-medium?blobs=true&files_metadata=true',
{
@ -365,7 +373,9 @@ describe('DefaultModelsService', () => {
expect(await modelsService.fetchHuggingFaceRepo('')).toBeNull()
// Test string without slash
expect(await modelsService.fetchHuggingFaceRepo('invalid-repo')).toBeNull()
expect(
await modelsService.fetchHuggingFaceRepo('invalid-repo')
).toBeNull()
// Test whitespace only
expect(await modelsService.fetchHuggingFaceRepo(' ')).toBeNull()
@ -378,7 +388,8 @@ describe('DefaultModelsService', () => {
statusText: 'Not Found',
})
const result = await modelsService.fetchHuggingFaceRepo('nonexistent/model')
const result =
await modelsService.fetchHuggingFaceRepo('nonexistent/model')
expect(result).toBeNull()
expect(fetch).toHaveBeenCalledWith(
@ -398,7 +409,9 @@ describe('DefaultModelsService', () => {
statusText: 'Internal Server Error',
})
const result = await modelsService.fetchHuggingFaceRepo('microsoft/DialoGPT-medium')
const result = await modelsService.fetchHuggingFaceRepo(
'microsoft/DialoGPT-medium'
)
expect(result).toBeNull()
expect(consoleSpy).toHaveBeenCalledWith(
@ -414,7 +427,9 @@ describe('DefaultModelsService', () => {
;(fetch as any).mockRejectedValue(new Error('Network error'))
const result = await modelsService.fetchHuggingFaceRepo('microsoft/DialoGPT-medium')
const result = await modelsService.fetchHuggingFaceRepo(
'microsoft/DialoGPT-medium'
)
expect(result).toBeNull()
expect(consoleSpy).toHaveBeenCalledWith(
@ -448,7 +463,9 @@ describe('DefaultModelsService', () => {
json: vi.fn().mockResolvedValue(mockRepoData),
})
const result = await modelsService.fetchHuggingFaceRepo('microsoft/DialoGPT-medium')
const result = await modelsService.fetchHuggingFaceRepo(
'microsoft/DialoGPT-medium'
)
expect(result).toEqual(mockRepoData)
})
@ -487,7 +504,9 @@ describe('DefaultModelsService', () => {
json: vi.fn().mockResolvedValue(mockRepoData),
})
const result = await modelsService.fetchHuggingFaceRepo('microsoft/DialoGPT-medium')
const result = await modelsService.fetchHuggingFaceRepo(
'microsoft/DialoGPT-medium'
)
expect(result).toEqual(mockRepoData)
})
@ -531,7 +550,9 @@ describe('DefaultModelsService', () => {
json: vi.fn().mockResolvedValue(mockRepoData),
})
const result = await modelsService.fetchHuggingFaceRepo('microsoft/DialoGPT-medium')
const result = await modelsService.fetchHuggingFaceRepo(
'microsoft/DialoGPT-medium'
)
expect(result).toEqual(mockRepoData)
// Verify the GGUF file is present in siblings
@ -576,7 +597,8 @@ describe('DefaultModelsService', () => {
}
it('should convert HuggingFace repo to catalog model format', () => {
const result = modelsService.convertHfRepoToCatalogModel(mockHuggingFaceRepo)
const result =
modelsService.convertHfRepoToCatalogModel(mockHuggingFaceRepo)
const expected: CatalogModel = {
model_name: 'microsoft/DialoGPT-medium',
@ -586,12 +608,12 @@ describe('DefaultModelsService', () => {
num_quants: 2,
quants: [
{
model_id: 'model-q4_0',
model_id: 'microsoft/model-q4_0',
path: 'https://huggingface.co/microsoft/DialoGPT-medium/resolve/main/model-q4_0.gguf',
file_size: '2.0 GB',
},
{
model_id: 'model-q8_0',
model_id: 'microsoft/model-q8_0',
path: 'https://huggingface.co/microsoft/DialoGPT-medium/resolve/main/model-q8_0.GGUF',
file_size: '4.0 GB',
},
@ -635,7 +657,8 @@ describe('DefaultModelsService', () => {
siblings: undefined,
}
const result = modelsService.convertHfRepoToCatalogModel(repoWithoutSiblings)
const result =
modelsService.convertHfRepoToCatalogModel(repoWithoutSiblings)
expect(result.num_quants).toBe(0)
expect(result.quants).toEqual([])
@ -663,7 +686,9 @@ describe('DefaultModelsService', () => {
],
}
const result = modelsService.convertHfRepoToCatalogModel(repoWithVariousFileSizes)
const result = modelsService.convertHfRepoToCatalogModel(
repoWithVariousFileSizes
)
expect(result.quants[0].file_size).toBe('500.0 MB')
expect(result.quants[1].file_size).toBe('3.5 GB')
@ -676,7 +701,8 @@ describe('DefaultModelsService', () => {
tags: [],
}
const result = modelsService.convertHfRepoToCatalogModel(repoWithEmptyTags)
const result =
modelsService.convertHfRepoToCatalogModel(repoWithEmptyTags)
expect(result.description).toBe('**Tags**: ')
})
@ -687,7 +713,8 @@ describe('DefaultModelsService', () => {
downloads: undefined as any,
}
const result = modelsService.convertHfRepoToCatalogModel(repoWithoutDownloads)
const result =
modelsService.convertHfRepoToCatalogModel(repoWithoutDownloads)
expect(result.downloads).toBe(0)
})
@ -714,15 +741,17 @@ describe('DefaultModelsService', () => {
],
}
const result = modelsService.convertHfRepoToCatalogModel(repoWithVariousGGUF)
const result =
modelsService.convertHfRepoToCatalogModel(repoWithVariousGGUF)
expect(result.quants[0].model_id).toBe('model')
expect(result.quants[1].model_id).toBe('MODEL')
expect(result.quants[2].model_id).toBe('complex-model-name')
expect(result.quants[0].model_id).toBe('microsoft/model')
expect(result.quants[1].model_id).toBe('microsoft/MODEL')
expect(result.quants[2].model_id).toBe('microsoft/complex-model-name')
})
it('should generate correct download paths', () => {
const result = modelsService.convertHfRepoToCatalogModel(mockHuggingFaceRepo)
const result =
modelsService.convertHfRepoToCatalogModel(mockHuggingFaceRepo)
expect(result.quants[0].path).toBe(
'https://huggingface.co/microsoft/DialoGPT-medium/resolve/main/model-q4_0.gguf'
@ -733,7 +762,8 @@ describe('DefaultModelsService', () => {
})
it('should generate correct readme URL', () => {
const result = modelsService.convertHfRepoToCatalogModel(mockHuggingFaceRepo)
const result =
modelsService.convertHfRepoToCatalogModel(mockHuggingFaceRepo)
expect(result.readme).toBe(
'https://huggingface.co/microsoft/DialoGPT-medium/resolve/main/README.md'
@ -767,13 +797,14 @@ describe('DefaultModelsService', () => {
],
}
const result = modelsService.convertHfRepoToCatalogModel(repoWithMixedCase)
const result =
modelsService.convertHfRepoToCatalogModel(repoWithMixedCase)
expect(result.num_quants).toBe(3)
expect(result.quants).toHaveLength(3)
expect(result.quants[0].model_id).toBe('model-1')
expect(result.quants[1].model_id).toBe('model-2')
expect(result.quants[2].model_id).toBe('model-3')
expect(result.quants[0].model_id).toBe('microsoft/model-1')
expect(result.quants[1].model_id).toBe('microsoft/model-2')
expect(result.quants[2].model_id).toBe('microsoft/model-3')
})
it('should handle edge cases with file size formatting', () => {
@ -798,7 +829,8 @@ describe('DefaultModelsService', () => {
],
}
const result = modelsService.convertHfRepoToCatalogModel(repoWithEdgeCases)
const result =
modelsService.convertHfRepoToCatalogModel(repoWithEdgeCases)
expect(result.quants[0].file_size).toBe('0.0 MB')
expect(result.quants[1].file_size).toBe('1.0 GB')
@ -850,7 +882,10 @@ describe('DefaultModelsService', () => {
mockEngineManager.get.mockReturnValue(mockEngineWithSupport)
const result = await modelsService.isModelSupported('/path/to/model.gguf', 4096)
const result = await modelsService.isModelSupported(
'/path/to/model.gguf',
4096
)
expect(result).toBe('GREEN')
expect(mockEngineWithSupport.isModelSupported).toHaveBeenCalledWith(
@ -867,7 +902,10 @@ describe('DefaultModelsService', () => {
mockEngineManager.get.mockReturnValue(mockEngineWithSupport)
const result = await modelsService.isModelSupported('/path/to/model.gguf', 8192)
const result = await modelsService.isModelSupported(
'/path/to/model.gguf',
8192
)
expect(result).toBe('YELLOW')
expect(mockEngineWithSupport.isModelSupported).toHaveBeenCalledWith(
@ -884,7 +922,9 @@ describe('DefaultModelsService', () => {
mockEngineManager.get.mockReturnValue(mockEngineWithSupport)
const result = await modelsService.isModelSupported('/path/to/large-model.gguf')
const result = await modelsService.isModelSupported(
'/path/to/large-model.gguf'
)
expect(result).toBe('RED')
expect(mockEngineWithSupport.isModelSupported).toHaveBeenCalledWith(

View File

@ -30,6 +30,10 @@ export class DefaultModelsService implements ModelsService {
return EngineManager.instance().get(provider) as AIEngine | undefined
}
async getModel(modelId: string): Promise<modelInfo | undefined> {
return this.getEngine()?.get(modelId)
}
async fetchModels(): Promise<modelInfo[]> {
return this.getEngine()?.list() ?? []
}
@ -127,7 +131,7 @@ export class DefaultModelsService implements ModelsService {
const modelId = file.rfilename.replace(/\.gguf$/i, '')
return {
model_id: sanitizeModelId(modelId),
model_id: `${repo.author}/${sanitizeModelId(modelId)}`,
path: `https://huggingface.co/${repo.modelId}/resolve/main/${file.rfilename}`,
file_size: formatFileSize(file.size),
}
@ -529,19 +533,21 @@ export class DefaultModelsService implements ModelsService {
// Fallback if method is not available
console.warn('planModelLoad method not available in llamacpp engine')
return {
gpuLayers: 0,
gpuLayers: 100,
maxContextLength: 2048,
noOffloadKVCache: true,
noOffloadKVCache: false,
offloadMmproj: false,
batchSize: 2048,
mode: 'Unsupported',
}
} catch (error) {
console.error(`Error planning model load for path ${modelPath}:`, error)
return {
gpuLayers: 0,
gpuLayers: 100,
maxContextLength: 2048,
noOffloadKVCache: true,
noOffloadKVCache: false,
offloadMmproj: false,
batchSize: 2048,
mode: 'Unsupported',
}
}
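Two behavior changes land in this service. Quant ids from converted HuggingFace repos are now prefixed with the repo author, which is exactly what the updated test expectations above ('microsoft/model-q4_0' and friends) assert; and the planModelLoad fallback and error paths now return a GPU-leaning plan (gpuLayers: 100, KV-cache offload enabled, plus the new batchSize field) instead of the old CPU-only defaults. The id change in isolation, with illustrative values and sanitizeModelId elided (assumed a no-op for this filename):

const repo = { author: 'microsoft', modelId: 'microsoft/DialoGPT-medium' }
const file = { rfilename: 'model-q4_0.gguf' }

const bareId = file.rfilename.replace(/\.gguf$/i, '') // 'model-q4_0'
const quantId = `${repo.author}/${bareId}` // 'microsoft/model-q4_0'
// Two repos shipping the same .gguf filename used to collide on quant id;
// the author prefix keeps them distinct.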

View File

@ -86,10 +86,12 @@ export interface ModelPlan {
maxContextLength: number
noOffloadKVCache: boolean
offloadMmproj: boolean
batchSize: number
mode: 'GPU' | 'Hybrid' | 'CPU' | 'Unsupported'
}
export interface ModelsService {
getModel(modelId: string): Promise<modelInfo | undefined>
fetchModels(): Promise<modelInfo[]>
fetchModelCatalog(): Promise<ModelCatalog>
fetchHuggingFaceRepo(

View File

@ -30,6 +30,12 @@ export class DefaultThreadsService implements ThreadsService {
provider: e.assistants?.[0]?.model?.engine,
},
assistants: e.assistants ?? [defaultAssistant],
metadata: {
...e.metadata,
// Override extracted fields to avoid duplication
order: e.metadata?.order,
is_favorite: e.metadata?.is_favorite,
},
} as Thread
})
})
@ -101,6 +107,7 @@ export class DefaultThreadsService implements ThreadsService {
},
],
metadata: {
...thread.metadata,
is_favorite: thread.isFavorite,
order: thread.order,
},
@ -115,4 +122,4 @@ export class DefaultThreadsService implements ThreadsService {
.get<ConversationalExtension>(ExtensionTypeEnum.Conversational)
?.deleteThread(threadId)
}
}
}
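The fix here is the spread: the save path previously rebuilt metadata from only is_favorite and order, silently dropping every other key, including the project link that the new /project routes depend on. Illustrative before/after, with shapes assumed from this file:

const thread = {
  isFavorite: true,
  order: 2,
  metadata: { project: { id: 'p1', name: 'Docs', updated_at: 1727328590 } },
}

// Before: metadata rebuilt from scratch, so the project link was lost on save
const before = { is_favorite: thread.isFavorite, order: thread.order }

// After: existing keys are preserved and the extracted fields still win
const after = {
  ...thread.metadata,
  is_favorite: thread.isFavorite,
  order: thread.order,
}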

View File

@ -44,6 +44,14 @@ type Thread = {
model?: ThreadModel
updated: number
order?: number
metadata?: {
project?: {
id: string
name: string
updated_at: number
}
[key: string]: unknown
}
}
type Assistant = {
@ -62,4 +70,4 @@ type TokenSpeed = {
tokenSpeed: number
tokenCount: number
lastTimestamp: number
}
}
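The widened Thread type closes the loop: the project object that DefaultThreadsService now preserves is part of the declared shape, and the index signature leaves room for any other keys. An assumed example of just the new field:

// Illustrative value only; extra keys are admitted by [key: string]: unknown.
const metadata: Thread['metadata'] = {
  project: { id: 'p1', name: 'Docs', updated_at: 1727328590 },
  source: 'import', // arbitrary extra key
}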