diff --git a/docs/public/sitemap-0.xml b/docs/public/sitemap-0.xml
index b74cbb237..2b4a04975 100644
--- a/docs/public/sitemap-0.xml
+++ b/docs/public/sitemap-0.xml
@@ -93,7 +93,7 @@
<url><loc>https://jan.ai/docs/desktop/windows</loc><lastmod>2024-09-09T08:19:45.722Z</lastmod><changefreq>daily</changefreq><priority>1</priority></url>
<url><loc>https://jan.ai/docs/error-codes</loc><lastmod>2024-09-09T08:19:45.722Z</lastmod><changefreq>daily</changefreq><priority>1</priority></url>
<url><loc>https://jan.ai/docs/extensions</loc><lastmod>2024-09-09T08:19:45.722Z</lastmod><changefreq>daily</changefreq><priority>1</priority></url>
-<url><loc>https://jan.ai/docs/installing-extension</loc><lastmod>2024-09-09T08:19:45.722Z</lastmod><changefreq>daily</changefreq><priority>1</priority></url>
+<url><loc>https://jan.ai/docs/install-extensions</loc><lastmod>2024-09-09T08:19:45.722Z</lastmod><changefreq>daily</changefreq><priority>1</priority></url>
<url><loc>https://jan.ai/docs/models</loc><lastmod>2024-09-09T08:19:45.722Z</lastmod><changefreq>daily</changefreq><priority>1</priority></url>
<url><loc>https://jan.ai/docs/models/manage-models</loc><lastmod>2024-09-09T08:19:45.722Z</lastmod><changefreq>daily</changefreq><priority>1</priority></url>
<url><loc>https://jan.ai/docs/models/model-parameters</loc><lastmod>2024-09-09T08:19:45.722Z</lastmod><changefreq>daily</changefreq><priority>1</priority></url>
diff --git a/docs/src/pages/docs/_assets/llama.cpp-01.png b/docs/src/pages/docs/_assets/llama.cpp-01.png
new file mode 100644
index 000000000..4c76d8b25
Binary files /dev/null and b/docs/src/pages/docs/_assets/llama.cpp-01.png differ
diff --git a/docs/src/pages/docs/_assets/model-management-04.png b/docs/src/pages/docs/_assets/model-management-04.png
index 6b3f82464..211ffcd7e 100644
Binary files a/docs/src/pages/docs/_assets/model-management-04.png and b/docs/src/pages/docs/_assets/model-management-04.png differ
diff --git a/docs/src/pages/docs/_assets/model-management-05.png b/docs/src/pages/docs/_assets/model-management-05.png
index e7d4ea38d..b38b8fe36 100644
Binary files a/docs/src/pages/docs/_assets/model-management-05.png and b/docs/src/pages/docs/_assets/model-management-05.png differ
diff --git a/docs/src/pages/docs/_assets/model-management-06.png b/docs/src/pages/docs/_assets/model-management-06.png
index 5d5449c8b..369feff9a 100644
Binary files a/docs/src/pages/docs/_assets/model-management-06.png and b/docs/src/pages/docs/_assets/model-management-06.png differ
diff --git a/docs/src/pages/docs/_assets/model-management-07.png b/docs/src/pages/docs/_assets/model-management-07.png
index 83a562ddb..452c09790 100644
Binary files a/docs/src/pages/docs/_assets/model-management-07.png and b/docs/src/pages/docs/_assets/model-management-07.png differ
diff --git a/docs/src/pages/docs/_assets/settings-01.png b/docs/src/pages/docs/_assets/settings-01.png
index 8a2b734d7..e4701c8a5 100644
Binary files a/docs/src/pages/docs/_assets/settings-01.png and b/docs/src/pages/docs/_assets/settings-01.png differ
diff --git a/docs/src/pages/docs/_assets/settings-02.png b/docs/src/pages/docs/_assets/settings-02.png
index f54d6bd44..f43c06738 100644
Binary files a/docs/src/pages/docs/_assets/settings-02.png and b/docs/src/pages/docs/_assets/settings-02.png differ
diff --git a/docs/src/pages/docs/_assets/settings-03.png b/docs/src/pages/docs/_assets/settings-03.png
index f149ebb01..a12f5dd56 100644
Binary files a/docs/src/pages/docs/_assets/settings-03.png and b/docs/src/pages/docs/_assets/settings-03.png differ
diff --git a/docs/src/pages/docs/_assets/settings-04.png b/docs/src/pages/docs/_assets/settings-04.png
index cb55f4cd0..5213a4413 100644
Binary files a/docs/src/pages/docs/_assets/settings-04.png and b/docs/src/pages/docs/_assets/settings-04.png differ
diff --git a/docs/src/pages/docs/_assets/settings-05.png b/docs/src/pages/docs/_assets/settings-05.png
index 37aea156a..979a55e1d 100644
Binary files a/docs/src/pages/docs/_assets/settings-05.png and b/docs/src/pages/docs/_assets/settings-05.png differ
diff --git a/docs/src/pages/docs/_assets/settings-06.png b/docs/src/pages/docs/_assets/settings-06.png
index a25688f1e..4d26803ef 100644
Binary files a/docs/src/pages/docs/_assets/settings-06.png and b/docs/src/pages/docs/_assets/settings-06.png differ
diff --git a/docs/src/pages/docs/_assets/settings-07.png b/docs/src/pages/docs/_assets/settings-07.png
index 620ca2078..818de7850 100644
Binary files a/docs/src/pages/docs/_assets/settings-07.png and b/docs/src/pages/docs/_assets/settings-07.png differ
diff --git a/docs/src/pages/docs/_assets/settings-08.png b/docs/src/pages/docs/_assets/settings-08.png
index 9597831e0..733e2211f 100644
Binary files a/docs/src/pages/docs/_assets/settings-08.png and b/docs/src/pages/docs/_assets/settings-08.png differ
diff --git a/docs/src/pages/docs/_assets/settings-09.png b/docs/src/pages/docs/_assets/settings-09.png
index 58ce63cba..d89265176 100644
Binary files a/docs/src/pages/docs/_assets/settings-09.png and b/docs/src/pages/docs/_assets/settings-09.png differ
diff --git a/docs/src/pages/docs/_assets/settings-10.png b/docs/src/pages/docs/_assets/settings-10.png
index c72aa7c0b..734c418a9 100644
Binary files a/docs/src/pages/docs/_assets/settings-10.png and b/docs/src/pages/docs/_assets/settings-10.png differ
diff --git a/docs/src/pages/docs/_assets/settings-11.png b/docs/src/pages/docs/_assets/settings-11.png
index 833386ef4..8e6c27fa6 100644
Binary files a/docs/src/pages/docs/_assets/settings-11.png and b/docs/src/pages/docs/_assets/settings-11.png differ
diff --git a/docs/src/pages/docs/_assets/settings-12.png b/docs/src/pages/docs/_assets/settings-12.png
index a41744fa1..ab832cf14 100644
Binary files a/docs/src/pages/docs/_assets/settings-12.png and b/docs/src/pages/docs/_assets/settings-12.png differ
diff --git a/docs/src/pages/docs/_assets/settings-13.png b/docs/src/pages/docs/_assets/settings-13.png
index a1111d8e2..c6e2790ee 100644
Binary files a/docs/src/pages/docs/_assets/settings-13.png and b/docs/src/pages/docs/_assets/settings-13.png differ
diff --git a/docs/src/pages/docs/_assets/settings-14.png b/docs/src/pages/docs/_assets/settings-14.png
index 049f0070a..5f8c40a61 100644
Binary files a/docs/src/pages/docs/_assets/settings-14.png and b/docs/src/pages/docs/_assets/settings-14.png differ
diff --git a/docs/src/pages/docs/_assets/settings-15.png b/docs/src/pages/docs/_assets/settings-15.png
index 6292523fa..9dc80644d 100644
Binary files a/docs/src/pages/docs/_assets/settings-15.png and b/docs/src/pages/docs/_assets/settings-15.png differ
diff --git a/docs/src/pages/docs/_assets/settings-17.png b/docs/src/pages/docs/_assets/settings-17.png
index c8e74183b..803e35965 100644
Binary files a/docs/src/pages/docs/_assets/settings-17.png and b/docs/src/pages/docs/_assets/settings-17.png differ
diff --git a/docs/src/pages/docs/_assets/settings-18.png b/docs/src/pages/docs/_assets/settings-18.png
index fbc081e85..7db0c5655 100644
Binary files a/docs/src/pages/docs/_assets/settings-18.png and b/docs/src/pages/docs/_assets/settings-18.png differ
diff --git a/docs/src/pages/docs/_assets/trouble-shooting-01.png b/docs/src/pages/docs/_assets/trouble-shooting-01.png
index fc965988b..5cba10492 100644
Binary files a/docs/src/pages/docs/_assets/trouble-shooting-01.png and b/docs/src/pages/docs/_assets/trouble-shooting-01.png differ
diff --git a/docs/src/pages/docs/_meta.json b/docs/src/pages/docs/_meta.json
index 96acf49d5..bdd9be159 100644
--- a/docs/src/pages/docs/_meta.json
+++ b/docs/src/pages/docs/_meta.json
@@ -27,16 +27,17 @@
"title": "ENGINES",
"type": "separator"
},
- "built-in": "Local Engines",
+ "local-engines": "Local Engines",
"remote-models": "Remote Engines",
+ "install-engines": "Install Engines",
"extensions-separator": {
"title": "EXTENSIONS",
"type": "separator"
},
"extensions": "Overview",
- "extensions-settings": "Extensions Settings",
+ "extensions-settings": "Extension Settings",
"configure-extensions": "Configure Extensions",
- "installing-extension": "Install Extensions",
+ "install-extensions": "Install Extensions",
"troubleshooting-separator": {
"title": "TROUBLESHOOTING",
"type": "separator"
diff --git a/docs/src/pages/docs/built-in/llama-cpp.mdx b/docs/src/pages/docs/built-in/llama-cpp.mdx
deleted file mode 100644
index 8d71ff2ae..000000000
--- a/docs/src/pages/docs/built-in/llama-cpp.mdx
+++ /dev/null
@@ -1,133 +0,0 @@
----
-title: llama.cpp
-description: A step-by-step guide on how to customize the llama.cpp engine.
-keywords:
- [
- Jan,
- Customizable Intelligence, LLM,
- local AI,
- privacy focus,
- free and open source,
- private and offline,
- conversational AI,
- no-subscription fee,
- large language models,
- Llama CPP integration,
- llama.cpp Engine,
- Intel CPU,
- AMD CPU,
- NVIDIA GPU,
- AMD GPU Radeon,
- Apple Silicon,
- Intel Arc GPU,
- ]
----
-
-import { Tabs } from 'nextra/components'
-import { Callout, Steps } from 'nextra/components'
-
-# llama.cpp (Cortex)
-
-## Overview
-
-Jan has [**Cortex**](https://github.com/janhq/cortex) - a default C++ inference server built on top of [llama.cpp](https://github.com/ggerganov/llama.cpp). This server provides an OpenAI-compatible API, queues, scaling, and additional features on top of the wide capabilities of `llama.cpp`.
-
-This guide shows you how to initialize the `llama.cpp` to download and install the required dependencies to start chatting with a model using the `llama.cpp` engine.
-
-## Prerequisites
-- Mac Intel:
- - Make sure you're using an Intel-based Mac. For a complete list of supported Intel CPUs, please see [here](https://en.wikipedia.org/wiki/MacBook_Pro_(Intel-based)).
- - For Mac Intel, it is recommended to utilize smaller models.
-- Mac Sillicon:
- - Make sure you're using a Mac Silicon. For a complete list of supported Apple Silicon CPUs, please see [here](https://en.wikipedia.org/wiki/Apple_Silicon).
- - Using an adequate model size based on your hardware is recommended for Mac Silicon.
-
- This can use Apple GPU with Metal by default for acceleration. Apple ANE is not supported yet.
-
-- Windows:
- - Ensure that you have **Windows with x86_64** architecture.
-- Linux:
- - Ensure that you have **Linux with x86_64** architecture.
-
-#### GPU Acceleration Options
-Enable the GPU acceleration option within the Jan application by following the [Installation Setup](/docs/desktop-installation) guide.
-## Step-by-step Guide
-
-### Step 1: Open the `model.json`
-1. Open [Jan Data Folder](/docs/data-folder#open-jan-data-folder)
-
-
-
-
-
-2. Select **models** folder > Click **model folder** that you want to modify > click `model.json`
-3. Once open, `model.json` file looks like below, use model "TinyLlama Chat 1.1B Q4" as an example:
-```json
-{
- "sources": [
- {
- "filename": "tinyllama-1.1b-chat-v1.0.Q4_K_M.gguf",
- "url": "https://huggingface.co/TheBloke/TinyLlama-1.1B-Chat-v1.0-GGUF/resolve/main/tinyllama-1.1b-chat-v1.0.Q4_K_M.gguf"
- }
- ],
- "id": "tinyllama-1.1b",
- "object": "model",
- "name": "TinyLlama Chat 1.1B Q4",
- "version": "1.0",
- "description": "TinyLlama is a tiny model with only 1.1B. It's a good model for less powerful computers.",
- "format": "gguf",
- "settings": {
- "ctx_len": 4096,
- "prompt_template": "<|system|>\n{system_message}<|user|>\n{prompt}<|assistant|>",
- "llama_model_path": "tinyllama-1.1b-chat-v1.0.Q4_K_M.gguf"
- },
- "parameters": {
- "temperature": 0.7,
- "top_p": 0.95,
- "stream": true,
- "max_tokens": 2048,
- "stop": [],
- "frequency_penalty": 0,
- "presence_penalty": 0
- },
- "metadata": {
- "author": "TinyLlama",
- "tags": [
- "Tiny",
- "Foundation Model"
- ],
- "size": 669000000
- },
- "engine": "nitro"
-}
-```
-### Step 2: Modify the `model.json`
-1. Modify the model's engine settings under the settings array. You can modify the settings with the following parameters:
-
-| Parameter | Type | Description |
-| --------------- | ----------- | ---------------------------------------------------------------------------------------------------------------------------------- |
-| `ctx_len` | **Integer** | Provides ample context for model operations like `GPT-3.5`. The default value is `2048` (_Maximum_: `4096`, _Minimum_: `1`). |
-| `prompt_template` | **String** | Defines the template used to format prompts |
-| `model_path` | **String** | Specifies the path to the model `.GGUF` file. |
-| `ngl` | **Integer** | Determines GPU layer usage. The default value is `100`. |
-| `cpu_threads` | **Integer** | Determines CPU inference threads, limited by hardware and OS. (_Maximum_ determined by system) |
-| `cont_batching` | **Integer** | Controls continuous batching, enhancing throughput for LLM inference. |
-| `embedding` | **Integer** | Enables embedding utilization for tasks like document-enhanced chat in RAG-based applications. |
-2. Save the `model.json` file.
-
- If you use a different model, you must set it up again. As this only affects the selected model.
-
-### Step 3: Start the Model
-1. Restart the Jan application to apply your settings.
-2. Navigate to the **Threads**.
-3. Chat with your model.
-
-
- - To utilize the embedding feature, include the JSON parameter `"embedding": true`. It will enable Nitro to process inferences with embedding capabilities. Please refer to the [Embedding in the Nitro documentation](https://nitro.jan.ai/features/embed) for a more detailed explanation.
- - To utilize the continuous batching feature for boosting throughput and minimizing latency in large language model (LLM) inference, include `cont_batching: true`. For details, please refer to the [Continuous Batching in the Nitro documentation](https://nitro.jan.ai/features/cont-batch).
-
-
-
-
- If you have questions, please join our [Discord community](https://discord.gg/Dt7MxDyNNZ) for support, updates, and discussions.
-
diff --git a/docs/src/pages/docs/install-engines.mdx b/docs/src/pages/docs/install-engines.mdx
new file mode 100644
index 000000000..3a6814d88
--- /dev/null
+++ b/docs/src/pages/docs/install-engines.mdx
@@ -0,0 +1,166 @@
+---
+title: Install Engines
+description: Learn how to install local and remote engines in Jan.
+keywords:
+ [
+ Jan,
+ Customizable Intelligence, LLM,
+ local AI,
+ privacy focus,
+ free and open source,
+ private and offline,
+ conversational AI,
+ no-subscription fee,
+ large language models,
+ Jan Engines,
+ Engines,
+ ]
+---
+
+import { Callout } from 'nextra/components'
+import { Settings, EllipsisVertical } from 'lucide-react'
+
+# Install Engines
+
+## Install Local Engines
+Jan doesn't support installing additional local engines yet.
+
+## Install Remote Engines
+
+### Step-by-step Guide
+You can add any OpenAI-API-compatible provider, such as OpenAI, Anthropic, or others.
+To add a new remote engine:
+
+1. Navigate to **Settings** () > **Engines**
+2. Under the **Remote Engine** category, click **+ Install Engine**
+3. Fill in the following information:
+
+| Field | Description | Required |
+|-------|-------------|----------|
+| Engine Name | Name for your engine (e.g., "OpenAI", "Claude") | ✓ |
+| API URL | The base URL of the provider's API | ✓ |
+| API Key | Your authentication key from the provider | ✓ |
+| Model List URL | URL for fetching available models | |
+| API Key Template | Custom authorization header format | |
+| Request Format Conversion | Function to convert Jan's request format to provider's format | |
+| Response Format Conversion | Function to convert provider's response format to Jan's format | |
+
+
+> - The conversion functions are only needed for providers that don't follow the OpenAI API format.
+> - For OpenAI-compatible providers like OpenAI, Anthropic, or Groq, fill in only the required fields and leave the optional ones empty.
+
+4. Click **Install** to complete the setup
+
+### Examples
+#### OpenAI-Compatible Setup
+Here's how to set up OpenAI as a remote engine:
+
+1. Engine Name: `OpenAI`
+2. API URL: `https://api.openai.com`
+3. Model List URL: `https://api.openai.com/v1/models`
+4. API Key: Your OpenAI API key
+5. Leave other fields as default
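+
+Before clicking **Install**, you can optionally sanity-check the URL and key outside Jan. A minimal sketch (an illustration of this guide, not a Jan feature), assuming Node 18+ with global `fetch` and your own key in place of the placeholder:
+
+```javascript
+// Hit the provider's model list endpoint; a 2xx status means the URL and key work.
+fetch("https://api.openai.com/v1/models", {
+  headers: { Authorization: "Bearer YOUR_OPENAI_API_KEY" }, // placeholder key
+}).then((res) => {
+  console.log(res.ok ? "API key accepted" : `Request failed: ${res.status}`);
+});
+```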
+
+
+#### Custom APIs Setup
+If you're integrating an API that doesn't follow OpenAI's format, you'll need to use the conversion functions.
+Let's say you have a custom API with this format:
+
+```javascript
+// Custom API Request Format
+{
+ "prompt": "What is AI?",
+ "max_length": 100,
+ "temperature": 0.7
+}
+
+// Custom API Response Format
+{
+ "generated_text": "AI is...",
+ "tokens_used": 50,
+ "status": "success"
+}
+```
+
+Here's how to set it up in Jan:
+```
+Engine Name: Custom LLM
+API URL: https://api.customllm.com
+API Key: your_api_key_here
+```
+
+**Conversion Functions:**
+1. Request Format Conversion:
+```javascript
+function convertRequest(janRequest) {
+  // Jan sends an OpenAI-style request; this API expects a single prompt string,
+  // so take the latest message's content and map the token and temperature fields.
+  return {
+ prompt: janRequest.messages[janRequest.messages.length - 1].content,
+ max_length: janRequest.max_tokens || 100,
+ temperature: janRequest.temperature || 0.7
+ }
+}
+```
+
+2. Response Format Conversion:
+```javascript
+function convertResponse(apiResponse) {
+  // Wrap the API's flat response back into the OpenAI-style shape Jan expects.
+  return {
+ choices: [{
+ message: {
+ role: "assistant",
+ content: apiResponse.generated_text
+ }
+ }],
+ usage: {
+ total_tokens: apiResponse.tokens_used
+ }
+ }
+}
+```
+
+
+The conversion functions should:
+- Request: Convert from Jan's OpenAI-style format to your API's format
+- Response: Convert from your API's format back to OpenAI-style format
+
+
+**Expected Formats:**
+
+1. Jan's Request Format
+```javascript
+{
+ "messages": [
+ {"role": "user", "content": "What is AI?"}
+ ],
+ "max_tokens": 100,
+ "temperature": 0.7
+}
+```
+
+2. Jan's Expected Response Format
+```javascript
+{
+ "choices": [{
+ "message": {
+ "role": "assistant",
+ "content": "AI is..."
+ }
+ }],
+ "usage": {
+ "total_tokens": 50
+ }
+}
+```
+
+
+Make sure to test your conversion functions thoroughly. Incorrect conversions may cause errors or unexpected behavior.
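+
+A minimal sketch of such a test, reusing the `convertRequest`/`convertResponse` functions above with the sample formats:
+
+```javascript
+// Feed Jan's sample request through the converter and check the mapped fields.
+const janRequest = {
+  messages: [{ role: "user", content: "What is AI?" }],
+  max_tokens: 100,
+  temperature: 0.7
+};
+const apiRequest = convertRequest(janRequest);
+console.assert(apiRequest.prompt === "What is AI?", "prompt mapping failed");
+console.assert(apiRequest.max_length === 100, "max_length mapping failed");
+
+// Do the same in the response direction.
+const apiResponse = { generated_text: "AI is...", tokens_used: 50, status: "success" };
+const janResponse = convertResponse(apiResponse);
+console.assert(janResponse.choices[0].message.content === "AI is...", "content mapping failed");
+console.assert(janResponse.usage.total_tokens === 50, "usage mapping failed");
+```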
+
diff --git a/docs/src/pages/docs/installing-extension.mdx b/docs/src/pages/docs/install-extensions.mdx
similarity index 95%
rename from docs/src/pages/docs/installing-extension.mdx
rename to docs/src/pages/docs/install-extensions.mdx
index a560f948e..1e8078034 100644
--- a/docs/src/pages/docs/installing-extension.mdx
+++ b/docs/src/pages/docs/install-extensions.mdx
@@ -24,7 +24,7 @@ import { Callout } from 'nextra/components'
Jan comes with several [pre-installed extensions](/docs/extensions#core-extensions) that provide core functionalities. You can manually add custom third-party extensions at your own risk.
-## Creating Extensions
+## Create Extensions
Jan currently only accepts `.tgz` file format for extensions.
@@ -33,7 +33,7 @@ Jan currently only accepts `.tgz` file format for extensions.
> **Heads Up:**
> - Please use the following structure and setup as a **reference** only.
> - You're free to develop extensions using any approach or structure that works for your needs. As long as your extension can be packaged as a `.tgz` file, it can be installed in Jan. Feel free to experiment and innovate!
-> - If you already have your own `.tgz` extension file, please move forward to [install extension](/docs/installing-extension#install-extensions) step.
+> - If you already have your own `.tgz` extension file, you can skip ahead to the [install extensions](/docs/install-extensions#install-extensions) step.
#### Extension Structure
Your extension should follow this basic structure:
diff --git a/docs/src/pages/docs/built-in/_meta.json b/docs/src/pages/docs/local-engines/_meta.json
similarity index 53%
rename from docs/src/pages/docs/built-in/_meta.json
rename to docs/src/pages/docs/local-engines/_meta.json
index 0b15c47f2..8c2ab4fe2 100644
--- a/docs/src/pages/docs/built-in/_meta.json
+++ b/docs/src/pages/docs/local-engines/_meta.json
@@ -1,10 +1,10 @@
{
"llama-cpp": {
"title": "llama.cpp",
- "href": "/docs/built-in/llama-cpp"
+ "href": "/docs/local-engines/llama-cpp"
},
"tensorrt-llm": {
"title": "TensorRT-LLM",
- "href": "/docs/built-in/tensorrt-llm"
+ "href": "/docs/local-engines/tensorrt-llm"
}
}
diff --git a/docs/src/pages/docs/local-engines/llama-cpp.mdx b/docs/src/pages/docs/local-engines/llama-cpp.mdx
new file mode 100644
index 000000000..206bd06f0
--- /dev/null
+++ b/docs/src/pages/docs/local-engines/llama-cpp.mdx
@@ -0,0 +1,82 @@
+---
+title: llama.cpp
+description: A step-by-step guide on how to customize the llama.cpp engine.
+keywords:
+ [
+ Jan,
+ Customizable Intelligence, LLM,
+ local AI,
+ privacy focus,
+ free and open source,
+ private and offline,
+ conversational AI,
+ no-subscription fee,
+ large language models,
+ Llama CPP integration,
+ llama.cpp Engine,
+ Intel CPU,
+ AMD CPU,
+ NVIDIA GPU,
+ AMD GPU Radeon,
+ Apple Silicon,
+ Intel Arc GPU,
+ ]
+---
+
+import { Tabs } from 'nextra/components'
+import { Callout, Steps } from 'nextra/components'
+import { Settings, EllipsisVertical, Plus, FolderOpen, Pencil } from 'lucide-react'
+
+# llama.cpp (Cortex)
+
+## Overview
+Jan uses **llama.cpp** for running local AI models. You can find its settings in **Settings** () > **Local Engine** > **llama.cpp**:
+
+![llama.cpp](./_assets/llama.cpp-01.png)
+
+These settings are for advanced users. You'd typically check them when:
+- Your AI models are running slowly or not working
+- You've installed new hardware (like a graphics card)
+- You want to tinker and test performance with different [backends](/docs/local-engines/llama-cpp#available-backends)
+
+## Engine Version and Updates
+- **Engine Version**: View the current version of the llama.cpp engine
+- **Check Updates**: Check whether a newer version is available and install it
+
+
+## Available Backends
+
+Jan offers different backend variants for **llama.cpp** based on your operating system. You can:
+- Download different backends as needed
+- Switch between backends for different hardware configurations
+- View currently installed backends in the list
+
+
+Choose the backend that matches your hardware. Using the wrong variant may cause performance issues or prevent models from loading.
+
+
+### macOS
+- `mac-arm64`: For Apple Silicon Macs (M1/M2/M3)
+- `mac-amd64`: For Intel-based Macs
+
+### Windows
+- `win-cuda`: For NVIDIA GPUs using CUDA
+- `win-cpu`: For CPU-only operation
+- `win-directml`: For DirectML acceleration (AMD/Intel GPUs)
+- `win-opengl`: For OpenGL acceleration
+
+### Linux
+- `linux-cuda`: For NVIDIA GPUs using CUDA
+- `linux-cpu`: For CPU-only operation
+- `linux-rocm`: For AMD GPUs using ROCm
+- `linux-openvino`: For Intel GPUs/NPUs using OpenVINO
+- `linux-vulkan`: For Vulkan acceleration
+
+
+For detailed hardware compatibility, please visit our guides for [Mac](/docs/desktop/mac#compatibility), [Windows](/docs/desktop/windows#compatibility), and [Linux](/docs/desktop/linux).
+
+
+
+
diff --git a/docs/src/pages/docs/built-in/tensorrt-llm.mdx b/docs/src/pages/docs/local-engines/tensorrt-llm.mdx
similarity index 57%
rename from docs/src/pages/docs/built-in/tensorrt-llm.mdx
rename to docs/src/pages/docs/local-engines/tensorrt-llm.mdx
index 6ccbf0d58..c41e6e41f 100644
--- a/docs/src/pages/docs/built-in/tensorrt-llm.mdx
+++ b/docs/src/pages/docs/local-engines/tensorrt-llm.mdx
@@ -19,25 +19,51 @@ keywords:
]
---
+import { Tabs } from 'nextra/components'
import { Callout, Steps } from 'nextra/components'
+import { Settings, EllipsisVertical, Plus, FolderOpen, Pencil } from 'lucide-react'
# TensorRT-LLM
-
## Overview
+Jan uses **TensorRT-LLM** as an optional engine for faster inference on NVIDIA GPUs. This engine uses [Cortex-TensorRT-LLM](https://github.com/janhq/cortex.tensorrt-llm), which includes an efficient C++ server that executes the [TRT-LLM C++ runtime](https://nvidia.github.io/TensorRT-LLM/gpt_runtime.html) natively. It also includes features and performance improvements like OpenAI compatibility, tokenizer improvements, and queues.
-This guide walks you through installing Jan's official [TensorRT-LLM Engine](https://github.com/janhq/nitro-tensorrt-llm). This engine uses [Cortex-TensorRT-LLM](https://github.com/janhq/cortex.tensorrt-llm) as the AI engine instead of the default [Cortex-Llama-CPP](https://github.com/janhq/cortex). It includes an efficient C++ server that executes the [TRT-LLM C++ runtime](https://nvidia.github.io/TensorRT-LLM/gpt_runtime.html) natively. It also includes features and performance improvements like OpenAI compatibility, tokenizer improvements, and queues.
-
-
- This feature is only available for Windows users. Linux is coming soon.
+
+Currently available only for **Windows** users; **Linux** support is coming soon!
-### Pre-requisites
+You can find its settings in **Settings** () > **Local Engine** > **TensorRT-LLM**:
-- A **Windows** PC.
-- **Nvidia GPU(s)**: Ada or Ampere series (i.e. RTX 4000s & 3000s). More will be supported soon.
-- Sufficient disk space for the TensorRT-LLM models and data files (space requirements vary depending on the model size).
+## Requirements
+- NVIDIA GPU with Compute Capability 7.0 or higher (RTX 20xx series and above)
+- Minimum 8GB VRAM (16GB+ recommended for larger models)
+- Updated NVIDIA drivers
+- CUDA Toolkit 11.8 or newer
+
+
+For a detailed setup guide, please visit the [Windows](/docs/desktop/windows#compatibility) page.
+
+
+## Engine Version and Updates
+- **Engine Version**: View the current version of the TensorRT-LLM engine
+- **Check Updates**: Check whether a newer version is available and install it
+
+## Available Backends
+
+TensorRT-LLM is specifically designed for NVIDIA GPUs. Available backends include:
+
+**Windows**
+- `win-cuda`: For NVIDIA GPUs with CUDA support
+
+
+TensorRT-LLM requires an NVIDIA GPU with CUDA support. It is not compatible with other GPU types or CPU-only systems.
+
+
+
+
+## Enable TensorRT-LLM
+
### Step 1: Install TensorRT-Extension
@@ -74,15 +100,4 @@ We offer a handful of precompiled models for Ampere and Ada cards that you can i
3. Click **Download** to download the model.
-### Step 3: Configure Settings
-
-1. Navigate to the Thread section.
-2. Select the model that you have downloaded.
-3. Customize the default parameters of the model for how Jan runs TensorRT-LLM.
-
- Please see [here](/docs/models/model-parameters) for more detailed model parameters.
-
-
-
-
\ No newline at end of file
diff --git a/docs/src/pages/docs/models/manage-models.mdx b/docs/src/pages/docs/models/manage-models.mdx
index dbbfa5d2d..e8889c9d8 100644
--- a/docs/src/pages/docs/models/manage-models.mdx
+++ b/docs/src/pages/docs/models/manage-models.mdx
@@ -108,10 +108,13 @@ You need to own your **model configurations**, use at your own risk. Misconfigur
#### 4. Manual Setup
For advanced users who add a specific model that is not available within Jan **Hub**:
-1. Navigate to `~/jan/data/models/`
-2. Create a new **Folder** for your model
-3. Add a `model.json` file with your configuration:
-```
+
+##### Step 1: Create Model File
+1. Navigate to the [Jan Data Folder](/docs/data-folder#open-jan-data-folder)
+2. Open `models` folder
+3. Create a new **Folder** for your model
+4. Add a `model.json` file with your configuration:
+```json
"id": "",
"object": "",
"name": "",
@@ -130,9 +133,50 @@ For advanced users who add a specific model that is not available within Jan **H
"engine": "",
"source": ""
```
-Key fields to configure:
+Here's the model "TinyLlama Chat 1.1B Q4" as an example:
+```json
+{
+ "sources": [
+ {
+ "filename": "tinyllama-1.1b-chat-v1.0.Q4_K_M.gguf",
+ "url": "https://huggingface.co/TheBloke/TinyLlama-1.1B-Chat-v1.0-GGUF/resolve/main/tinyllama-1.1b-chat-v1.0.Q4_K_M.gguf"
+ }
+ ],
+ "id": "tinyllama-1.1b",
+ "object": "model",
+ "name": "TinyLlama Chat 1.1B Q4",
+ "version": "1.0",
+ "description": "TinyLlama is a tiny model with only 1.1B. It's a good model for less powerful computers.",
+ "format": "gguf",
+ "settings": {
+ "ctx_len": 4096,
+ "prompt_template": "<|system|>\n{system_message}<|user|>\n{prompt}<|assistant|>",
+ "llama_model_path": "tinyllama-1.1b-chat-v1.0.Q4_K_M.gguf"
+ },
+ "parameters": {
+ "temperature": 0.7,
+ "top_p": 0.95,
+ "stream": true,
+ "max_tokens": 2048,
+ "stop": [],
+ "frequency_penalty": 0,
+ "presence_penalty": 0
+ },
+ "metadata": {
+ "author": "TinyLlama",
+ "tags": [
+ "Tiny",
+ "Foundation Model"
+ ],
+ "size": 669000000
+ },
+ "engine": "nitro"
+}
+```
+##### Step 2: Modify Model Parameters
+Modify the model parameters under the `settings` and `parameters` sections. Key fields to configure:
1. **Settings** is where you can set your engine configurations.
-2. [**Parameters**](/docs/models#model-parameters) are the adjustable settings that affect how your model operates or processes the data. The fields in parameters are typically general and can be the same across models. Here is an example of model parameters:
+2. [**Parameters**](/docs/models/model-parameters) are the adjustable settings that affect how your model operates or processes the data. The fields in parameters are typically general and can be the same across models. Here is an example of model parameters:
```
"parameters":{
"temperature": 0.7,
@@ -142,7 +186,7 @@ Key fields to configure:
"frequency_penalty": 0,
"presence_penalty": 0
```
-
+
### Delete Models
1. Go to **Settings** () > **My Models**