Merge branch 'main' into docs/fix-error-link
@@ -70,25 +70,25 @@ Jan is an open-source ChatGPT alternative that runs 100% offline on your computer
 <tr style="text-align:center">
   <td style="text-align:center"><b>Experimental (Nightly Build)</b></td>
   <td style="text-align:center">
-    <a href='https://delta.jan.ai/0.4.3-118/jan-win-x64-0.4.3-118.exe'>
+    <a href='https://delta.jan.ai/0.4.3-123/jan-win-x64-0.4.3-123.exe'>
       <img src='./docs/static/img/windows.png' style="height:14px; width: 14px" />
       <b>jan.exe</b>
     </a>
   </td>
   <td style="text-align:center">
-    <a href='https://delta.jan.ai/0.4.3-118/jan-mac-x64-0.4.3-118.dmg'>
+    <a href='https://delta.jan.ai/0.4.3-123/jan-mac-x64-0.4.3-123.dmg'>
       <img src='./docs/static/img/mac.png' style="height:15px; width: 15px" />
       <b>Intel</b>
     </a>
   </td>
   <td style="text-align:center">
-    <a href='https://delta.jan.ai/0.4.3-118/jan-mac-arm64-0.4.3-118.dmg'>
+    <a href='https://delta.jan.ai/0.4.3-123/jan-mac-arm64-0.4.3-123.dmg'>
       <img src='./docs/static/img/mac.png' style="height:15px; width: 15px" />
       <b>M1/M2</b>
     </a>
   </td>
   <td style="text-align:center">
-    <a href='https://delta.jan.ai/0.4.3-118/jan-linux-amd64-0.4.3-118.deb'>
+    <a href='https://delta.jan.ai/0.4.3-123/jan-linux-amd64-0.4.3-123.deb'>
       <img src='./docs/static/img/linux.png' style="height:14px; width: 14px" />
       <b>jan.deb</b>
     </a>
USAGE.md (new file, 60 lines)
@@ -0,0 +1,60 @@
## Requirements for running the Jan app in GPU mode on Windows and Linux

- You must have an NVIDIA driver that supports CUDA 11.4 or higher. Refer to the compatibility table [here](https://docs.nvidia.com/deploy/cuda-compatibility/index.html#binary-compatibility__table-toolkit-driver).

To check whether the NVIDIA driver is installed, open PowerShell or Terminal and enter the following command:

```bash
nvidia-smi
```

If you see a result similar to the following, the NVIDIA driver is installed correctly:

```bash
+-----------------------------------------------------------------------------+
| NVIDIA-SMI 470.57.02    Driver Version: 470.57.02    CUDA Version: 11.4     |
|-------------------------------+----------------------+----------------------+
| GPU  Name        Persistence-M| Bus-Id        Disp.A | Volatile Uncorr. ECC |
| Fan  Temp  Perf  Pwr:Usage/Cap|         Memory-Usage | GPU-Util  Compute M. |
|                               |                      |               MIG M. |
|===============================+======================+======================|
|   0  NVIDIA GeForce ...  Off  | 00000000:01:00.0  On |                  N/A |
|  0%   51C    P8    10W / 170W |    364MiB /  7982MiB |      0%      Default |
|                               |                      |                  N/A |
+-------------------------------+----------------------+----------------------+
```

- You must have CUDA 11.4 or higher.

To check whether CUDA is installed, open PowerShell or Terminal and enter the following command:

```bash
nvcc --version
```

If you see a result similar to the following, CUDA is installed correctly:

```bash
nvcc: NVIDIA (R) Cuda compiler driver

Cuda compilation tools, release 11.4, V11.4.100
Build cuda_11.4.r11.4/compiler.30033411_0
```

- Specifically for Linux, you will need a CUDA compatible driver (see the compatibility table [here](https://docs.nvidia.com/deploy/cuda-compatibility/index.html#binary-compatibility__table-toolkit-driver)), and you must add the `.so` libraries of CUDA and of the CUDA compatible driver to the `LD_LIBRARY_PATH` environment variable (see the post-installation actions [here](https://docs.nvidia.com/cuda/cuda-installation-guide-linux/index.html#post-installation-actions)), for example as sketched below.
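On Ubuntu with the CUDA toolkit installed in its default location, that can look like the following minimal sketch (the exact directories depend on your CUDA installation, so treat these paths as placeholders; `/usr/lib/x86_64-linux-gnu` is the directory Jan's bundled engine also searches):

```bash
# Add the CUDA toolkit libraries and the distro's driver library directory
# to the dynamic linker search path for the current shell session.
export LD_LIBRARY_PATH=/usr/local/cuda/lib64:/usr/lib/x86_64-linux-gnu:$LD_LIBRARY_PATH

# Then launch Jan from this same shell so the app inherits the variable.
```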

## How to switch the Jan app between CPU and GPU mode

By default, the Jan app runs in CPU mode. When it starts, Jan automatically checks whether your computer meets the requirements to run in GPU mode. If it does, GPU mode is enabled automatically and the GPU with the highest VRAM is selected for you (a feature that lets users select one or more specific GPU devices is currently in planning). You can check whether you are running in CPU or GPU mode in the Settings > Advanced section of the Jan app (see image below).

If you have a GPU but GPU mode is not enabled by default, one of the following may be the cause; follow the matching step to fix the error:

1. You have not installed the NVIDIA driver. Refer to the NVIDIA drivers that support CUDA 11.4 [here](https://docs.nvidia.com/deploy/cuda-compatibility/index.html#binary-compatibility__table-toolkit-driver).

2. You have not installed the CUDA toolkit, or your CUDA toolkit is not compatible with the NVIDIA driver. Refer to the CUDA compatibility table [here](https://docs.nvidia.com/deploy/cuda-compatibility/index.html#binary-compatibility__table-toolkit-driver).

3. You have not installed a CUDA compatible driver (see the table [here](https://docs.nvidia.com/deploy/cuda-compatibility/index.html#binary-compatibility__table-toolkit-driver)), or the CUDA libraries are not on your library path. On Linux, add the `.so` libraries of CUDA and of the CUDA compatible driver to the `LD_LIBRARY_PATH` environment variable (see [here](https://docs.nvidia.com/cuda/cuda-installation-guide-linux/index.html#post-installation-actions)). On Windows, add the `.dll` libraries of CUDA and of the CUDA compatible driver to the `PATH` environment variable; this is usually done automatically when installing CUDA on Windows, but if it is missing you can add it manually by following [this guide](https://docs.nvidia.com/cuda/cuda-installation-guide-microsoft-windows/index.html#environment-setup). A quick check that the libraries are discoverable is sketched after this list.
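On Linux you can verify that the CUDA libraries are discoverable before launching Jan. The library names below (`libcudart`, `libcublas`) are the ones this release's Nitro engine probes for; on Windows the equivalent check is confirming the matching `.dll` files are on `PATH`:

```bash
# List the CUDA runtime and cuBLAS libraries known to the dynamic linker (Linux only).
ldconfig -p | grep -E 'libcudart|libcublas'

# Show the extra directories Jan/Nitro will search besides its bundled paths.
echo "$LD_LIBRARY_PATH"
```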

## Checking the GPU settings Jan has detected

To check the current GPU-related settings that the Jan app has detected, go to the Settings > Advanced section as shown in the images below.

![jan-gpu-enable-setting](docs/static/img/usage/jan-gpu-enable-setting.png)

![jan-open-settings-1](docs/static/img/usage/jan-open-settings-1.png)

![jan-open-settings-2](docs/static/img/usage/jan-open-settings-2.png)

![jan-open-settings-3](docs/static/img/usage/jan-open-settings-3.png)

When you have an issue with GPU mode, sharing your `settings.json` with us will help us solve the problem faster.
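As a rough illustration, the GPU-related fields in that file look like the sketch below. The key names are taken from the GPU-detection code included in this release; your actual file will contain additional settings, and the exact layout may differ:

```json
{
  "run_mode": "gpu",
  "cuda": {
    "exist": true,
    "version": "12"
  }
}
```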

## Tested on

- Windows 11 Pro 64-bit, NVIDIA GeForce RTX 4070ti GPU, CUDA 12.2, NVIDIA driver 531.18
- Ubuntu 22.04 LTS, NVIDIA GeForce RTX 4070ti GPU, CUDA 12.2, NVIDIA driver 545
@@ -53,9 +53,10 @@ export enum FileSystemRoute {
   writeFileSync = 'writeFileSync',
 }
 export enum FileManagerRoute {
-  synceFile = 'syncFile',
+  syncFile = 'syncFile',
   getUserSpace = 'getUserSpace',
   getResourcePath = 'getResourcePath',
+  fileStat = 'fileStat',
 }

 export type ApiFunction = (...args: any[]) => any
@@ -1,3 +1,5 @@
+import { FileStat } from './types'
+
 /**
  * Execute an extension module function in main process
  *
@@ -74,6 +76,15 @@ const openExternalUrl: (url: string) => Promise<any> = (url) =>
  */
 const getResourcePath: () => Promise<string> = () => global.core.api?.getResourcePath()

+/**
+ * Gets the file's stats.
+ *
+ * @param path - The path to the file.
+ * @returns {Promise<FileStat>} - A promise that resolves with the file's stats.
+ */
+const fileStat: (path: string) => Promise<FileStat | undefined> = (path) =>
+  global.core.api?.fileStat(path)
+
 /**
  * Register extension point function type definition
  */
@@ -97,4 +108,5 @@ export {
   joinPath,
   openExternalUrl,
   baseName,
+  fileStat,
 }
core/src/node/api/routes/fileManager.ts (new file, 12 lines)
@@ -0,0 +1,12 @@
import { FileManagerRoute } from '../../../api'
import { HttpServer } from '../../index'

export const fsRouter = async (app: HttpServer) => {
  app.post(`/app/${FileManagerRoute.syncFile}`, async (request: any, reply: any) => {})

  app.post(`/app/${FileManagerRoute.getUserSpace}`, async (request: any, reply: any) => {})

  app.post(`/app/${FileManagerRoute.getResourcePath}`, async (request: any, reply: any) => {})

  app.post(`/app/${FileManagerRoute.fileStat}`, async (request: any, reply: any) => {})
}
core/src/types/file/index.ts (new file, 4 lines)
@@ -0,0 +1,4 @@
export type FileStat = {
  isDirectory: boolean
  size: number
}

@@ -4,3 +4,4 @@ export * from './thread'
 export * from './message'
 export * from './inference'
 export * from './monitoring'
+export * from './file'

@@ -64,7 +64,7 @@ Jan is a startup with an open source business model. We believe in the need for
 We use Github to build in public, and welcome anyone to join in.

 - [Jan's Kanban](https://github.com/orgs/janhq/projects/5)
-- [Jan's Roadmap](https://github.com/orgs/janhq/projects/5/views/2)
+- [Jan's Roadmap](https://github.com/orgs/janhq/projects/5/views/29)

 ### Bootstrapped

@@ -15,36 +15,32 @@ keywords:
   ]
 ---

-Jan is a ChatGPT-alternative that runs on your own computer, with a [local API server](/api-reference).
+Jan is a ChatGPT alternative that runs on your own computer, with a [local API server](/guides/using-server).

-Jan uses [open-source AI models](/docs/engineering/models), stores data in [open file formats](/developer/file-based), is highly customizable via [extensions](/developer/build-extension).
-
-Jan believes in the need for an open source AI ecosystem. We aim to build infra and tooling to allow open source AIs to compete on a level playing field with proprietary offerings.
+We believe in the need for an open source AI ecosystem. We're focused on building infra, tooling and [custom models](https://huggingface.co/janhq) to allow open source AIs to compete on a level playing field with proprietary offerings.

 ## Features

 - Compatible with [open-source models](/guides/using-models) (GGUF, TensorRT, and remote APIs)
 - Compatible with most OSes: [Windows](/install/windows/), [Mac](/install/mac), [Linux](/install/linux), with/without GPU acceleration
 - Stores data in [open file formats](/developer/file-based)
 - Customizable via [extensions](/developer/build-extension)
 - And more in the [roadmap](https://github.com/orgs/janhq/projects/5/views/16). Join us on [Discord](https://discord.gg/5rQ2zTv3be) and tell us what you want to see!

 ## Why Jan?

 #### 💻 Own your AI

-Jan runs 100% on your own machine, [predictably](https://www.reddit.com/r/LocalLLaMA/comments/17mghqr/comment/k7ksti6/?utm_source=share&utm_medium=web2x&context=3), privately and even offline. No one else can see your conversations, not even us.
+Jan runs 100% on your own machine, predictably, privately and offline. No one else can see your conversations, not even us.

 #### 🏗️ Extensions

-Jan ships with a powerful [extension framework](/developer/build-extension), which allows developers to extend and customize Jan's functionality. In fact, most core modules of Jan are [built as extensions](/developer/architecture) and use the same extensions API.
+Jan ships with a local-first, AI-native, and cross platform [extensions framework](/developer/build-extension). Developers can extend and customize everything from functionality to UI to branding. In fact, Jan's current main features are actually built as extensions on top of this framework.

 #### 🗂️ Open File Formats

-Jan stores data in a [local folder of non-proprietary files](/developer/architecture). You're never locked-in and can do what you want with your data with extensions, or even a different app.
+Jan stores data in your [local filesystem](/developer/file-based). Your data never leaves your computer. You are free to delete, export, migrate your data, even to a different platform.

 #### 🌍 Open Source

 Both Jan and [Nitro](https://nitro.jan.ai), our lightweight inference engine, are licensed via the open source [AGPLv3 license](https://github.com/janhq/jan/blob/main/LICENSE).

 <!-- ## Design Principles -->

 <!-- OpenAI meets VSCode meets Obsidian.

 Minimalism: https://docusaurus.io/docs#design-principles. Not having abstractions is better than having the wrong abstractions. Assistants as code. Only including features that are absolutely necessary in the Jan API.

 File-based: User should be able to look at a Jan directory and intuit how it works. Transparency. Editing things via a text editor, vs. needing a database tool for SQLite.

 Participatory: https://www.getlago.com/blog/the-5-reasons-why-we-chose-open-source -->

docs/docs/guides/02-installation/05-hardware.md (new file, 55 lines)
@@ -0,0 +1,55 @@
---
title: Hardware Requirements
description: Jan is a ChatGPT-alternative that runs on your own computer, with a local API server.
keywords:
  [
    Jan AI,
    Jan,
    ChatGPT alternative,
    local AI,
    private AI,
    conversational AI,
    no-subscription fee,
    large language model,
  ]
---

Jan is designed to be lightweight and able to run Large Language Models (LLMs) out-of-the-box.

The current download size is less than 150 MB, and the installed app takes up ~300 MB of disk space.

To ensure optimal performance, please see the following system requirements:

## Disk Space

- Minimum requirement
  - At least 5 GB of free disk space is required to accommodate the download, storage, and management of open-source LLM models.
- Recommended
  - For an optimal experience and to run most available open-source LLM models on Jan, it is recommended to have 10 GB of free disk space.

## RAM and GPU VRAM

The amount of RAM on your system plays a crucial role in determining the size and complexity of the LLM models you can effectively run. Jan can be used on traditional computers where RAM is a key resource. For enhanced performance, Jan also supports GPU acceleration, utilizing the VRAM of your graphics card.

## Best Models for your V/RAM

The RAM and GPU VRAM requirements depend on the size and complexity of the LLM models you intend to run. The following general guidelines will help you determine the amount of RAM or VRAM you need to run LLM models on Jan; a rough worked example follows this list:

- `8 GB of RAM`: Suitable for running smaller models, like 3B models or quantized 7B models
- `16 GB of RAM (recommended)`: This is considered the "minimum usable models" threshold, particularly for 7B models (e.g. Mistral 7B)
- `Beyond 16 GB of RAM`: Required for handling larger and more sophisticated models, such as 70B models
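As a rough worked example (assuming 4-bit quantization, which uses about half a byte per parameter): a 7B model needs roughly 7B × 0.5 bytes ≈ 3.5 GB of RAM or VRAM for the weights alone, plus headroom for the context window and the operating system. This is why 8 GB is a comfortable floor for quantized 7B models and 16 GB is recommended.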

## Architecture

Jan is designed to run on multiple architectures, ensuring versatility and widespread usability. The supported architectures include:

### CPU Support

- `x86`: Jan is well-suited for systems with x86 architecture, which is commonly found in traditional desktops and laptops. It ensures smooth performance on a variety of devices using x86 processors.
- `ARM`: Jan is optimized to run efficiently on ARM-based systems, extending compatibility to a broad range of devices using ARM processors.

### GPU Support

- `NVIDIA`
- `AMD`
- `ARM64 Mac`
@@ -21,9 +21,8 @@ import TabItem from "@theme/TabItem";
In this quickstart we'll show you how to:

- Download the Jan Desktop client - Mac, Windows, Linux, (and toaster) compatible
- Download and customize models
- Import custom models
- Use the local server at port `1337`
- Download the Nightly (unstable) version
- Build the application from source

## Setup
@@ -50,89 +49,3 @@ In this quickstart we'll show you how to:
- To build Jan Desktop from scratch (and have the right to tinker!)

See the [Build from Source](/install/from-source) guide.

### Working with Models

Jan provides a list of recommended models to get you started.
You can find them in the in-app Hub.

1. `cmd + k` and type "hub" to open the Hub.
2. Download your preferred models.
3. `cmd + k` and type "chat" to open the conversation UI and start chatting.
4. Your model may take a few seconds to start up.
5. You can customize the model settings, at each conversation thread level, on the right panel.
6. To change model defaults globally, edit the `model.json` file, as sketched below. See the [Models](/guides/models) guide.
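A minimal sketch of what such a file can contain, assuming the engine and model parameter fields documented in this changeset's API reference (`zephyr-7b` and the values below are illustrative defaults, not a definitive schema):

```json
{
  "id": "zephyr-7b",
  "engine_parameters": {
    "ctx_len": 2048,
    "ngl": 100,
    "n_parallel": 1
  },
  "parameters": {
    "temperature": 0.7,
    "token_limit": 2048,
    "top_k": 0,
    "top_p": 1
  }
}
```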

### Importing Models

Jan is compatible with all GGUF models.

For more information on how to import custom models, not found in the Hub, see the [Models](/guides/models) guide.

## Working with the Local Server

> This feature is currently under development. So expect bugs!

Jan runs a local server on port `1337` by default.

The endpoints are OpenAI compatible.

See the [API server guide](/guides/server) for more information.
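For example, once the server is running you can exercise the chat completions endpoint with the request below (adapted from the API reference in this same changeset; replace `tinyllama-1.1b` with whichever model you have downloaded and started):

```bash
curl http://localhost:1337/v1/chat/completions \
  -H "Content-Type: application/json" \
  -d '{
    "model": "tinyllama-1.1b",
    "messages": [
      { "role": "system", "content": "You are a helpful assistant." },
      { "role": "user", "content": "Hello!" }
    ]
  }'
```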

## Next Steps

---

TODO: Merge this in:

Getting up and running open-source AI models on your own computer with Jan is quick and easy. Jan is lightweight and can run on a variety of hardware and platform versions. Specific requirements tailored to your platform are outlined below.

## Cross platform

A free, open-source alternative to OpenAI that runs on the Linux, macOS, and Windows operating systems. Please refer to the specific guides below for your platform:

- [Linux](/install/linux)
- [MacOS (Mac Intel Chip and Mac Apple Silicon Chip)](/install/mac)
- [Windows](/install/windows)

## Requirements for Jan

### Hardware

Jan is a lightweight platform designed for seamless download, storage, and execution of open-source Large Language Models (LLMs). With a small download size of less than 200 MB and a disk footprint of under 300 MB, Jan is optimized for efficiency and should run smoothly on modern hardware.

To ensure optimal performance while using Jan and handling LLM models, it is recommended to meet the following system requirements:

#### Disk space

- Minimum requirement
  - At least 5 GB of free disk space is required to accommodate the download, storage, and management of open-source LLM models.
- Recommended
  - For an optimal experience and to run most available open-source LLM models on Jan, it is recommended to have 10 GB of free disk space.

#### Random Access Memory (RAM) and Graphics Processing Unit Video Random Access Memory (GPU VRAM)

The amount of RAM on your system plays a crucial role in determining the size and complexity of LLM models you can effectively run. Jan can be utilized on traditional computers where RAM is a key resource. For enhanced performance, Jan also supports GPU acceleration, utilizing the VRAM of your graphics card.

#### Relationship between RAM and VRAM Sizes in Relation to LLM Models

The RAM and GPU VRAM requirements are dependent on the size and complexity of the LLM models you intend to run. The following are some general guidelines to help you determine the amount of RAM or VRAM you need to run LLM models on Jan:

- 8 GB of RAM: Suitable for running smaller models like 3B models or quantized 7B models
- 16 GB of RAM (recommended): This is considered the "minimum usable models" threshold, particularly for 7B models (e.g. Mistral 7B)
- Beyond 16 GB of RAM: Required for handling larger and more sophisticated models, such as 70B models.

### Architecture

Jan is designed to run on multiple architectures, ensuring versatility and widespread usability. The supported architectures include:

#### CPU

- x86: Jan is well-suited for systems with x86 architecture, which is commonly found in traditional desktops and laptops. It ensures smooth performance on a variety of devices using x86 processors.
- ARM: Jan is optimized to run efficiently on ARM-based systems, extending compatibility to a broad range of devices using ARM processors.

#### GPU

- NVIDIA: Jan optimizes the computational capabilities of NVIDIA GPUs, achieving efficiency through the utilization of llama.cpp. This strategic integration enhances the performance of Jan, particularly in resource-intensive Language Model (LLM) tasks. Users can expect accelerated processing and improved responsiveness when leveraging the processing capabilities inherent in NVIDIA GPUs.
- AMD: Users with AMD GPUs can seamlessly integrate Jan's GPU acceleration, offering a comprehensive solution for diverse hardware configurations and preferences.
- ARM64 Mac: Jan seamlessly supports ARM64 architecture on Mac systems, leveraging Metal for efficient GPU operations. This ensures a smooth and efficient experience for users with Apple Silicon Chips, utilizing the power of Metal for optimal performance on ARM64 Mac devices.
@@ -1,12 +0,0 @@
---
title: Starting a Thread
---

Rough outline:
Choosing an assistant
Setting assistant instructions
At thread level
Globally, as default
Choosing a model
Customizing model params (thread level)
Customizing engine params

@@ -1,3 +0,0 @@
---
title: Uploading docs
---

@@ -1,3 +0,0 @@
---
title: Uploading Images
---
@@ -1,3 +1,56 @@
---
title: Manage Chat History
slug: /guides/chatting/manage-history/
description: Jan is a ChatGPT-alternative that runs on your own computer, with a local API server.
keywords:
  [
    Jan AI,
    Jan,
    ChatGPT alternative,
    local AI,
    private AI,
    conversational AI,
    no-subscription fee,
    large language model,
    manage-chat-history,
  ]
---

Jan offers a convenient and private way to interact with a conversational AI locally on your computer. This guide will walk you through how to manage your chat history with Jan, ensuring your interactions remain private and organized.

## Viewing Chat History

1. Navigate to the main dashboard.
2. Locate the list of threads on the left side of the screen. This list shows all your conversations.
3. Select a thread to view the conversation in the main chat window.
4. Scroll up and down to view the entire chat history in the selected thread.

<br></br>

![viewing-chat-history](./assets/viewing-chat-history.gif)

## Managing Threads via Folders

This feature allows you to directly manage your thread history and configurations.

1. Navigate to the thread that you want to manage via the list of threads on the left side of the dashboard.
2. Click the three dots (⋮) in the `Thread` section on the right side of the dashboard. There are two options:

- `Reveal in Finder` opens the folder containing the thread history and configurations.
- `View as JSON` opens the thread's `thread.json` file in your default browser.

<br></br>

(image: managing threads via folders)

## Clean Thread

To streamline your conversation view, click the three dots (⋮) on the thread you want to clean, then select `Clean Thread`. This removes all messages from the thread. It is useful if you want to keep the thread settings but remove the messages from the chat window.

<br></br>

![clean-thread](./assets/clean-thread.gif)

## Delete Thread

To delete a thread, click the three dots (⋮) on the thread you want to delete, then select `Delete Thread`. This removes the thread from the list of threads.

<br></br>

![delete-thread](./assets/delete-thread.gif)

BIN docs/docs/guides/03-chatting/assets/choose-model.png (new file; 360 KiB)
BIN docs/docs/guides/03-chatting/assets/clean-thread.gif (new file; 8.5 MiB)
BIN docs/docs/guides/03-chatting/assets/customize-model-params.png (new file; 342 KiB)
BIN docs/docs/guides/03-chatting/assets/delete-thread.gif (new file; 10 MiB)
BIN (filename not captured) (new file; 18 MiB)
BIN (filename not captured) (new file; 333 KiB)
BIN docs/docs/guides/03-chatting/assets/setting-thread-title.png (new file; 342 KiB)
BIN docs/docs/guides/03-chatting/assets/start-thread.gif (new file; 13 MiB)
BIN docs/docs/guides/03-chatting/assets/viewing-chat-history.gif (new file; 11 MiB)
@@ -1,3 +0,0 @@
---
title: Customize Model Defaults
---

@@ -1,3 +0,0 @@
---
title: Package & Publish Models
---
@@ -1,31 +1,31 @@
 ---
 openapi: 3.0.0
 info:
   title: API Reference
   description: >
     # Introduction

-    Jan API is compatible with the [OpenAI
-    API](https://platform.openai.com/docs/api-reference).
+    Jan API is compatible with the [OpenAI API](https://platform.openai.com/docs/api-reference).
   version: 0.1.8
   contact:
     name: Jan Discord
-    url: "https://discord.gg/7EcEz7MrvA"
+    url: https://discord.gg/7EcEz7MrvA
   license:
     name: AGPLv3
-    url: "https://github.com/janhq/nitro/blob/main/LICENSE"
+    url: https://github.com/janhq/nitro/blob/main/LICENSE
 servers:
-  - url: "http://localhost:1337/v1/"
+  - url: http://localhost:1337/v1/
 tags:
   - name: Models
     description: List and describe the various models available in the API.
   - name: Chat
     description: >
-      Given a list of messages comprising a conversation, the model will return
-      a response.
+      Given a list of messages comprising a conversation, the model will
+      return a response.
   - name: Messages
     description: >
-      Messages capture a conversation's content. This can include the content
-      from LLM responses and other metadata from [chat
+      Messages capture a conversation's content. This can include the
+      content from LLM responses and other metadata from [chat
       completions](/specs/chats).
   - name: Threads
   - name: Assistants
@@ -49,34 +49,37 @@ paths:
       summary: |
         Create chat completion
       description: >
-        Creates a model response for the given chat conversation. <a href =
-        "https://platform.openai.com/docs/api-reference/chat/create"> Equivalent
-        to OpenAI's create chat completion. </a>
+        Creates a model response for the given chat conversation. <a href
+        = "https://platform.openai.com/docs/api-reference/chat/create">
+        Equivalent to OpenAI's create chat completion. </a>
       requestBody:
         content:
           application/json:
             schema:
-              $ref: "specs/chat.yaml#/components/schemas/ChatCompletionRequest"
+              $ref: specs/chat.yaml#/components/schemas/ChatCompletionRequest
       responses:
         "200":
           description: OK
           content:
             application/json:
               schema:
-                $ref: "specs/chat.yaml#/components/schemas/ChatCompletionResponse"
+                $ref: specs/chat.yaml#/components/schemas/ChatCompletionResponse
       x-codeSamples:
         - lang: cURL
-          source: >
-            curl -X POST
-            'http://localhost:3982/inferences/llamacpp/chat_completion' \
+          source: |
+            curl http://localhost:1337/v1/chat/completions \
              -H "Content-Type: application/json" \
              -d '{
-              "llama_model_path": "/path/to/your/model.gguf",
+              "model": "tinyllama-1.1b",
               "messages": [
                 {
-                  "role": "user",
-                  "content": "hello"
+                  "role": "system",
+                  "content": "You are a helpful assistant."
+                },
+                {
+                  "role": "user",
+                  "content": "Hello!"
                 }
               ]
             }'
   /models:
@@ -86,17 +89,17 @@ paths:
         - Models
       summary: List models
       description: >
-        Lists the currently available models, and provides basic information
-        about each one such as the owner and availability. <a href =
-        "https://platform.openai.com/docs/api-reference/models/list"> Equivalent
-        to OpenAI's list model. </a>
+        Lists the currently available models, and provides basic
+        information about each one such as the owner and availability. <a href
+        = "https://platform.openai.com/docs/api-reference/models/list">
+        Equivalent to OpenAI's list model. </a>
       responses:
         "200":
           description: OK
           content:
             application/json:
               schema:
-                $ref: "specs/models.yaml#/components/schemas/ListModelsResponse"
+                $ref: specs/models.yaml#/components/schemas/ListModelsResponse
       x-codeSamples:
         - lang: cURL
           source: |
@@ -114,7 +117,7 @@ paths:
           content:
             application/json:
               schema:
-                $ref: "specs/models.yaml#/components/schemas/DownloadModelResponse"
+                $ref: specs/models.yaml#/components/schemas/DownloadModelResponse
       x-codeSamples:
         - lang: cURL
           source: |
@@ -126,8 +129,8 @@ paths:
         - Models
       summary: Retrieve model
       description: >
-        Get a model instance, providing basic information about the model such
-        as the owner and permissioning. <a href =
+        Get a model instance, providing basic information about the model
+        such as the owner and permissioning. <a href =
         "https://platform.openai.com/docs/api-reference/models/retrieve">
         Equivalent to OpenAI's retrieve model. </a>
       parameters:
@@ -145,7 +148,7 @@ paths:
           content:
             application/json:
               schema:
-                $ref: "specs/models.yaml#/components/schemas/GetModelResponse"
+                $ref: specs/models.yaml#/components/schemas/GetModelResponse
       x-codeSamples:
         - lang: cURL
           source: |
@@ -174,7 +177,7 @@ paths:
           content:
             application/json:
               schema:
-                $ref: "specs/models.yaml#/components/schemas/DeleteModelResponse"
+                $ref: specs/models.yaml#/components/schemas/DeleteModelResponse
       x-codeSamples:
         - lang: cURL
           source: |
@@ -202,7 +205,7 @@ paths:
           content:
             application/json:
               schema:
-                $ref: "specs/models.yaml#/components/schemas/StartModelResponse"
+                $ref: specs/models.yaml#/components/schemas/StartModelResponse
       x-codeSamples:
         - lang: cURL
           source: |
@@ -229,7 +232,7 @@ paths:
           content:
             application/json:
               schema:
-                $ref: "specs/models.yaml#/components/schemas/StopModelResponse"
+                $ref: specs/models.yaml#/components/schemas/StopModelResponse
       x-codeSamples:
         - lang: cURL
           source: |
@@ -255,14 +258,14 @@ paths:
                   type: array
                   description: Initial set of messages for the thread.
                   items:
-                    $ref: "specs/threads.yaml#/components/schemas/ThreadMessageObject"
+                    $ref: specs/threads.yaml#/components/schemas/ThreadMessageObject
       responses:
         "200":
           description: Thread created successfully
           content:
             application/json:
               schema:
-                $ref: "specs/threads.yaml#/components/schemas/CreateThreadResponse"
+                $ref: specs/threads.yaml#/components/schemas/CreateThreadResponse
       x-codeSamples:
         - lang: cURL
           source: |
@@ -293,7 +296,7 @@ paths:
               schema:
                 type: array
                 items:
-                  $ref: "specs/threads.yaml#/components/schemas/ThreadObject"
+                  $ref: specs/threads.yaml#/components/schemas/ThreadObject
                 example:
                   - id: thread_abc123
                     object: thread
@@ -340,7 +343,7 @@ paths:
           content:
             application/json:
               schema:
-                $ref: "specs/threads.yaml#/components/schemas/GetThreadResponse"
+                $ref: specs/threads.yaml#/components/schemas/GetThreadResponse
       x-codeSamples:
         - lang: cURL
           source: |
@@ -374,14 +377,14 @@ paths:
                   type: array
                   description: Set of messages to update in the thread.
                   items:
-                    $ref: "specs/threads.yaml#/components/schemas/ThreadMessageObject"
+                    $ref: specs/threads.yaml#/components/schemas/ThreadMessageObject
       responses:
         "200":
           description: Thread modified successfully
           content:
             application/json:
               schema:
-                $ref: "specs/threads.yaml#/components/schemas/ModifyThreadResponse"
+                $ref: specs/threads.yaml#/components/schemas/ModifyThreadResponse
       x-codeSamples:
         - lang: cURL
           source: |
@@ -421,7 +424,7 @@ paths:
           content:
             application/json:
               schema:
-                $ref: "specs/threads.yaml#/components/schemas/DeleteThreadResponse"
+                $ref: specs/threads.yaml#/components/schemas/DeleteThreadResponse
       x-codeSamples:
         - lang: cURL
           source: |
@@ -448,7 +451,7 @@ paths:
           content:
             application/json:
               schema:
-                $ref: "specs/threads.yaml#/components/schemas/GetThreadResponse"
+                $ref: specs/threads.yaml#/components/schemas/GetThreadResponse
       x-codeSamples:
         - lang: cURL
           source: |
@@ -484,7 +487,7 @@ paths:
           content:
             application/json:
               schema:
-                $ref: "specs/threads.yaml#/components/schemas/GetThreadResponse"
+                $ref: specs/threads.yaml#/components/schemas/GetThreadResponse
       x-codeSamples:
         - lang: cURL
           source: |
@@ -513,7 +516,7 @@ paths:
                     created_at: 1698984975
                     name: Math Tutor
                     description: null
-                    avatar: "https://pic.png"
+                    avatar: https://pic.png
                     models:
                       - model_id: model_0
                     instructions: Be concise
@@ -527,7 +530,7 @@ paths:
                     created_at: 1698984975
                     name: Physics Tutor
                     description: null
-                    avatar: "https://pic.png"
+                    avatar: https://pic.png
                     models:
                       - model_id: model_1
                     instructions: Be concise!
@@ -559,8 +562,7 @@ paths:
               properties:
                 models:
                   type: array
-                  description: >-
-                    List of models associated with the assistant. Jan-specific
+                  description: List of models associated with the assistant. Jan-specific
                     property.
                   items:
                     type: object
@@ -574,8 +576,7 @@ paths:
           content:
             application/json:
               schema:
-                $ref: >-
-                  specs/assistants.yaml#/components/schemas/CreateAssistantResponse
+                $ref: specs/assistants.yaml#/components/schemas/CreateAssistantResponse
       x-codeSamples:
         - lang: cURL
           source: |
@@ -613,8 +614,7 @@ paths:
           content:
             application/json:
               schema:
-                $ref: >-
-                  specs/assistants.yaml#/components/schemas/RetrieveAssistantResponse
+                $ref: specs/assistants.yaml#/components/schemas/RetrieveAssistantResponse
       x-codeSamples:
         - lang: cURL
           source: |
@@ -647,8 +647,7 @@ paths:
               properties:
                 models:
                   type: array
-                  description: >-
-                    List of models associated with the assistant. Jan-specific
+                  description: List of models associated with the assistant. Jan-specific
                     property.
                   items:
                     type: object
@@ -670,8 +669,7 @@ paths:
           content:
             application/json:
               schema:
-                $ref: >-
-                  specs/assistants.yaml#/components/schemas/ModifyAssistantResponse
+                $ref: specs/assistants.yaml#/components/schemas/ModifyAssistantResponse
       x-codeSamples:
         - lang: cURL
           source: |
@@ -710,8 +708,7 @@ paths:
           content:
             application/json:
               schema:
-                $ref: >-
-                  specs/assistants.yaml#/components/schemas/DeleteAssistantResponse
+                $ref: specs/assistants.yaml#/components/schemas/DeleteAssistantResponse
       x-codeSamples:
         - lang: cURL
           source: |
@@ -741,7 +738,7 @@ paths:
           content:
             application/json:
               schema:
-                $ref: "specs/messages.yaml#/components/schemas/ListMessagesResponse"
+                $ref: specs/messages.yaml#/components/schemas/ListMessagesResponse
       x-codeSamples:
         - lang: cURL
           source: |
@@ -794,7 +791,7 @@ paths:
           content:
             application/json:
               schema:
-                $ref: "specs/messages.yaml#/components/schemas/CreateMessageResponse"
+                $ref: specs/messages.yaml#/components/schemas/CreateMessageResponse
       x-codeSamples:
         - lang: cURL
           source: |
@@ -838,12 +835,12 @@ paths:
           content:
             application/json:
               schema:
-                $ref: "specs/messages.yaml#/components/schemas/GetMessageResponse"
+                $ref: specs/messages.yaml#/components/schemas/GetMessageResponse
       x-codeSamples:
         - lang: cURL
           source: >
-            curl
-            http://localhost:1337/v1/threads/{thread_id}/messages/{message_id} \
+            curl http://localhost:1337/v1/threads/{thread_id}/messages/{message_id}
+            \
             -H "Content-Type: application/json"
   "/threads/{thread_id}/messages/{message_id}/files":
     get:
@@ -879,8 +876,7 @@ paths:
           content:
             application/json:
               schema:
-                $ref: >-
-                  specs/messages.yaml#/components/schemas/ListMessageFilesResponse
+                $ref: specs/messages.yaml#/components/schemas/ListMessageFilesResponse
       x-codeSamples:
         - lang: cURL
           source: >
@@ -895,8 +891,8 @@ paths:
         - Messages
       summary: Retrieve message file
       description: >
-        Retrieves a file associated with a specific message in a thread. <a
-        href =
+        Retrieves a file associated with a specific message in a
+        thread. <a href =
         "https://platform.openai.com/docs/api-reference/messages/getMessageFile">
         Equivalent to OpenAI's retrieve message file. </a>
       parameters:
@@ -930,7 +926,7 @@ paths:
           content:
             application/json:
               schema:
-                $ref: "specs/messages.yaml#/components/schemas/MessageFileObject"
+                $ref: specs/messages.yaml#/components/schemas/MessageFileObject
       x-codeSamples:
         - lang: cURL
           source: >
@@ -953,14 +949,15 @@ x-webhooks:
       content:
         application/json:
           schema:
-            $ref: "specs/models.yaml#/components/schemas/ModelObject"
+            $ref: specs/models.yaml#/components/schemas/ModelObject
   AssistantObject:
     post:
       summary: The assistant object
       description: >
-        Build assistants that can call models and use tools to perform tasks.
-        <a href = "https://platform.openai.com/docs/api-reference/assistants">
-        Equivalent to OpenAI's assistants object. </a>
+        Build assistants that can call models and use tools to perform
+        tasks. <a href =
+        "https://platform.openai.com/docs/api-reference/assistants"> Equivalent
+        to OpenAI's assistants object. </a>
       operationId: AssistantObjects
       tags:
         - Assistants
@@ -968,7 +965,7 @@ x-webhooks:
       content:
         application/json:
           schema:
-            $ref: "specs/assistants.yaml#/components/schemas/AssistantObject"
+            $ref: specs/assistants.yaml#/components/schemas/AssistantObject
   MessageObject:
     post:
       summary: The message object
@@ -983,12 +980,11 @@ x-webhooks:
       content:
         application/json:
           schema:
-            $ref: "specs/messages.yaml#/components/schemas/MessageObject"
+            $ref: specs/messages.yaml#/components/schemas/MessageObject
   ThreadObject:
     post:
       summary: The thread object
-      description: >-
-        Represents a thread that contains messages. <a href =
+      description: Represents a thread that contains messages. <a href =
         "https://platform.openai.com/docs/api-reference/threads/object">
         Equivalent to OpenAI's thread object. </a>
       operationId: ThreadObject
@@ -998,4 +994,4 @@ x-webhooks:
       content:
         application/json:
           schema:
-            $ref: "specs/threads.yaml#/components/schemas/ThreadObject"
+            $ref: specs/threads.yaml#/components/schemas/ThreadObject
@@ -1,3 +1,4 @@
+---
 components:
   schemas:
     AssistantObject:
@@ -9,7 +10,7 @@ components:
           example: asst_abc123
         object:
           type: string
-          description: "Type of the object, indicating it's an assistant."
+          description: Type of the object, indicating it's an assistant.
           default: assistant
         version:
           type: integer
@@ -31,7 +32,7 @@ components:
         avatar:
           type: string
           description: URL of the assistant's avatar. Jan-specific property.
-          example: "https://pic.png"
+          example: https://pic.png
         models:
           type: array
           description: List of models associated with the assistant. Jan-specific property.
@@ -70,7 +71,7 @@ components:
           example: asst_abc123
         object:
           type: string
-          description: "Type of the object, indicating it's an assistant."
+          description: Type of the object, indicating it's an assistant.
           default: assistant
         version:
           type: integer
@@ -92,7 +93,7 @@ components:
         avatar:
           type: string
           description: URL of the assistant's avatar. Jan-specific property.
-          example: "https://pic.png"
+          example: https://pic.png
         models:
           type: array
           description: List of models associated with the assistant. Jan-specific property.
@@ -130,7 +131,7 @@ components:
           example: asst_abc123
         object:
           type: string
-          description: "Type of the object, indicating it's an assistant."
+          description: Type of the object, indicating it's an assistant.
           default: assistant
         version:
           type: integer
@@ -152,7 +153,7 @@ components:
         avatar:
           type: string
           description: URL of the assistant's avatar. Jan-specific property.
-          example: "https://pic.png"
+          example: https://pic.png
         models:
           type: array
           description: List of models associated with the assistant. Jan-specific property.
@@ -190,7 +191,7 @@ components:
           example: asst_abc123
         object:
           type: string
-          description: "Type of the object, indicating it's an assistant."
+          description: Type of the object, indicating it's an assistant.
           default: assistant
         version:
           type: integer
@@ -212,7 +213,7 @@ components:
         avatar:
           type: string
           description: URL of the assistant's avatar. Jan-specific property.
-          example: "https://pic.png"
+          example: https://pic.png
         models:
           type: array
           description: List of models associated with the assistant. Jan-specific property.
@@ -250,7 +251,7 @@ components:
           example: asst_abc123
         object:
           type: string
-          description: "Type of the object, indicating it's an assistant."
+          description: Type of the object, indicating it's an assistant.
           default: assistant
         version:
           type: integer
@@ -272,7 +273,7 @@ components:
         avatar:
           type: string
           description: URL of the assistant's avatar. Jan-specific property.
-          example: "https://pic.png"
+          example: https://pic.png
         models:
           type: array
           description: List of models associated with the assistant. Jan-specific property.
@@ -310,7 +311,7 @@ components:
           example: asst_abc123
         object:
           type: string
-          description: "Type of the object, indicating the assistant has been deleted."
+          description: Type of the object, indicating the assistant has been deleted.
           example: assistant.deleted
         deleted:
           type: boolean

@@ -1,3 +1,4 @@
+---
 components:
   schemas:
     ChatObject:
@@ -15,8 +16,7 @@ components:
         stream:
           type: boolean
           default: true
-          description: >-
-            Enables continuous output generation, allowing for streaming of
+          description: Enables continuous output generation, allowing for streaming of
             model responses.
         model:
           type: string
@@ -25,27 +25,23 @@ components:
         max_tokens:
           type: number
           default: 2048
-          description: >-
-            The maximum number of tokens the model will generate in a single
+          description: The maximum number of tokens the model will generate in a single
             response.
         stop:
           type: array
           example:
             - hello
-          description: >-
-            Defines specific tokens or phrases at which the model will stop
+          description: Defines specific tokens or phrases at which the model will stop
             generating further output.
         frequency_penalty:
           type: number
           default: 0
-          description: >-
-            Adjusts the likelihood of the model repeating words or phrases in
+          description: Adjusts the likelihood of the model repeating words or phrases in
             its output.
         presence_penalty:
           type: number
           default: 0
-          description: >-
-            Influences the generation of new and varied concepts in the model's
+          description: Influences the generation of new and varied concepts in the model's
             output.
         temperature:
           type: number
@@ -71,13 +67,13 @@ components:
           description: |
             Contains input data or prompts for the model to process.
           example:
-            - content: "Hello there :wave:"
-              role: assistant
-            - content: Can you write a long story
+            - content: You are a helpful assistant.
+              role: system
+            - content: Hello!
               role: user
         model:
           type: string
-          example: model-zephyr-7B
+          example: tinyllama-1.1b
           description: |
             Specifies the model being used for inference or processing tasks.
         stream:
@@ -139,7 +135,7 @@ components:
           type: string
           nullable: true
           example: null
-          description: "Reason for finishing the response, if applicable"
+          description: Reason for finishing the response, if applicable
         index:
           type: integer
           example: 0

@@ -1,3 +1,4 @@
+---
 components:
   schemas:
     ListModelsResponse:
@@ -27,8 +28,7 @@ components:
           description: The version number of the model.
         id:
           type: string
-          description: >-
-            Unique identifier used in chat-completions model_name, matches
+          description: Unique identifier used in chat-completions model_name, matches
             folder name.
           example: zephyr-7b
         name:
@@ -57,14 +57,13 @@ components:
           description: Current state of the model.
         format:
           type: string
-          description: "State format of the model, distinct from the engine."
+          description: State format of the model, distinct from the engine.
           example: ggufv3
         source_url:
           type: string
           format: uri
           description: URL to the source of the model.
-          example: >-
-            https://huggingface.co/TheBloke/zephyr-7B-beta-GGUF/blob/main/zephyr-7b-beta.Q4_K_M.gguf
+          example: https://huggingface.co/TheBloke/zephyr-7B-beta-GGUF/blob/main/zephyr-7b-beta.Q4_K_M.gguf
         settings:
           type: object
           properties:
@@ -152,7 +151,7 @@ components:
           example: zephyr-7b
         object:
           type: string
-          description: "Type of the object, indicating it's a model."
+          description: Type of the object, indicating it's a model.
           default: model
         created:
           type: integer
@@ -174,8 +173,7 @@ components:
           type: string
           format: uri
           description: URL to the source of the model.
-          example: >-
-            https://huggingface.co/TheBloke/zephyr-7B-beta-GGUF/blob/main/zephyr-7b-beta.Q4_K_M.gguf
+          example: https://huggingface.co/TheBloke/zephyr-7B-beta-GGUF/blob/main/zephyr-7b-beta.Q4_K_M.gguf
         engine_parameters:
           type: object
           properties:
@@ -198,8 +196,7 @@ components:
               default: "ASSISTANT: "
             ngl:
               type: integer
-              description: >-
-                Number of neural network layers loaded onto the GPU for
+              description: Number of neural network layers loaded onto the GPU for
                 acceleration.
               minimum: 0
               maximum: 100
@@ -207,18 +204,16 @@ components:
               example: 100
             ctx_len:
               type: integer
-              description: >-
-                Context length for model operations, varies based on the
-                specific model.
+              description: Context length for model operations, varies based on the specific
+                model.
               minimum: 128
               maximum: 4096
               default: 2048
               example: 2048
             n_parallel:
               type: integer
-              description: >-
-                Number of parallel operations, relevant when continuous batching
-                is enabled.
+              description: Number of parallel operations, relevant when continuous batching is
+                enabled.
              minimum: 1
              maximum: 10
              default: 1
@@ -269,8 +264,7 @@ components:
              example: 4
            temperature:
              type: number
-              description: >-
-                Controls randomness in model's responses. Higher values lead to
+              description: Controls randomness in model's responses. Higher values lead to
                more random responses.
              minimum: 0
              maximum: 2
@@ -278,8 +272,7 @@ components:
              example: 0.7
            token_limit:
              type: integer
-              description: >-
-                Maximum number of tokens the model can generate in a single
+              description: Maximum number of tokens the model can generate in a single
                response.
              minimum: 1
              maximum: 4096
@@ -287,18 +280,16 @@ components:
              example: 2048
            top_k:
              type: integer
-              description: >-
-                Limits the model to consider only the top k most likely next
-                tokens at each step.
+              description: Limits the model to consider only the top k most likely next tokens
+                at each step.
              minimum: 0
              maximum: 100
              default: 0
              example: 0
            top_p:
              type: number
-              description: >-
-                Nucleus sampling parameter. The model considers the smallest set
-                of tokens whose cumulative probability exceeds the top_p value.
+              description: Nucleus sampling parameter. The model considers the smallest set of
+                tokens whose cumulative probability exceeds the top_p value.
              minimum: 0
              maximum: 1
              default: 1

@@ -1,3 +1,4 @@
+---
 components:
   schemas:
     ThreadObject:
@@ -39,13 +40,13 @@ components:
         settings:
           type: object
           description: >
-            Defaults to and overrides assistant.json's "settings" (and
-            if none, then model.json "settings")
+            Defaults to and overrides assistant.json's "settings" (and if none,
+            then model.json "settings")
         parameters:
           type: object
           description: >
-            Defaults to and overrides assistant.json's "parameters"
-            (and if none, then model.json "parameters")
+            Defaults to and overrides assistant.json's "parameters" (and if
+            none, then model.json "parameters")
         created:
           type: integer
           format: int64
@@ -141,7 +142,7 @@ components:
           example: thread_abc123
         object:
           type: string
-          description: "Type of the object, indicating it's a thread."
+          description: Type of the object, indicating it's a thread.
           example: thread
         created_at:
           type: integer
@@ -161,7 +162,7 @@ components:
           example: thread_abc123
         object:
           type: string
-          description: "Type of the object, indicating the thread has been deleted."
+          description: Type of the object, indicating the thread has been deleted.
           example: thread.deleted
         deleted:
           type: boolean

BIN docs/static/img/usage/jan-gpu-enable-setting.png (vendored, new file; 110 KiB)
BIN docs/static/img/usage/jan-open-home-directory.png (vendored, new file; 106 KiB)
BIN docs/static/img/usage/jan-open-settings-1.png (vendored, new file; 111 KiB)
BIN docs/static/img/usage/jan-open-settings-2.png (vendored, new file; 43 KiB)
BIN docs/static/img/usage/jan-open-settings-3.png (vendored, new file; 129 KiB)
@@ -4,14 +4,17 @@ import reflect from '@alumna/reflect'

 import { FileManagerRoute } from '@janhq/core'
 import { userSpacePath, getResourcePath } from './../utils/path'
+import fs from 'fs'
+import { join } from 'path'
+import { FileStat } from '@janhq/core/.'

 /**
  * Handles file system extensions operations.
  */
 export function handleFileMangerIPCs() {
-  // Handles the 'synceFile' IPC event. This event is triggered to synchronize a file from a source path to a destination path.
+  // Handles the 'syncFile' IPC event. This event is triggered to synchronize a file from a source path to a destination path.
   ipcMain.handle(
-    FileManagerRoute.synceFile,
+    FileManagerRoute.syncFile,
     async (_event, src: string, dest: string) => {
       return reflect({
         src,
@@ -31,7 +34,33 @@ export function handleFileMangerIPCs() {
   )

   // Handles the 'getResourcePath' IPC event. This event is triggered to get the resource path.
-  ipcMain.handle(FileManagerRoute.getResourcePath, async (_event) => {
-    return getResourcePath()
-  })
+  ipcMain.handle(FileManagerRoute.getResourcePath, async (_event) =>
+    getResourcePath()
+  )
+
+  // Handles the 'fileStat' IPC event: strips any file:// prefix, resolves the
+  // path inside the user space, and returns the entry's stats (or undefined).
+  ipcMain.handle(
+    FileManagerRoute.fileStat,
+    async (_event, path: string): Promise<FileStat | undefined> => {
+      const normalizedPath = path
+        .replace(`file://`, '')
+        .replace(`file:/`, '')
+        .replace(`file:\\\\`, '')
+        .replace(`file:\\`, '')
+
+      const fullPath = join(userSpacePath, normalizedPath)
+      const isExist = fs.existsSync(fullPath)
+      if (!isExist) return undefined
+
+      const isDirectory = fs.lstatSync(fullPath).isDirectory()
+      const size = fs.statSync(fullPath).size
+
+      const fileStat: FileStat = {
+        isDirectory,
+        size,
+      }
+
+      return fileStat
+    }
+  )
 }
@@ -1 +1 @@
-0.1.32
+0.1.34
@@ -1,3 +1,3 @@
 @echo off
 set /p NITRO_VERSION=<./bin/version.txt
-.\node_modules\.bin\download https://github.com/janhq/nitro/releases/download/v%NITRO_VERSION%/nitro-%NITRO_VERSION%-win-amd64-cuda.tar.gz -e --strip 1 -o ./bin/win-cuda && .\node_modules\.bin\download https://github.com/janhq/nitro/releases/download/v%NITRO_VERSION%/nitro-%NITRO_VERSION%-win-amd64.tar.gz -e --strip 1 -o ./bin/win-cpu
+.\node_modules\.bin\download https://github.com/janhq/nitro/releases/download/v%NITRO_VERSION%/nitro-%NITRO_VERSION%-win-amd64-cuda-12-0.tar.gz -e --strip 1 -o ./bin/win-cuda-12-0 && .\node_modules\.bin\download https://github.com/janhq/nitro/releases/download/v%NITRO_VERSION%/nitro-%NITRO_VERSION%-win-amd64-cuda-11-4.tar.gz -e --strip 1 -o ./bin/win-cuda-11-4 && .\node_modules\.bin\download https://github.com/janhq/nitro/releases/download/v%NITRO_VERSION%/nitro-%NITRO_VERSION%-win-amd64.tar.gz -e --strip 1 -o ./bin/win-cpu
@@ -8,7 +8,7 @@
  "license": "AGPL-3.0",
  "scripts": {
    "build": "tsc -b . && webpack --config webpack.config.js",
    "downloadnitro:linux": "NITRO_VERSION=$(cat ./bin/version.txt) && download https://github.com/janhq/nitro/releases/download/v${NITRO_VERSION}/nitro-${NITRO_VERSION}-linux-amd64.tar.gz -e --strip 1 -o ./bin/linux-cpu && chmod +x ./bin/linux-cpu/nitro && download https://github.com/janhq/nitro/releases/download/v${NITRO_VERSION}/nitro-${NITRO_VERSION}-linux-amd64-cuda.tar.gz -e --strip 1 -o ./bin/linux-cuda && chmod +x ./bin/linux-cuda/nitro",
    "downloadnitro:linux": "NITRO_VERSION=$(cat ./bin/version.txt) && download https://github.com/janhq/nitro/releases/download/v${NITRO_VERSION}/nitro-${NITRO_VERSION}-linux-amd64.tar.gz -e --strip 1 -o ./bin/linux-cpu && chmod +x ./bin/linux-cpu/nitro && download https://github.com/janhq/nitro/releases/download/v${NITRO_VERSION}/nitro-${NITRO_VERSION}-linux-amd64-cuda-12-0.tar.gz -e --strip 1 -o ./bin/linux-cuda-12-0 && chmod +x ./bin/linux-cuda-12-0/nitro && download https://github.com/janhq/nitro/releases/download/v${NITRO_VERSION}/nitro-${NITRO_VERSION}-linux-amd64-cuda-11-4.tar.gz -e --strip 1 -o ./bin/linux-cuda-11-4 && chmod +x ./bin/linux-cuda-11-4/nitro",
    "downloadnitro:darwin": "NITRO_VERSION=$(cat ./bin/version.txt) && download https://github.com/janhq/nitro/releases/download/v${NITRO_VERSION}/nitro-${NITRO_VERSION}-mac-arm64.tar.gz -e --strip 1 -o ./bin/mac-arm64 && chmod +x ./bin/mac-arm64/nitro && download https://github.com/janhq/nitro/releases/download/v${NITRO_VERSION}/nitro-${NITRO_VERSION}-mac-amd64.tar.gz -e --strip 1 -o ./bin/mac-x64 && chmod +x ./bin/mac-x64/nitro",
    "downloadnitro:win32": "download.bat",
    "downloadnitro": "run-script-os",
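Both download scripts now fetch two GPU builds instead of one, so the on-disk layout gains `-cuda-12-0` and `-cuda-11-4` suffixes. A sketch of the resulting folder naming; the mapping function itself is illustrative:

```ts
// Maps a detected CUDA major version to the nitro binary folder laid out
// by the download scripts above (win-cpu, win-cuda-11-4, win-cuda-12-0,
// and their linux- counterparts).
function nitroBinaryFolder(platform: NodeJS.Platform, cudaVersion?: '11' | '12'): string {
  const prefix = platform === 'win32' ? 'win' : 'linux'
  if (!cudaVersion) return `${prefix}-cpu` // no usable CUDA runtime found
  return cudaVersion === '12' ? `${prefix}-cuda-12-0` : `${prefix}-cuda-11-4`
}
```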
@@ -30,7 +30,10 @@ export function requestInference(
      signal: controller?.signal,
    })
      .then(async (response) => {
        if (model.parameters.stream) {
        if (model.parameters.stream === false) {
          const data = await response.json();
          subscriber.next(data.choices[0]?.message?.content ?? "");
        } else {
          const stream = response.body;
          const decoder = new TextDecoder("utf-8");
          const reader = stream?.getReader();
@@ -54,9 +57,6 @@ export function requestInference(
            }
          }
        }
        } else {
          const data = await response.json();
          subscriber.next(data.choices[0]?.message?.content ?? "");
        }
        subscriber.complete();
      })
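The inverted condition makes the non-streaming branch the guarded case. The same split in isolation, using only standard fetch/stream APIs; the SSE line parsing done in the elided hunk is omitted here:

```ts
// Sketch: consume an OpenAI-style chat completion either as one JSON body
// or as a decoded byte stream, mirroring the branch above.
async function readCompletion(
  response: Response,
  stream: boolean,
  onText: (text: string) => void
): Promise<void> {
  if (!stream) {
    // Non-streaming: a single JSON body in chat-completion shape.
    const data = await response.json()
    onText(data.choices[0]?.message?.content ?? '')
    return
  }
  // Streaming: decode the body chunk by chunk.
  const reader = response.body?.getReader()
  const decoder = new TextDecoder('utf-8')
  while (reader) {
    const { done, value } = await reader.read()
    if (done) break
    onText(decoder.decode(value)) // raw chunk; real code still parses SSE lines
  }
}
```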
@@ -85,28 +85,40 @@ function checkFileExistenceInPaths(file: string, paths: string[]): boolean {
}

function updateCudaExistence() {
  let files: string[];
  let filesCuda12: string[];
  let filesCuda11: string[];
  let paths: string[];
  let cudaVersion: string = "";

  if (process.platform === "win32") {
    files = ["cublas64_12.dll", "cudart64_12.dll", "cublasLt64_12.dll"];
    filesCuda12 = ["cublas64_12.dll", "cudart64_12.dll", "cublasLt64_12.dll"];
    filesCuda11 = ["cublas64_11.dll", "cudart64_11.dll", "cublasLt64_11.dll"];
    paths = process.env.PATH ? process.env.PATH.split(path.delimiter) : [];
    const nitro_cuda_path = path.join(__dirname, "bin", "win-cuda");
    paths.push(nitro_cuda_path);
  } else {
    files = ["libcudart.so.12", "libcublas.so.12", "libcublasLt.so.12"];
    filesCuda12 = ["libcudart.so.12", "libcublas.so.12", "libcublasLt.so.12"];
    filesCuda11 = ["libcudart.so.11.0", "libcublas.so.11", "libcublasLt.so.11"];
    paths = process.env.LD_LIBRARY_PATH
      ? process.env.LD_LIBRARY_PATH.split(path.delimiter)
      : [];
    const nitro_cuda_path = path.join(__dirname, "bin", "linux-cuda");
    paths.push(nitro_cuda_path);
    paths.push("/usr/lib/x86_64-linux-gnu/");
  }

  let cudaExists = files.every(
  let cudaExists = filesCuda12.every(
    (file) => existsSync(file) || checkFileExistenceInPaths(file, paths)
  );

  if (!cudaExists) {
    cudaExists = filesCuda11.every(
      (file) => existsSync(file) || checkFileExistenceInPaths(file, paths)
    );
    if (cudaExists) {
      cudaVersion = "11";
    }
  } else {
    cudaVersion = "12";
  }

  let data;
  try {
    data = JSON.parse(readFileSync(NVIDIA_INFO_FILE, "utf-8"));
@@ -115,6 +127,7 @@ function updateCudaExistence() {
  }

  data["cuda"].exist = cudaExists;
  data["cuda"].version = cudaVersion;
  if (cudaExists) {
    data.run_mode = "gpu";
  }
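The fields read and written here imply a settings file shaped roughly as follows. This is inferred from the accesses alone; the real file may carry more keys:

```ts
// Assumed shape of the JSON behind NVIDIA_INFO_FILE, based only on the
// accesses in updateCudaExistence and spawnNitroProcess.
interface NvidiaInfo {
  run_mode: 'cpu' | 'gpu'
  gpu_highest_vram: string // identifier of the GPU with the most VRAM
  cuda: {
    exist: boolean
    version: '' | '11' | '12'
  }
}
```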
@@ -376,12 +389,17 @@ function spawnNitroProcess(nitroResourceProbe: any): Promise<any> {
  let cudaVisibleDevices = "";
  let binaryName;
  if (process.platform === "win32") {
    let nvida_info = JSON.parse(readFileSync(NVIDIA_INFO_FILE, "utf-8"));
    if (nvida_info["run_mode"] === "cpu") {
    let nvidiaInfo = JSON.parse(readFileSync(NVIDIA_INFO_FILE, "utf-8"));
    if (nvidiaInfo["run_mode"] === "cpu") {
      binaryFolder = path.join(binaryFolder, "win-cpu");
    } else {
      binaryFolder = path.join(binaryFolder, "win-cuda");
      cudaVisibleDevices = nvida_info["gpu_highest_vram"];
      if (nvidiaInfo["cuda"].version === "12") {
        binaryFolder = path.join(binaryFolder, "win-cuda-12-0");
      } else {
        binaryFolder = path.join(binaryFolder, "win-cuda-11-4");
      }
      cudaVisibleDevices = nvidiaInfo["gpu_highest_vram"];
    }
    binaryName = "nitro.exe";
  } else if (process.platform === "darwin") {
@@ -392,12 +410,17 @@ function spawnNitroProcess(nitroResourceProbe: any): Promise<any> {
    }
    binaryName = "nitro";
  } else {
    let nvida_info = JSON.parse(readFileSync(NVIDIA_INFO_FILE, "utf-8"));
    if (nvida_info["run_mode"] === "cpu") {
    let nvidiaInfo = JSON.parse(readFileSync(NVIDIA_INFO_FILE, "utf-8"));
    if (nvidiaInfo["run_mode"] === "cpu") {
      binaryFolder = path.join(binaryFolder, "linux-cpu");
    } else {
      binaryFolder = path.join(binaryFolder, "linux-cuda");
      cudaVisibleDevices = nvida_info["gpu_highest_vram"];
      if (nvidiaInfo["cuda"].version === "12") {
        binaryFolder = path.join(binaryFolder, "linux-cuda-12-0");
      } else {
        binaryFolder = path.join(binaryFolder, "linux-cuda-11-4");
      }
      cudaVisibleDevices = nvidiaInfo["gpu_highest_vram"];
    }
    binaryName = "nitro";
  }
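With the folder chosen per CUDA version, the launch step pins the highest-VRAM GPU. Judging by the variable name, `cudaVisibleDevices` presumably ends up as a `CUDA_VISIBLE_DEVICES` environment variable, though that is an assumption here, not something this diff shows:

```ts
import { spawn } from 'child_process'
import path from 'path'

// Sketch: start the selected nitro binary with one GPU exposed. Passing
// the value via CUDA_VISIBLE_DEVICES is assumed, not taken from the diff.
function launchNitro(binaryFolder: string, binaryName: string, cudaVisibleDevices: string) {
  return spawn(path.join(binaryFolder, binaryName), [], {
    env: { ...process.env, CUDA_VISIBLE_DEVICES: cudaVisibleDevices },
  })
}
```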
@@ -46,7 +46,10 @@ export function requestInference(
          subscriber.complete();
          return;
        }
        if (model.parameters.stream) {
        if (model.parameters.stream === false) {
          const data = await response.json();
          subscriber.next(data.choices[0]?.message?.content ?? "");
        } else {
          const stream = response.body;
          const decoder = new TextDecoder("utf-8");
          const reader = stream?.getReader();
@@ -70,9 +73,6 @@ export function requestInference(
            }
          }
        }
        } else {
          const data = await response.json();
          subscriber.next(data.choices[0]?.message?.content ?? "");
        }
        subscriber.complete();
      })
@@ -1,6 +1,6 @@
{
  "name": "@janhq/model-extension",
  "version": "1.0.17",
  "version": "1.0.18",
  "description": "Model Management Extension provides model exploration and seamless downloads",
  "main": "dist/index.js",
  "module": "dist/module.js",
@@ -5,11 +5,12 @@ import {
  abortDownload,
  getResourcePath,
  getUserSpace,
  fileStat,
  InferenceEngine,
  joinPath,
  ModelExtension,
  Model,
} from '@janhq/core'
import { ModelExtension, Model } from '@janhq/core'
import { baseName } from '@janhq/core/.'
/**
 * An extension for models
@@ -21,6 +22,9 @@ export default class JanModelExtension implements ModelExtension {
  private static readonly _incompletedModelFileName = '.download'
  private static readonly _offlineInferenceEngine = InferenceEngine.nitro

  private static readonly _configDirName = 'config'
  private static readonly _defaultModelFileName = 'default-model.json'
/**
 * Implements type from JanExtension.
 * @override
@@ -46,17 +50,16 @@ export default class JanModelExtension implements ModelExtension {

  private async copyModelsToHomeDir() {
    try {
      // list all of the files under the home directory

      if (await fs.existsSync(JanModelExtension._homeDir)) {
        // ignore if the model is already downloaded
      // Check for migration conditions
      if (
        localStorage.getItem(`${EXTENSION_NAME}-version`) === VERSION &&
        (await fs.existsSync(JanModelExtension._homeDir))
      ) {
        // ignore if there is no need to migrate
        console.debug('Models already persisted.')
        return
      }

      // Get available models
      const readyModels = (await this.getDownloadedModels()).map((e) => e.id)

      // copy models folder from resources to home directory
      const resourePath = await getResourcePath()
      const srcPath = await joinPath([resourePath, 'models'])
@@ -68,18 +71,7 @@ export default class JanModelExtension implements ModelExtension {

      console.debug('Finished syncing models')

      const reconfigureModels = (await this.getConfiguredModels()).filter((e) =>
        readyModels.includes(e.id)
      )
      console.debug('Finished updating downloaded models')

      // update back the status
      await Promise.all(
        reconfigureModels.map(async (model) => this.saveModel(model))
      )

      // Finished migration
      localStorage.setItem(`${EXTENSION_NAME}-version`, VERSION)
    } catch (err) {
      console.error(err)
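The copy is now gated on a version marker instead of a bare existence check, so bundled models are re-synced whenever the extension version changes. The gate in miniature; `EXTENSION_NAME` and `VERSION` are the extension's build-time constants, and the helper itself is illustrative:

```ts
// Run `migrate` once per extension version; subsequent calls are no-ops
// until the version string changes again.
async function migrateIfNeeded(
  extensionName: string,
  version: string,
  migrate: () => Promise<void>
): Promise<void> {
  const key = `${extensionName}-version`
  if (localStorage.getItem(key) === version) return // already migrated
  await migrate()
  localStorage.setItem(key, version) // mark this version as done
}
```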
@@ -199,7 +191,7 @@ export default class JanModelExtension implements ModelExtension {
  ): Promise<Model[]> {
    try {
      if (!(await fs.existsSync(JanModelExtension._homeDir))) {
        console.debug('model folder not found')
        console.debug('Model folder not found')
        return []
      }
@@ -220,6 +212,9 @@ export default class JanModelExtension implements ModelExtension {
          dirName,
          JanModelExtension._modelMetadataFileName,
        ])

        if (await fs.existsSync(jsonPath)) {
          // if we have the model.json file, read it
          let model = await this.readModelMetadata(jsonPath)
          model = typeof model === 'object' ? model : JSON.parse(model)
@@ -227,6 +222,12 @@ export default class JanModelExtension implements ModelExtension {
            return
          }
          return model
        } else {
          // otherwise, we generate our own model file
          // TODO: we might have more than one binary file here. This will be
          // addressed with the new version of the Model file (see Hiro's PR).
          return this.generateModelMetadata(dirName)
        }
      })
      const results = await Promise.allSettled(readJsonPromises)
      const modelData = results.map((result) => {
@@ -254,6 +255,84 @@ export default class JanModelExtension implements ModelExtension {
    return fs.readFileSync(path, 'utf-8')
  }

  /**
   * Handle the case where we have the model directory but we don't have the
   * corresponding model.json file associated with it.
   *
   * This function will create a model.json file for the model.
   *
   * @param dirName the directory which resides in ~/jan/models but does not have a model.json file.
   */
  private async generateModelMetadata(dirName: string): Promise<Model> {
    const files: string[] = await fs.readdirSync(
      await joinPath([JanModelExtension._homeDir, dirName])
    )

    // sort files by name
    files.sort()

    // find the first file which is not a directory
    let binaryFileName: string | undefined = undefined
    let binaryFileSize: number | undefined = undefined

    for (const file of files) {
      if (file.endsWith(JanModelExtension._incompletedModelFileName)) continue
      if (file.endsWith('.json')) continue

      const path = await joinPath([JanModelExtension._homeDir, dirName, file])
      const fileStats = await fileStat(path)
      if (fileStats.isDirectory) continue
      binaryFileSize = fileStats.size
      binaryFileName = file
      break
    }

    if (!binaryFileName) {
      console.warn(`Unable to find binary file for model ${dirName}`)
      return
    }

    const defaultModel = await this.getDefaultModel()
    if (!defaultModel) {
      console.error('Unable to find default model')
      return
    }

    const model: Model = {
      ...defaultModel,
      id: dirName,
      name: dirName,
      created: Date.now(),
      description: `${dirName} - user self import model`,
    }

    const modelFilePath = await joinPath([
      JanModelExtension._homeDir,
      dirName,
      JanModelExtension._modelMetadataFileName,
    ])

    await fs.writeFileSync(modelFilePath, JSON.stringify(model, null, 2))

    return model
  }

  private async getDefaultModel(): Promise<Model | undefined> {
    const defaultModelPath = await joinPath([
      JanModelExtension._homeDir,
      JanModelExtension._configDirName,
      JanModelExtension._defaultModelFileName,
    ])

    if (!(await fs.existsSync(defaultModelPath))) {
      return undefined
    }

    const model = await this.readModelMetadata(defaultModelPath)

    return typeof model === 'object' ? model : JSON.parse(model)
  }

  /**
   * Gets all available models.
   * @returns A Promise that resolves with an array of all models.
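The generated metadata is the shared default spread first, then overridden with per-directory identity fields; the defaults file it reads appears next in this diff. The composition pattern reduced to its core, with an illustrative partial type:

```ts
// Spread-then-override: everything not listed below (settings, parameters,
// metadata) is inherited from default-model.json unchanged.
type ModelIdentity = { id: string; name: string; created: number; description: string }

function withIdentity<T extends ModelIdentity>(defaults: T, dirName: string): T {
  return {
    ...defaults,
    id: dirName,
    name: dirName,
    created: Date.now(),
    description: `${dirName} - user self import model`,
  }
}
```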
models/config/default-model.json (new file)
@@ -0,0 +1,35 @@
{
  "object": "model",
  "version": 1,
  "format": "gguf",
  "source_url": "N/A",
  "id": "N/A",
  "name": "N/A",
  "created": 0,
  "description": "User self import model",
  "settings": {
    "ctx_len": 4096,
    "ngl": 0,
    "embedding": false,
    "n_parallel": 0,
    "cpu_threads": 0,
    "prompt_template": ""
  },
  "parameters": {
    "temperature": 0,
    "token_limit": 0,
    "top_k": 0,
    "top_p": 0,
    "stream": false,
    "max_tokens": 4096,
    "stop": [],
    "frequency_penalty": 0,
    "presence_penalty": 0
  },
  "metadata": {
    "author": "User",
    "tags": [],
    "size": 0
  },
  "engine": "nitro"
}
models/dolphin-2.7-mixtral-8x7b/model.json (new file)
@@ -0,0 +1,22 @@
{
  "source_url": "https://huggingface.co/TheBloke/dolphin-2.7-mixtral-8x7b-GGUF/resolve/main/dolphin-2.7-mixtral-8x7b.Q4_K_M.gguf",
  "id": "dolphin-2.7-mixtral-8x7b",
  "object": "model",
  "name": "Dolphin 8x7B Q4",
  "version": "1.0",
  "description": "This model is an uncensored model based on Mixtral-8x7b. Dolphin is really good at coding",
  "format": "gguf",
  "settings": {
    "ctx_len": 4096,
    "prompt_template": "<|im_start|>system\n{system_message}<|im_end|>\n<|im_start|>user\n{prompt}<|im_end|>\n<|im_start|>assistant"
  },
  "parameters": {
    "max_tokens": 4096
  },
  "metadata": {
    "author": "Cognitive Computations, TheBloke",
    "tags": ["70B", "Fintuned"],
    "size": 26440000000
  },
  "engine": "nitro"
}
@@ -1,23 +0,0 @@
{
  "source_url": "https://huggingface.co/TheBloke/lzlv_70B-GGUF/resolve/main/lzlv_70b_fp16_hf.Q5_K_M.gguf",
  "id": "lzlv-70b",
  "object": "model",
  "name": "Lzlv 70B Q4",
  "version": "1.0",
  "description": "lzlv_70B is a sophisticated AI model designed for roleplaying and creative tasks. This merge aims to combine intelligence with creativity, seemingly outperforming its individual components in complex scenarios and creative outputs.",
  "format": "gguf",
  "settings": {
    "ctx_len": 4096,
    "prompt_template": "USER:\n{prompt}\nASSISTANT:"
  },
  "parameters": {
    "max_tokens": 4096
  },
  "metadata": {
    "author": "Lizpreciatior, The Bloke",
    "tags": ["70B", "Finetuned"],
    "size": 48750000000
  },
  "engine": "nitro"
}
@@ -1,5 +1,5 @@
{
  "source_url": "https://huggingface.co/TheBloke/Mistral-7B-Instruct-v0.1-GGUF/resolve/main/mistral-7b-instruct-v0.1.Q4_K_M.gguf",
  "source_url": "https://huggingface.co/TheBloke/Mistral-7B-Instruct-v0.2-GGUF/resolve/main/mistral-7b-instruct-v0.2.Q4_K_M.gguf",
  "id": "mistral-ins-7b-q4",
  "object": "model",
  "name": "Mistral Instruct 7B Q4",
@@ -8,9 +8,6 @@
  "format": "gguf",
  "settings": {
    "ctx_len": 4096,
    "system_prompt": "",
    "user_prompt": "<s>[INST]",
    "ai_prompt": "[/INST]",
    "prompt_template": "<s>[INST]{prompt}\n[/INST]"
  },
  "parameters": {
models/openchat-3.5-7b/model.json (new file)
@@ -0,0 +1,22 @@
{
  "source_url": "https://huggingface.co/TheBloke/openchat-3.5-1210-GGUF/resolve/main/openchat-3.5-1210.Q4_K_M.gguf",
  "id": "openchat-3.5-7b",
  "object": "model",
  "name": "Openchat-3.5 7B Q4",
  "version": "1.0",
  "description": "The performance of this open-source model surpasses that of ChatGPT-3.5 and Grok-1 across various benchmarks.",
  "format": "gguf",
  "settings": {
    "ctx_len": 4096,
    "prompt_template": "GPT4 Correct User: {prompt}<|end_of_turn|>GPT4 Correct Assistant:"
  },
  "parameters": {
    "max_tokens": 4096
  },
  "metadata": {
    "author": "Openchat",
    "tags": ["7B", "Fintuned", "Featured"],
    "size": 4370000000
  },
  "engine": "nitro"
}
@@ -1,5 +1,5 @@
{
  "source_url": "https://huggingface.co/janhq/stealth-v1.2-GGUF/resolve/main/stealth-v1.2.Q4_K_M.gguf",
  "source_url": "https://huggingface.co/janhq/stealth-v1.3-GGUF/resolve/main/stealth-v1.3.Q4_K_M.gguf",
  "id": "stealth-v1.2-7b",
  "object": "model",
  "name": "Stealth-v1.2 7B Q4",
@@ -1,5 +1,5 @@
{
  "source_url": "https://huggingface.co/TinyLlama/TinyLlama-1.1B-Chat-v0.6/resolve/main/ggml-model-q4_0.gguf",
  "source_url": "https://huggingface.co/TheBloke/TinyLlama-1.1B-Chat-v1.0-GGUF/resolve/main/tinyllama-1.1b-chat-v1.0.Q4_K_M.gguf",
  "id": "tinyllama-1.1b",
  "object": "model",
  "name": "TinyLlama Chat 1.1B Q4",
@@ -16,7 +16,7 @@
  "metadata": {
    "author": "TinyLlama",
    "tags": ["Tiny", "Foundation Model"],
    "size": 637000000
    "size": 669000000
  },
  "engine": "nitro"
}
(binary image removed from the repository: 1.7 MiB)
@@ -1,22 +0,0 @@
{
  "source_url": "https://huggingface.co/janhq/trinity-v1-GGUF/resolve/main/trinity-v1.Q4_K_M.gguf",
  "id": "trinity-v1-7b",
  "object": "model",
  "name": "Trinity-v1 7B Q4",
  "version": "1.0",
  "description": "Please use the latest version Trinity v1.2 for the best experience. Trinity is an experimental model merge of GreenNodeLM & LeoScorpius using the Slerp method. Recommended for daily assistance purposes.",
  "format": "gguf",
  "settings": {
    "ctx_len": 4096,
    "prompt_template": "{system_message}\n### Instruction:\n{prompt}\n### Response:"
  },
  "parameters": {
    "max_tokens": 4096
  },
  "metadata": {
    "author": "Jan",
    "tags": ["7B", "Merged"],
    "size": 4370000000
  },
  "engine": "nitro"
}
models/tulu-2-70b/model.json (new file)
@@ -0,0 +1,22 @@
{
  "source_url": "https://huggingface.co/TheBloke/tulu-2-dpo-70B-GGUF/resolve/main/tulu-2-dpo-70b.Q4_K_M.gguf",
  "id": "tulu-2-70b",
  "object": "model",
  "name": "Tulu 2 70B Q4",
  "version": "1.0",
  "description": "Tulu V2 DPO 70B is a fine-tuned version of Llama 2 using Direct Preference Optimization (DPO). This model is a strong alternative to Llama 2 70b Chat as a helpful assistant.",
  "format": "gguf",
  "settings": {
    "ctx_len": 4096,
    "prompt_template": "<|user|>\n{prompt}\n<|assistant|>"
  },
  "parameters": {
    "max_tokens": 4096
  },
  "metadata": {
    "author": "Lizpreciatior, The Bloke",
    "tags": ["70B", "Finetuned"],
    "size": 41400000000
  },
  "engine": "nitro"
}
models/yarn-mistral-7b/model.json (new file)
@@ -0,0 +1,23 @@
{
  "source_url": "https://huggingface.co/TheBloke/Yarn-Mistral-7B-128k-GGUF/resolve/main/yarn-mistral-7b-128k.Q4_K_M.gguf",
  "id": "yarn-mistral-7b",
  "object": "model",
  "name": "Yarn Mistral 7B Q4",
  "version": "1.0",
  "description": "Yarn Mistral 7B is a language model for long context and supports a 128k token context window.",
  "format": "gguf",
  "settings": {
    "ctx_len": 4096,
    "prompt_template": "{prompt}"
  },
  "parameters": {
    "max_tokens": 4096
  },
  "metadata": {
    "author": "NousResearch, The Bloke",
    "tags": ["7B", "Finetuned"],
    "size": 4370000000
  },
  "engine": "nitro"
}
@@ -17,6 +17,7 @@
    "build": "tsc"
  },
  "dependencies": {
    "@alumna/reflect": "^1.1.3",
    "@fastify/cors": "^8.4.2",
    "@fastify/static": "^6.12.0",
    "@fastify/swagger": "^8.13.0",
@@ -36,28 +36,23 @@ const GPUDriverPrompt: React.FC = () => {
    <Modal open={showNotification} onOpenChange={openChanged}>
      <ModalContent>
        <ModalHeader>
          <ModalTitle>Missing Nvidia Driver and Cuda Toolkit</ModalTitle>
          <ModalTitle>
            Checking for machine that does not meet the requirements.
          </ModalTitle>
        </ModalHeader>
        <p>
          It seems like you are missing Nvidia Driver or Cuda Toolkit or both.
          Please follow the instructions on the{' '}
          It appears that you are missing some dependencies required to run in
          GPU mode. Please follow the instructions below for more details{' '}
          <span
            className="cursor-pointer text-blue-600"
            onClick={() =>
              openExternalUrl('https://developer.nvidia.com/cuda-toolkit')
              openExternalUrl(
                'https://github.com/janhq/jan/blob/main/USAGE.md'
              )
            }
          >
            NVidia Cuda Toolkit Installation Page
            Jan running mode documentation
          </span>{' '}
          and the{' '}
          <span
            className="cursor-pointer text-blue-600"
            onClick={() =>
              openExternalUrl('https://www.nvidia.com/Download/index.aspx')
            }
          >
            Nvidia Driver Installation Page
          </span>
          .
        </p>
        <div className="flex items-center space-x-2">
@@ -22,8 +22,12 @@ export default function EventListenerWrapper({ children }: PropsWithChildren) {
  const modelsRef = useRef(models)

  const { setDownloadedModels, downloadedModels } = useGetDownloadedModels()
  const { setDownloadState, setDownloadStateSuccess, setDownloadStateFailed } =
    useDownloadState()
  const {
    setDownloadState,
    setDownloadStateSuccess,
    setDownloadStateFailed,
    setDownloadStateCancelled,
  } = useDownloadState()
  const downloadedModelRef = useRef(downloadedModels)

  useEffect(() => {
@@ -52,13 +56,18 @@ export default function EventListenerWrapper({ children }: PropsWithChildren) {

    window.electronAPI.onFileDownloadError(
      async (_event: string, state: any) => {
        if (state.err?.message !== 'aborted')
          console.error('Download error', state)
        const modelName = await baseName(state.fileName)
        const model = modelsRef.current.find(
          (model) => modelBinFileName(model) === modelName
        )
        if (model) setDownloadStateFailed(model.id)
        if (model) {
          if (state.err?.message !== 'aborted') {
            console.error('Download error', state)
            setDownloadStateFailed(model.id, state.err.message)
          } else {
            setDownloadStateCancelled(model.id)
          }
        }
      }
    )
@@ -29,7 +29,28 @@ const setDownloadStateSuccessAtom = atom(null, (get, set, modelId: string) => {
  })
})

const setDownloadStateFailedAtom = atom(null, (get, set, modelId: string) => {
const setDownloadStateFailedAtom = atom(
  null,
  (get, set, modelId: string, error: string) => {
    const currentState = { ...get(modelDownloadStateAtom) }
    const state = currentState[modelId]
    if (!state) {
      console.debug(`Cannot find download state for ${modelId}`)
      return
    }
    toaster({
      title: 'Download Failed',
      description: `Model ${modelId} download failed: ${error}`,
      type: 'error',
    })

    delete currentState[modelId]
    set(modelDownloadStateAtom, currentState)
  }
)
const setDownloadStateCancelledAtom = atom(
  null,
  (get, set, modelId: string) => {
    const currentState = { ...get(modelDownloadStateAtom) }
    const state = currentState[modelId]
    if (!state) {
@@ -38,17 +59,20 @@ const setDownloadStateFailedAtom = atom(null, (get, set, modelId: string) => {
      title: 'Cancel Download',
      description: `Model ${modelId} cancel download`,
    })

      return
    }
    delete currentState[modelId]
    set(modelDownloadStateAtom, currentState)
})
  }
)

export function useDownloadState() {
  const modelDownloadState = useAtomValue(modelDownloadStateAtom)
  const setDownloadState = useSetAtom(setDownloadStateAtom)
  const setDownloadStateSuccess = useSetAtom(setDownloadStateSuccessAtom)
  const setDownloadStateFailed = useSetAtom(setDownloadStateFailedAtom)
  const setDownloadStateCancelled = useSetAtom(setDownloadStateCancelledAtom)

  const downloadStates: DownloadState[] = []
  for (const [, value] of Object.entries(modelDownloadState)) {
@@ -61,6 +85,7 @@ export function useDownloadState() {
    setDownloadState,
    setDownloadStateSuccess,
    setDownloadStateFailed,
    setDownloadStateCancelled,
    downloadStates,
  }
}
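Both new atoms follow Jotai's write-only pattern: `null` as the read value, and extra arguments after `get`/`set` (here the error message). The pattern in isolation:

```ts
import { atom, useSetAtom } from 'jotai'

// A write-only atom: it reads nothing, and its write function accepts
// additional arguments beyond get/set.
const countAtom = atom(0)
const resetAtom = atom(null, (get, set, reason: string) => {
  console.debug(`reset (${reason}), previous value: ${get(countAtom)}`)
  set(countAtom, 0)
})

// In a component:
//   const reset = useSetAtom(resetAtom)
//   reset('download failed')
```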
@@ -72,7 +72,7 @@ export const presetConfiguration: Record<string, SettingComponentData> = {
  stream: {
    name: 'stream',
    title: 'Stream',
    description: 'Stream',
    description: 'Enable real-time data processing for faster predictions.',
    controllerType: 'checkbox',
    controllerData: {
      checked: false,
@@ -19,7 +19,7 @@ import DropdownListSidebar, {
import { useCreateNewThread } from '@/hooks/useCreateNewThread'

import { getConfigurationsData } from '@/utils/componentSettings'
import { toSettingParams } from '@/utils/model_param'
import { toRuntimeParams, toSettingParams } from '@/utils/model_param'

import EngineSetting from '../EngineSetting'
import ModelSetting from '../ModelSetting'
@@ -44,7 +44,9 @@ const Sidebar: React.FC = () => {
  const threadStates = useAtomValue(threadStatesAtom)

  const modelEngineParams = toSettingParams(activeModelParams)
  const modelRuntimeParams = toRuntimeParams(activeModelParams)
  const componentDataEngineSetting = getConfigurationsData(modelEngineParams)
  const componentDataRuntimeSetting = getConfigurationsData(modelRuntimeParams)

  const onReviewInFinderClick = async (type: string) => {
    if (!activeThread) return
@@ -224,6 +226,7 @@ const Sidebar: React.FC = () => {
            <DropdownListSidebar />
          </div>

          {componentDataRuntimeSetting.length !== 0 && (
            <div className="mt-6">
              <CardSidebar title="Inference Parameters" asChild>
                <div className="px-2 py-4">
@@ -231,7 +234,11 @@ const Sidebar: React.FC = () => {
                </div>
              </CardSidebar>
            </div>
          )}

          {componentDataEngineSetting.filter(
            (x) => x.name === 'prompt_template'
          ).length !== 0 && (
            <div className="mt-4">
              <CardSidebar title="Model Parameters" asChild>
                <div className="px-2 py-4">
@@ -239,7 +246,9 @@ const Sidebar: React.FC = () => {
                </div>
              </CardSidebar>
            </div>
          )}

          {componentDataEngineSetting.length !== 0 && (
            <div className="my-4">
              <CardSidebar
                title="Engine Parameters"
@@ -252,6 +261,7 @@ const Sidebar: React.FC = () => {
                </div>
              </CardSidebar>
            </div>
          )}
        </div>
      </CardSidebar>
    </div>
@@ -5,7 +5,7 @@ import { ChatCompletionRole, MessageStatus, ThreadMessage } from '@janhq/core'
import hljs from 'highlight.js'

import { useAtomValue } from 'jotai'
import { Marked } from 'marked'
import { Marked, Renderer } from 'marked'

import { markedHighlight } from 'marked-highlight'

@@ -30,7 +30,7 @@ const SimpleTextMessage: React.FC<ThreadMessage> = (props) => {
  }
  const clipboard = useClipboard({ timeout: 1000 })

  const marked = new Marked(
  const marked: Marked = new Marked(
    markedHighlight({
      langPrefix: 'hljs',
      highlight(code, lang) {
@@ -46,6 +46,11 @@ const SimpleTextMessage: React.FC<ThreadMessage> = (props) => {
    }),
    {
      renderer: {
        link: (href, title, text) => {
          return Renderer.prototype.link
            ?.apply(this, [href, title, text])
            .replace('<a', "<a target='_blank'")
        },
        code(code, lang, escaped) {
          return `
          <div class="relative code-block group/item">
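The renderer override delegates to marked's default link renderer and rewrites the opening tag so links leave the Electron window in a new tab. The same idea standalone, matching the tuple-style renderer signature of the marked version used here; the added `rel` attribute is an extra precaution, not in the diff:

```ts
import { Marked, Renderer } from 'marked'

const md = new Marked({
  renderer: {
    link(href: string, title: string | null, text: string) {
      // Reuse the stock renderer, then retarget the anchor.
      const html = Renderer.prototype.link.apply(this, [href, title, text])
      return html.replace('<a', "<a target='_blank' rel='noopener noreferrer'")
    },
  },
})

// md.parse('[Jan](https://jan.ai)') now emits an anchor targeting a new tab.
```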
@@ -5,6 +5,7 @@ import {
  ModalTrigger,
  ModalClose,
  ModalFooter,
  ModalPortal,
  ModalContent,
  ModalHeader,
  ModalTitle,
@@ -89,7 +90,9 @@ export default function ThreadList() {
          className={twMerge(
            `group/message relative mb-1 flex cursor-pointer flex-col transition-all hover:rounded-lg hover:bg-gray-100 hover:dark:bg-secondary/50`
          )}
          onClick={() => onThreadClick(thread)}
          onClick={() => {
            onThreadClick(thread)
          }}
        >
          <div className="relative z-10 p-4 py-4">
            <div className="flex justify-between">
@@ -111,7 +114,7 @@ export default function ThreadList() {
              <MoreVerticalIcon />
              <div className="invisible absolute right-0 z-20 w-40 overflow-hidden rounded-lg border border-border bg-background shadow-lg group-hover/icon:visible">
                <Modal>
                  <ModalTrigger asChild>
                  <ModalTrigger asChild onClick={(e) => e.stopPropagation()}>
                    <div className="flex cursor-pointer items-center space-x-2 px-4 py-2 hover:bg-secondary">
                      <Paintbrush
                        size={16}
@@ -122,6 +125,7 @@ export default function ThreadList() {
                      </span>
                    </div>
                  </ModalTrigger>
                  <ModalPortal />
                  <ModalContent>
                    <ModalHeader>
                      <ModalTitle>Clean Thread</ModalTitle>
@@ -129,13 +133,19 @@ export default function ThreadList() {
                    <p>Are you sure you want to clean this thread?</p>
                    <ModalFooter>
                      <div className="flex gap-x-2">
                        <ModalClose asChild>
                        <ModalClose
                          asChild
                          onClick={(e) => e.stopPropagation()}
                        >
                          <Button themes="ghost">No</Button>
                        </ModalClose>
                        <ModalClose asChild>
                          <Button
                            themes="danger"
                            onClick={() => cleanThread(thread.id)}
                            onClick={(e) => {
                              e.stopPropagation()
                              cleanThread(thread.id)
                            }}
                            autoFocus
                          >
                            Yes
@@ -145,9 +155,8 @@ export default function ThreadList() {
                    </ModalFooter>
                  </ModalContent>
                </Modal>

                <Modal>
                  <ModalTrigger asChild>
                  <ModalTrigger asChild onClick={(e) => e.stopPropagation()}>
                    <div className="flex cursor-pointer items-center space-x-2 px-4 py-2 hover:bg-secondary">
                      <Trash2Icon
                        size={16}
@@ -158,6 +167,7 @@ export default function ThreadList() {
                      </span>
                    </div>
                  </ModalTrigger>
                  <ModalPortal />
                  <ModalContent>
                    <ModalHeader>
                      <ModalTitle>Delete Thread</ModalTitle>
@@ -168,14 +178,20 @@ export default function ThreadList() {
                    </p>
                    <ModalFooter>
                      <div className="flex gap-x-2">
                        <ModalClose asChild>
                        <ModalClose
                          asChild
                          onClick={(e) => e.stopPropagation()}
                        >
                          <Button themes="ghost">No</Button>
                        </ModalClose>
                        <ModalClose asChild>
                          <Button
                            autoFocus
                            themes="danger"
                            onClick={() => deleteThread(thread.id)}
                            onClick={(e) => {
                              e.stopPropagation()
                              deleteThread(thread.id)
                            }}
                          >
                            Yes
                          </Button>
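Every trigger and close button now stops propagation because the whole card is clickable: without it, opening or dismissing a modal would also fire the thread-selecting `onClick` on the ancestor. The failure mode in miniature:

```tsx
// Nested click targets: the card selects a thread, the button deletes it.
// stopPropagation keeps the delete click from also selecting the card.
function ThreadCard(props: { onSelect: () => void; onDelete: () => void }) {
  return (
    <div onClick={props.onSelect}>
      <span>My thread</span>
      <button
        onClick={(e) => {
          e.stopPropagation() // do not bubble up to the card's onClick
          props.onDelete()
        }}
      >
        Delete
      </button>
    </div>
  )
}
```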
@@ -1,4 +1,3 @@
/* eslint-disable @typescript-eslint/naming-convention */
import { Model } from '@janhq/core'

import ExploreModelItem from '@/screens/ExploreModels/ExploreModelItem'
@@ -8,33 +7,45 @@ type Props = {
}

const ExploreModelList: React.FC<Props> = ({ models }) => {
  const sortOrder: Record<string, number> = {
    '7b': 1,
    '13b': 2,
    '34b': 3,
    '70b': 4,
    '120b': 5,
    'tiny': 6,
  const takenModelIds: string[] = []
  const featuredModels = models
    .filter((m) => {
      if (m.metadata.tags.includes('Featured')) {
        takenModelIds.push(m.id)
        return m
      }
  const sortedModels = models?.sort((a, b) => {
    const aIsFeatured = a.metadata.tags.includes('Featured')
    const bIsFeatured = b.metadata.tags.includes('Featured')
    const aIsRecommended = a.metadata.tags.includes('Recommended')
    const bIsRecommended = b.metadata.tags.includes('Recommended')
    const aNumericTag =
      a.metadata.tags.find((tag) => !!sortOrder[tag.toLowerCase()]) ?? 'Tiny'
    const bNumericTag =
      b.metadata.tags.find((tag) => !!sortOrder[tag.toLowerCase()]) ?? 'Tiny'

    if (aIsFeatured !== bIsFeatured) return aIsFeatured ? -1 : 1
    if (aNumericTag !== bNumericTag)
      return (
        sortOrder[aNumericTag.toLowerCase()] -
        sortOrder[bNumericTag.toLowerCase()]
      )
    if (aIsRecommended !== bIsRecommended) return aIsRecommended ? -1 : 1
    return a.metadata.size - b.metadata.size
  })
    .sort((m1, m2) => m1.metadata.size - m2.metadata.size)

  const recommendedModels = models
    .filter((m) => {
      if (m.metadata.tags.includes('Recommended')) {
        takenModelIds.push(m.id)
        return m
      }
    })
    .sort((m1, m2) => m1.metadata.size - m2.metadata.size)

  const openAiModels = models
    .filter((m) => {
      if (m.engine === 'openai') {
        takenModelIds.push(m.id)
        return m
      }
    })
    .sort((m1: Model, m2: Model) => m1.name.localeCompare(m2.name))

  const remainingModels = models
    .filter((m) => !takenModelIds.includes(m.id))
    .sort((m1, m2) => m1.metadata.size - m2.metadata.size)

  const sortedModels: Model[] = [
    ...featuredModels,
    ...recommendedModels,
    ...openAiModels,
    ...remainingModels,
  ]

  return (
    <div className="relative h-full w-full flex-shrink-0">
      {sortedModels?.map((model) => (
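The single comparator is replaced by explicit buckets concatenated in priority order (Featured, Recommended, OpenAI, rest), each sorted internally; in the real code the OpenAI bucket sorts by name while the others sort by size. The generic shape of that rewrite, as an illustrative helper:

```ts
// Partition items into priority buckets, sort each bucket, and append the
// leftovers: the first matching predicate wins, mirroring takenModelIds above.
function bucketSort<T>(
  items: T[],
  buckets: Array<(item: T) => boolean>,
  within: (a: T, b: T) => number
): T[] {
  const taken = new Set<T>()
  const grouped = buckets.map((pred) => {
    const group = items.filter((it) => !taken.has(it) && pred(it))
    group.forEach((it) => taken.add(it))
    return group.sort(within)
  })
  const rest = items.filter((it) => !taken.has(it)).sort(within)
  return [...grouped.flat(), ...rest]
}
```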
@@ -1,5 +1,6 @@
import { useState } from 'react'

import { openExternalUrl } from '@janhq/core'
import {
  Input,
  ScrollArea,
@@ -44,6 +45,10 @@ const ExploreModelsScreen = () => {
    }
  })

  const onHowToImportModelClick = () => {
    openExternalUrl('https://jan.ai/guides/using-models/import-manually/')
  }

  if (loading) return <Loader description="loading ..." />

  return (
@@ -72,13 +77,12 @@ const ExploreModelsScreen = () => {
          />
        </div>
        <div className="mt-2 text-center">
          <a
            href="https://jan.ai/guides/using-models/import-manually/"
            target="_blank"
            className="font-semibold text-white underline"
          <p
            onClick={onHowToImportModelClick}
            className="cursor-pointer font-semibold text-white underline"
          >
            How to manually import models
          </a>
          </p>
        </div>
      </div>
    </div>