Merge branch 'main' into docs/fix-error-link

This commit is contained in:
Ho Duc Hieu 2024-01-05 14:12:31 +07:00
commit b95a5a3cba
67 changed files with 843 additions and 492 deletions

View File

@ -70,25 +70,25 @@ Jan is an open-source ChatGPT alternative that runs 100% offline on your compute
<tr style="text-align:center"> <tr style="text-align:center">
<td style="text-align:center"><b>Experimental (Nightly Build)</b></td> <td style="text-align:center"><b>Experimental (Nightly Build)</b></td>
<td style="text-align:center"> <td style="text-align:center">
<a href='https://delta.jan.ai/0.4.3-118/jan-win-x64-0.4.3-118.exe'> <a href='https://delta.jan.ai/0.4.3-123/jan-win-x64-0.4.3-123.exe'>
<img src='./docs/static/img/windows.png' style="height:14px; width: 14px" /> <img src='./docs/static/img/windows.png' style="height:14px; width: 14px" />
<b>jan.exe</b> <b>jan.exe</b>
</a> </a>
</td> </td>
<td style="text-align:center"> <td style="text-align:center">
<a href='https://delta.jan.ai/0.4.3-118/jan-mac-x64-0.4.3-118.dmg'> <a href='https://delta.jan.ai/0.4.3-123/jan-mac-x64-0.4.3-123.dmg'>
<img src='./docs/static/img/mac.png' style="height:15px; width: 15px" /> <img src='./docs/static/img/mac.png' style="height:15px; width: 15px" />
<b>Intel</b> <b>Intel</b>
</a> </a>
</td> </td>
<td style="text-align:center"> <td style="text-align:center">
<a href='https://delta.jan.ai/0.4.3-118/jan-mac-arm64-0.4.3-118.dmg'> <a href='https://delta.jan.ai/0.4.3-123/jan-mac-arm64-0.4.3-123.dmg'>
<img src='./docs/static/img/mac.png' style="height:15px; width: 15px" /> <img src='./docs/static/img/mac.png' style="height:15px; width: 15px" />
<b>M1/M2</b> <b>M1/M2</b>
</a> </a>
</td> </td>
<td style="text-align:center"> <td style="text-align:center">
<a href='https://delta.jan.ai/0.4.3-118/jan-linux-amd64-0.4.3-118.deb'> <a href='https://delta.jan.ai/0.4.3-123/jan-linux-amd64-0.4.3-123.deb'>
<img src='./docs/static/img/linux.png' style="height:14px; width: 14px" /> <img src='./docs/static/img/linux.png' style="height:14px; width: 14px" />
<b>jan.deb</b> <b>jan.deb</b>
</a> </a>

60
USAGE.md Normal file
View File

@ -0,0 +1,60 @@
## Requirements for running the Jan app in GPU mode on Windows and Linux
- You must have an NVIDIA driver that supports CUDA 11.4 or higher; see the [CUDA compatibility table](https://docs.nvidia.com/deploy/cuda-compatibility/index.html#binary-compatibility__table-toolkit-driver).
To check if the NVIDIA driver is installed, open PowerShell or Terminal and enter the following command:
```bash
nvidia-smi
```
If you see a result similar to the following, you have successfully installed the NVIDIA driver:
```bash
+-----------------------------------------------------------------------------+
| NVIDIA-SMI 470.57.02 Driver Version: 470.57.02 CUDA Version: 11.4 |
|-------------------------------+----------------------+----------------------+
| GPU Name Persistence-M| Bus-Id Disp.A | Volatile Uncorr. ECC |
| Fan Temp Perf Pwr:Usage/Cap| Memory-Usage | GPU-Util Compute M. |
| | | MIG M. |
|===============================+======================+======================|
| 0 NVIDIA GeForce ... Off | 00000000:01:00.0 On | N/A |
| 0% 51C P8 10W / 170W | 364MiB / 7982MiB | 0% Default |
| | | N/A |
+-------------------------------+----------------------+----------------------+
```
- You must have CUDA 11.4 or higher.
To check if CUDA is installed, open PowerShell or Terminal and enter the following command:
```bash
nvcc --version
```
If you see a result similar to the following, you have successfully installed CUDA:
```bash
nvcc: NVIDIA (R) Cuda compiler driver
Cuda compilation tools, release 11.4, V11.4.100
Build cuda_11.4.r11.4/compiler.30033411_0
```
- Specifically on Linux, you also need a CUDA-compatible driver (see the [compatibility table](https://docs.nvidia.com/deploy/cuda-compatibility/index.html#binary-compatibility__table-toolkit-driver)), and the `.so` libraries of CUDA and of the compatibility driver must be added to the `LD_LIBRARY_PATH` environment variable (see the [post-installation actions](https://docs.nvidia.com/cuda/cuda-installation-guide-linux/index.html#post-installation-actions)); a sketch is shown below.
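For example, a minimal sketch of the `LD_LIBRARY_PATH` setup (assuming CUDA is installed under `/usr/local/cuda-11.4`; adjust the path to your installation):
```bash
# Add the CUDA runtime libraries to the dynamic linker search path.
# If you use the compatibility driver, its directory (commonly
# /usr/local/cuda/compat) must be added the same way.
export LD_LIBRARY_PATH=/usr/local/cuda-11.4/lib64${LD_LIBRARY_PATH:+:${LD_LIBRARY_PATH}}
```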
## How to switch the Jan app between CPU and GPU mode
By default, the Jan app runs in CPU mode. On startup, it checks whether your computer meets the requirements for GPU mode; if it does, GPU mode is enabled automatically and the GPU with the highest VRAM is selected (letting users choose one or more GPU devices is currently in planning). You can check whether you are running in CPU or GPU mode in the Settings > Advanced section of the Jan app (see image below). ![](/docs/static/img/usage/jan-gpu-enable-setting.png)
If your machine supports GPU mode but it is not enabled by default, one of the following may be the cause; work through these steps to fix it:
1. You have not installed the NVIDIA driver; install one that supports CUDA 11.4 or higher (see the [compatibility table](https://docs.nvidia.com/deploy/cuda-compatibility/index.html#binary-compatibility__table-toolkit-driver)).
2. You have not installed the CUDA toolkit, or your CUDA toolkit is not compatible with the NVIDIA driver (see [CUDA compatibility](https://docs.nvidia.com/deploy/cuda-compatibility/index.html#binary-compatibility__table-toolkit-driver)).
3. On Linux, you have not installed a CUDA-compatible driver (see the [compatibility table](https://docs.nvidia.com/deploy/cuda-compatibility/index.html#binary-compatibility__table-toolkit-driver)), or the `.so` libraries of CUDA and of the compatibility driver are not on the `LD_LIBRARY_PATH` environment variable (see the [post-installation actions](https://docs.nvidia.com/cuda/cuda-installation-guide-linux/index.html#post-installation-actions)). On Windows, the `.dll` libraries of CUDA and of the CUDA-compatible driver must be on the `PATH` environment variable; the CUDA installer usually adds this automatically, but if you do not see it, add it manually (see [environment setup](https://docs.nvidia.com/cuda/cuda-installation-guide-microsoft-windows/index.html#environment-setup)). A quick check for Windows is sketched below.
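A quick way to verify that the CUDA libraries are discoverable on Windows (a sketch; the DLL name depends on your CUDA major version, e.g. `cublas64_11.dll` for CUDA 11.x):
```bash
# Prints the DLL's location if its folder is on PATH; reports an error otherwise.
where.exe cublas64_11.dll
```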
## Checking the GPU settings Jan has detected
To check the current GPU-related settings that the Jan app has detected, go to the Settings > Advanced section as shown in the images below.
![](/docs/static/img/usage/jan-open-home-directory.png)
![](/docs/static/img/usage/jan-open-settings-1.png)
![](/docs/static/img/usage/jan-open-settings-2.png)
![](/docs/static/img/usage/jan-open-settings-3.png)
When you have an issue with GPU mode, sharing your `settings.json` with us will help us solve the problem faster.
## Tested on
- Windows 11 Pro 64-bit, NVIDIA GeForce RTX 4070 Ti GPU, CUDA 12.2, NVIDIA driver 531.18
- Ubuntu 22.04 LTS, NVIDIA GeForce RTX 4070 Ti GPU, CUDA 12.2, NVIDIA driver 545

View File

@ -53,9 +53,10 @@ export enum FileSystemRoute {
writeFileSync = 'writeFileSync', writeFileSync = 'writeFileSync',
} }
export enum FileManagerRoute { export enum FileManagerRoute {
synceFile = 'syncFile', syncFile = 'syncFile',
getUserSpace = 'getUserSpace', getUserSpace = 'getUserSpace',
getResourcePath = 'getResourcePath', getResourcePath = 'getResourcePath',
fileStat = 'fileStat',
} }
export type ApiFunction = (...args: any[]) => any export type ApiFunction = (...args: any[]) => any

View File

@ -1,3 +1,5 @@
import { FileStat } from './types'
/** /**
* Execute an extension module function in main process * Execute an extension module function in main process
* *
@ -74,6 +76,15 @@ const openExternalUrl: (url: string) => Promise<any> = (url) =>
*/ */
const getResourcePath: () => Promise<string> = () => global.core.api?.getResourcePath() const getResourcePath: () => Promise<string> = () => global.core.api?.getResourcePath()
/**
* Gets the file's stats.
*
* @param path - The path to the file.
* @returns {Promise<FileStat>} - A promise that resolves with the file's stats.
*/
const fileStat: (path: string) => Promise<FileStat | undefined> = (path) =>
global.core.api?.fileStat(path)
/** /**
* Register extension point function type definition * Register extension point function type definition
*/ */
@ -97,4 +108,5 @@ export {
joinPath, joinPath,
openExternalUrl, openExternalUrl,
baseName, baseName,
fileStat,
} }
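As a usage sketch, an extension could call the new helper like this (the path is hypothetical; per the main-process handler, paths are resolved relative to the Jan user space):
```typescript
import { fileStat } from '@janhq/core'

// Hypothetical path inside the Jan user space.
const stat = await fileStat('models/tinyllama/model.gguf')
if (stat === undefined) {
  console.log('File does not exist')
} else {
  console.log(`isDirectory=${stat.isDirectory}, size=${stat.size} bytes`)
}
```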

View File

@ -0,0 +1,12 @@
import { FileManagerRoute } from '../../../api'
import { HttpServer } from '../../index'
export const fsRouter = async (app: HttpServer) => {
app.post(`/app/${FileManagerRoute.syncFile}`, async (request: any, reply: any) => {})
app.post(`/app/${FileManagerRoute.getUserSpace}`, async (request: any, reply: any) => {})
app.post(`/app/${FileManagerRoute.getResourcePath}`, async (request: any, reply: any) => {})
app.post(`/app/${FileManagerRoute.fileStat}`, async (request: any, reply: any) => {})
}

View File

@ -0,0 +1,4 @@
export type FileStat = {
isDirectory: boolean
size: number
}

View File

@ -4,3 +4,4 @@ export * from './thread'
export * from './message' export * from './message'
export * from './inference' export * from './inference'
export * from './monitoring' export * from './monitoring'
export * from './file'

View File

@ -64,7 +64,7 @@ Jan is a startup with an open source business model. We believe in the need for
We use Github to build in public, and welcome anyone to join in. We use Github to build in public, and welcome anyone to join in.
- [Jan's Kanban](https://github.com/orgs/janhq/projects/5) - [Jan's Kanban](https://github.com/orgs/janhq/projects/5)
- [Jan's Roadmap](https://github.com/orgs/janhq/projects/5/views/2) - [Jan's Roadmap](https://github.com/orgs/janhq/projects/5/views/29)
### Bootstrapped ### Bootstrapped

View File

@ -15,36 +15,32 @@ keywords:
] ]
--- ---
Jan is a ChatGPT-alternative that runs on your own computer, with a [local API server](/api-reference). Jan is a ChatGPT alternative that runs on your own computer, with a [local API server](/guides/using-server).
Jan uses [open-source AI models](/docs/engineering/models), stores data in [open file formats](/developer/file-based), is highly customizable via [extensions](/developer/build-extension). We believe in the need for an open source AI ecosystem. We're focused on building infra, tooling and [custom models](https://huggingface.co/janhq) to allow open source AIs to compete on a level playing field with proprietary offerings.
Jan believes in the need for an open source AI ecosystem. We aim to build infra and tooling to allow open source AIs to compete on a level playing field with proprietary offerings. ## Features
- Compatible with [open-source models](/guides/using-models) (GGUF, TensorRT, and remote APIs)
- Compatible with most OSes: [Windows](/install/windows/), [Mac](/install/mac), [Linux](/install/linux), with/without GPU acceleration
- Stores data in [open file formats](/developer/file-based)
- Customizable via [extensions](/developer/build-extension)
- And more in the [roadmap](https://github.com/orgs/janhq/projects/5/views/16). Join us on [Discord](https://discord.gg/5rQ2zTv3be) and tell us what you want to see!
## Why Jan? ## Why Jan?
#### 💻 Own your AI #### 💻 Own your AI
Jan runs 100% on your own machine, [predictably](https://www.reddit.com/r/LocalLLaMA/comments/17mghqr/comment/k7ksti6/?utm_source=share&utm_medium=web2x&context=3), privately and even offline. No one else can see your conversations, not even us. Jan runs 100% on your own machine, predictably, privately and offline. No one else can see your conversations, not even us.
#### 🏗️ Extensions #### 🏗️ Extensions
Jan ships with a powerful [extension framework](/developer/build-extension), which allows developers to extend and customize Jan's functionality. In fact, most core modules of Jan are [built as extensions](/developer/architecture) and use the same extensions API. Jan ships with a local-first, AI-native, and cross-platform [extensions framework](/developer/build-extension). Developers can extend and customize everything from functionality to UI to branding. In fact, Jan's current main features are actually built as extensions on top of this framework.
#### 🗂️ Open File Formats #### 🗂️ Open File Formats
Jan stores data in a [local folder of non-proprietary files](/developer/architecture). You're never locked in and can do what you want with your data with extensions, or even a different app. Jan stores data in your [local filesystem](/developer/file-based). Your data never leaves your computer. You are free to delete, export, or migrate your data, even to a different platform.
#### 🌍 Open Source #### 🌍 Open Source
Both Jan and [Nitro](https://nitro.jan.ai), our lightweight inference engine, are licensed via the open source [AGPLv3 license](https://github.com/janhq/jan/blob/main/LICENSE). Both Jan and [Nitro](https://nitro.jan.ai), our lightweight inference engine, are licensed via the open source [AGPLv3 license](https://github.com/janhq/jan/blob/main/LICENSE).
<!-- ## Design Principles -->
<!-- OpenAI meets VSCode meets Obsidian.
Minimalism: https://docusaurus.io/docs#design-principles. Not having abstractions is better than having the wrong abstractions. Assistants as code. Only including features that are absolutely necessary in the Jan API.
File-based: User should be able to look at a Jan directory and intuit how it works. Transparency. Editing things via a text editor, vs. needing a database tool for SQLite.
Participatory: https://www.getlago.com/blog/the-5-reasons-why-we-chose-open-source -->

View File

@ -0,0 +1,55 @@
---
title: Hardware Requirements
description: Jan is a ChatGPT-alternative that runs on your own computer, with a local API server.
keywords:
[
Jan AI,
Jan,
ChatGPT alternative,
local AI,
private AI,
conversational AI,
no-subscription fee,
large language model,
]
---
Jan is designed to be lightweight and able to run Large Language Models (LLMs) out-of-the-box.
The current download size is less than 150 MB, with a disk footprint of ~300 MB.
To ensure optimal performance, please see the following system requirements:
## Disk Space
- Minimum requirement
- At least 5 GB of free disk space is required to accommodate the download, storage, and management of open-source LLM models.
- Recommended
- For an optimal experience and to run most available open-source LLM models on Jan, it is recommended to have 10 GB of free disk space.
## RAM and GPU VRAM
The amount of RAM on your system plays a crucial role in determining the size and complexity of LLM models you can effectively run. Jan can be utilized on traditional computers where RAM is a key resource. For enhanced performance, Jan also supports GPU acceleration, utilizing the VRAM of your graphics card.
## Best Models for your RAM and VRAM
The RAM and GPU VRAM requirements depend on the size and complexity of the LLM models you intend to run. The following general guidelines will help you determine how much RAM or VRAM you need to run LLM models on Jan (a rough estimate is also sketched below):
- `8 GB of RAM`: Suitable for running smaller models, like 3B models or quantized 7B models
- `16 GB of RAM (recommended)`: This is considered the "minimum usable models" threshold, particularly for 7B models (e.g., Mistral 7B)
- `Beyond 16 GB of RAM`: Required for handling larger and more sophisticated models, such as 70B models.
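As a rough, unofficial rule of thumb (an assumption, not a Jan-published formula), a 4-bit quantized model needs about half a byte per parameter, plus some overhead for the KV cache and runtime buffers:
```typescript
// Rough estimate of the memory a quantized model needs, in GiB.
// bytesPerParam ≈ 0.5 for 4-bit quantization; overheadGiB is an assumed
// allowance for the KV cache and runtime buffers (varies with context length).
function estimateMemoryGiB(paramsBillion: number, bytesPerParam = 0.5, overheadGiB = 1): number {
  return (paramsBillion * 1e9 * bytesPerParam) / 1024 ** 3 + overheadGiB
}

console.log(estimateMemoryGiB(7).toFixed(1)) // ≈ 4.3 GiB for a 7B Q4 model
```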
## Architecture
Jan is designed to run on multiple architectures, ensuring versatility and widespread usability. The supported architectures include:
### CPU Support
- `x86`: Jan is well-suited for systems with x86 architecture, which is commonly found in traditional desktops and laptops. It ensures smooth performance on a variety of devices using x86 processors.
- `ARM`: Jan is optimized to run efficiently on ARM-based systems, extending compatibility to a broad range of devices using ARM processors.
### GPU Support
- `NVIDIA`
- `AMD`
- `ARM64 Mac`

View File

@ -21,9 +21,8 @@ import TabItem from "@theme/TabItem";
In this quickstart we'll show you how to: In this quickstart we'll show you how to:
- Download the Jan Desktop client - Mac, Windows, Linux, (and toaster) compatible - Download the Jan Desktop client - Mac, Windows, Linux, (and toaster) compatible
- Download and customize models - Download the Nightly (unstable) version
- Import custom models - Build the application from source
- Use the local server at port `1337`
## Setup ## Setup
@ -50,89 +49,3 @@ In this quickstart we'll show you how to:
- To build Jan Desktop from scratch (and have the right to tinker!) - To build Jan Desktop from scratch (and have the right to tinker!)
See the [Build from Source](/install/from-source) guide. See the [Build from Source](/install/from-source) guide.
### Working with Models
Jan provides a list of recommended models to get you started.
You can find them in the in-app Hub.
1. `cmd + k` and type "hub" to open the Hub.
2. Download your preferred models.
3. `cmd + k` and type "chat" to open the conversation UI and start chatting.
4. Your model may take a few seconds to start up.
5. You can customize the model settings, at each conversation thread level, on the right panel.
6. To change model defaults globally, edit the `model.json` file. See the [Models](/guides/models) guide.
### Importing Models
Jan is compatible with all GGUF models.
For more information on how to import custom models, not found in the Hub, see the [Models](/guides/models) guide.
## Working with the Local Server
> This feature is currently under development. So expect bugs!
Jan runs a local server on port `1337` by default.
The endpoints are OpenAI compatible.
See the [API server guide](/guides/server) for more information.
## Next Steps
---
TODO: Merge this in:
Getting open-source AI models up and running on your own computer with Jan is quick and easy. Jan is lightweight and can run on a variety of hardware and platform versions. Specific requirements tailored to your platform are outlined below.
## Cross platform
A free, open-source alternative to OpenAI that runs on the Linux, macOS, and Windows operating systems. Please refer to the specific guide for your platform below:
- [Linux](/install/linux)
- [MacOS (Mac Intel Chip and Mac Apple Silicon Chip)](/install/mac)
- [Windows](/install/windows)
## Requirements for Jan
### Hardware
Jan is a lightweight platform designed for seamless download, storage, and execution of open-source Large Language Models (LLMs). With a small download size of less than 200 MB and a disk footprint of under 300 MB, Jan is optimized for efficiency and should run smoothly on modern hardware.
To ensure optimal performance while using Jan and handling LLM models, it is recommended to meet the following system requirements:
#### Disk space
- Minimum requirement
- At least 5 GB of free disk space is required to accommodate the download, storage, and management of open-source LLM models.
- Recommended
- For an optimal experience and to run most available open-source LLM models on Jan, it is recommended to have 10 GB of free disk space.
#### Random Access Memory (RAM) and Graphics Processing Unit Video Random Access Memory (GPU VRAM)
The amount of RAM on your system plays a crucial role in determining the size and complexity of LLM models you can effectively run. Jan can be utilized on traditional computers where RAM is a key resource. For enhanced performance, Jan also supports GPU acceleration, utilizing the VRAM of your graphics card.
#### RAM and VRAM Sizes in Relation to LLM Models
The RAM and GPU VRAM requirements depend on the size and complexity of the LLM models you intend to run. The following general guidelines will help you determine how much RAM or VRAM you need to run LLM models on Jan:
- 8 GB of RAM: Suitable for running smaller models, like 3B models or quantized 7B models
- 16 GB of RAM (recommended): This is considered the "minimum usable models" threshold, particularly for 7B models (e.g., Mistral 7B)
- Beyond 16 GB of RAM: Required for handling larger and more sophisticated models, such as 70B models.
### Architecture
Jan is designed to run on multiple architectures, ensuring versatility and widespread usability. The supported architectures include:
#### CPU
- x86: Jan is well-suited for systems with x86 architecture, which is commonly found in traditional desktops and laptops. It ensures smooth performance on a variety of devices using x86 processors.
- ARM: Jan is optimized to run efficiently on ARM-based systems, extending compatibility to a broad range of devices using ARM processors.
#### GPU
- NVIDIA: Jan optimizes the computational capabilities of NVIDIA GPUs, achieving efficiency through the utilization of llama.cpp. This strategic integration enhances the performance of Jan, particularly in resource-intensive Language Model (LLM) tasks. Users can expect accelerated processing and improved responsiveness when leveraging the processing capabilities inherent in NVIDIA GPUs.
- AMD: Users with AMD GPUs can seamlessly integrate Jan's GPU acceleration, offering a comprehensive solution for diverse hardware configurations and preferences.
- ARM64 Mac: Jan seamlessly supports ARM64 architecture on Mac systems, leveraging Metal for efficient GPU operations. This ensures a smooth and efficient experience for users with Apple Silicon Chips, utilizing the power of Metal for optimal performance on ARM64 Mac devices.

View File

@ -1,12 +0,0 @@
---
title: Starting a Thread
---
Rough outline:
Choosing an assistant
Setting assistant instructions
At thread level
Globally, as default
Choosing a model
Customizing model params (thread level)
Customizing engine params

View File

@ -1,3 +0,0 @@
---
title: Uploading docs
---

View File

@ -1,3 +0,0 @@
---
title: Uploading Images
---

View File

@ -1,3 +1,56 @@
--- ---
title: Manage Chat History title: Manage Chat History
slug: /guides/chatting/manage-history/
description: Jan is a ChatGPT-alternative that runs on your own computer, with a local API server.
keywords:
[
Jan AI,
Jan,
ChatGPT alternative,
local AI,
private AI,
conversational AI,
no-subscription fee,
large language model,
manage-chat-history,
]
--- ---
Jan offers a convenient and private way to interact with a conversational AI locally on your computer. This guide will walk you through how to manage your chat history with Jan, ensuring your interactions remain private and organized.
## Viewing Chat History
1. Navigate to the main dashboard.
2. Locate the list of threads on the left side of the screen. This list shows all your conversations.
3. Select a thread to view the conversation in the main chat window.
4. Scroll up and down to view the entire chat history in the selected thread.
<br></br>
![viewing-chat-history](./assets/viewing-chat-history.gif)
## Managing Threads via Folders
This feature allows you to directly manage your thread history and configurations.
1. Navigate to the Thread that you want to manage via the list of threads on the left side of the dashboard.
2. Click on the three dots (⋮) on the `Thread` section on the right side of the dashboard. There are two options:
- `Reveal in Finder` will open the folder containing the thread history and configurations.
- `View as JSON` will open the `thread.json` file in your default browser.
<br></br>
![managing-threads-via-folders](./assets/managing-threads-via-folders.gif)
## Clean Thread
To streamline your conversation view, click the three dots (⋮) on the thread you want to clean, then select `Clean Thread`. This removes all messages from the thread. It is useful if you want to keep the thread settings but remove the messages from the chat window.
<br></br>
![clean-thread](./assets/clean-thread.gif)
## Delete Thread
To delete a thread, click on the three dots (⋮) on the thread you want to delete, then select `Delete Thread`. It will remove the thread from the list of threads.
<br></br>
![delete-thread](./assets/delete-thread.gif)

Binary files added (not shown): 360 KiB, 8.5 MiB, 342 KiB, 10 MiB, 18 MiB, 333 KiB, 342 KiB, 13 MiB, 11 MiB
View File

@ -1,3 +0,0 @@
---
title: Customize Model Defaults
---

View File

@ -1,3 +0,0 @@
---
title: Package & Publish Models
---

View File

@ -1,31 +1,31 @@
---
openapi: 3.0.0 openapi: 3.0.0
info: info:
title: API Reference title: API Reference
description: > description: >
# Introduction # Introduction
Jan API is compatible with the [OpenAI Jan API is compatible with the [OpenAI API](https://platform.openai.com/docs/api-reference).
API](https://platform.openai.com/docs/api-reference).
version: 0.1.8 version: 0.1.8
contact: contact:
name: Jan Discord name: Jan Discord
url: "https://discord.gg/7EcEz7MrvA" url: https://discord.gg/7EcEz7MrvA
license: license:
name: AGPLv3 name: AGPLv3
url: "https://github.com/janhq/nitro/blob/main/LICENSE" url: https://github.com/janhq/nitro/blob/main/LICENSE
servers: servers:
- url: "http://localhost:1337/v1/" - url: http://localhost:1337/v1/
tags: tags:
- name: Models - name: Models
description: List and describe the various models available in the API. description: List and describe the various models available in the API.
- name: Chat - name: Chat
description: > description: >
Given a list of messages comprising a conversation, the model will return Given a list of messages comprising a conversation, the model will
a response. return a response.
- name: Messages - name: Messages
description: > description: >
Messages capture a conversation's content. This can include the content Messages capture a conversation's content. This can include the
from LLM responses and other metadata from [chat content from LLM responses and other metadata from [chat
completions](/specs/chats). completions](/specs/chats).
- name: Threads - name: Threads
- name: Assistants - name: Assistants
@ -49,34 +49,37 @@ paths:
summary: | summary: |
Create chat completion Create chat completion
description: > description: >
Creates a model response for the given chat conversation. <a href = Creates a model response for the given chat conversation. <a href
"https://platform.openai.com/docs/api-reference/chat/create"> Equivalent = "https://platform.openai.com/docs/api-reference/chat/create">
to OpenAI's create chat completion. </a> Equivalent to OpenAI's create chat completion. </a>
requestBody: requestBody:
content: content:
application/json: application/json:
schema: schema:
$ref: "specs/chat.yaml#/components/schemas/ChatCompletionRequest" $ref: specs/chat.yaml#/components/schemas/ChatCompletionRequest
responses: responses:
"200": "200":
description: OK description: OK
content: content:
application/json: application/json:
schema: schema:
$ref: "specs/chat.yaml#/components/schemas/ChatCompletionResponse" $ref: specs/chat.yaml#/components/schemas/ChatCompletionResponse
x-codeSamples: x-codeSamples:
- lang: cURL - lang: cURL
source: > source: |
curl -X POST curl http://localhost:1337/v1/chat/completions \
'http://localhost:3982/inferences/llamacpp/chat_completion' \
-H "Content-Type: application/json" \ -H "Content-Type: application/json" \
-d '{ -d '{
"llama_model_path": "/path/to/your/model.gguf", "model": "tinyllama-1.1b",
"messages": [ "messages": [
{ {
"role": "user", "role": "system",
"content": "hello" "content": "You are a helpful assistant."
}, },
{
"role": "user",
"content": "Hello!"
}
] ]
}' }'
/models: /models:
@ -86,17 +89,17 @@ paths:
- Models - Models
summary: List models summary: List models
description: > description: >
Lists the currently available models, and provides basic information Lists the currently available models, and provides basic
about each one such as the owner and availability. <a href = information about each one such as the owner and availability. <a href
"https://platform.openai.com/docs/api-reference/models/list"> Equivalent = "https://platform.openai.com/docs/api-reference/models/list">
to OpenAI's list model. </a> Equivalent to OpenAI's list model. </a>
responses: responses:
"200": "200":
description: OK description: OK
content: content:
application/json: application/json:
schema: schema:
$ref: "specs/models.yaml#/components/schemas/ListModelsResponse" $ref: specs/models.yaml#/components/schemas/ListModelsResponse
x-codeSamples: x-codeSamples:
- lang: cURL - lang: cURL
source: | source: |
@ -114,7 +117,7 @@ paths:
content: content:
application/json: application/json:
schema: schema:
$ref: "specs/models.yaml#/components/schemas/DownloadModelResponse" $ref: specs/models.yaml#/components/schemas/DownloadModelResponse
x-codeSamples: x-codeSamples:
- lang: cURL - lang: cURL
source: | source: |
@ -126,8 +129,8 @@ paths:
- Models - Models
summary: Retrieve model summary: Retrieve model
description: > description: >
Get a model instance, providing basic information about the model such Get a model instance, providing basic information about the model
as the owner and permissioning. <a href = such as the owner and permissioning. <a href =
"https://platform.openai.com/docs/api-reference/models/retrieve"> "https://platform.openai.com/docs/api-reference/models/retrieve">
Equivalent to OpenAI's retrieve model. </a> Equivalent to OpenAI's retrieve model. </a>
parameters: parameters:
@ -145,7 +148,7 @@ paths:
content: content:
application/json: application/json:
schema: schema:
$ref: "specs/models.yaml#/components/schemas/GetModelResponse" $ref: specs/models.yaml#/components/schemas/GetModelResponse
x-codeSamples: x-codeSamples:
- lang: cURL - lang: cURL
source: | source: |
@ -174,7 +177,7 @@ paths:
content: content:
application/json: application/json:
schema: schema:
$ref: "specs/models.yaml#/components/schemas/DeleteModelResponse" $ref: specs/models.yaml#/components/schemas/DeleteModelResponse
x-codeSamples: x-codeSamples:
- lang: cURL - lang: cURL
source: | source: |
@ -202,7 +205,7 @@ paths:
content: content:
application/json: application/json:
schema: schema:
$ref: "specs/models.yaml#/components/schemas/StartModelResponse" $ref: specs/models.yaml#/components/schemas/StartModelResponse
x-codeSamples: x-codeSamples:
- lang: cURL - lang: cURL
source: | source: |
@ -229,7 +232,7 @@ paths:
content: content:
application/json: application/json:
schema: schema:
$ref: "specs/models.yaml#/components/schemas/StopModelResponse" $ref: specs/models.yaml#/components/schemas/StopModelResponse
x-codeSamples: x-codeSamples:
- lang: cURL - lang: cURL
source: | source: |
@ -255,14 +258,14 @@ paths:
type: array type: array
description: Initial set of messages for the thread. description: Initial set of messages for the thread.
items: items:
$ref: "specs/threads.yaml#/components/schemas/ThreadMessageObject" $ref: specs/threads.yaml#/components/schemas/ThreadMessageObject
responses: responses:
"200": "200":
description: Thread created successfully description: Thread created successfully
content: content:
application/json: application/json:
schema: schema:
$ref: "specs/threads.yaml#/components/schemas/CreateThreadResponse" $ref: specs/threads.yaml#/components/schemas/CreateThreadResponse
x-codeSamples: x-codeSamples:
- lang: cURL - lang: cURL
source: | source: |
@ -293,7 +296,7 @@ paths:
schema: schema:
type: array type: array
items: items:
$ref: "specs/threads.yaml#/components/schemas/ThreadObject" $ref: specs/threads.yaml#/components/schemas/ThreadObject
example: example:
- id: thread_abc123 - id: thread_abc123
object: thread object: thread
@ -340,7 +343,7 @@ paths:
content: content:
application/json: application/json:
schema: schema:
$ref: "specs/threads.yaml#/components/schemas/GetThreadResponse" $ref: specs/threads.yaml#/components/schemas/GetThreadResponse
x-codeSamples: x-codeSamples:
- lang: cURL - lang: cURL
source: | source: |
@ -374,14 +377,14 @@ paths:
type: array type: array
description: Set of messages to update in the thread. description: Set of messages to update in the thread.
items: items:
$ref: "specs/threads.yaml#/components/schemas/ThreadMessageObject" $ref: specs/threads.yaml#/components/schemas/ThreadMessageObject
responses: responses:
"200": "200":
description: Thread modified successfully description: Thread modified successfully
content: content:
application/json: application/json:
schema: schema:
$ref: "specs/threads.yaml#/components/schemas/ModifyThreadResponse" $ref: specs/threads.yaml#/components/schemas/ModifyThreadResponse
x-codeSamples: x-codeSamples:
- lang: cURL - lang: cURL
source: | source: |
@ -421,7 +424,7 @@ paths:
content: content:
application/json: application/json:
schema: schema:
$ref: "specs/threads.yaml#/components/schemas/DeleteThreadResponse" $ref: specs/threads.yaml#/components/schemas/DeleteThreadResponse
x-codeSamples: x-codeSamples:
- lang: cURL - lang: cURL
source: | source: |
@ -448,7 +451,7 @@ paths:
content: content:
application/json: application/json:
schema: schema:
$ref: "specs/threads.yaml#/components/schemas/GetThreadResponse" $ref: specs/threads.yaml#/components/schemas/GetThreadResponse
x-codeSamples: x-codeSamples:
- lang: cURL - lang: cURL
source: | source: |
@ -484,7 +487,7 @@ paths:
content: content:
application/json: application/json:
schema: schema:
$ref: "specs/threads.yaml#/components/schemas/GetThreadResponse" $ref: specs/threads.yaml#/components/schemas/GetThreadResponse
x-codeSamples: x-codeSamples:
- lang: cURL - lang: cURL
source: | source: |
@ -513,7 +516,7 @@ paths:
created_at: 1698984975 created_at: 1698984975
name: Math Tutor name: Math Tutor
description: null description: null
avatar: "https://pic.png" avatar: https://pic.png
models: models:
- model_id: model_0 - model_id: model_0
instructions: Be concise instructions: Be concise
@ -527,7 +530,7 @@ paths:
created_at: 1698984975 created_at: 1698984975
name: Physics Tutor name: Physics Tutor
description: null description: null
avatar: "https://pic.png" avatar: https://pic.png
models: models:
- model_id: model_1 - model_id: model_1
instructions: Be concise! instructions: Be concise!
@ -559,8 +562,7 @@ paths:
properties: properties:
models: models:
type: array type: array
description: >- description: List of models associated with the assistant. Jan-specific
List of models associated with the assistant. Jan-specific
property. property.
items: items:
type: object type: object
@ -574,8 +576,7 @@ paths:
content: content:
application/json: application/json:
schema: schema:
$ref: >- $ref: specs/assistants.yaml#/components/schemas/CreateAssistantResponse
specs/assistants.yaml#/components/schemas/CreateAssistantResponse
x-codeSamples: x-codeSamples:
- lang: cURL - lang: cURL
source: | source: |
@ -613,8 +614,7 @@ paths:
content: content:
application/json: application/json:
schema: schema:
$ref: >- $ref: specs/assistants.yaml#/components/schemas/RetrieveAssistantResponse
specs/assistants.yaml#/components/schemas/RetrieveAssistantResponse
x-codeSamples: x-codeSamples:
- lang: cURL - lang: cURL
source: | source: |
@ -647,8 +647,7 @@ paths:
properties: properties:
models: models:
type: array type: array
description: >- description: List of models associated with the assistant. Jan-specific
List of models associated with the assistant. Jan-specific
property. property.
items: items:
type: object type: object
@ -670,8 +669,7 @@ paths:
content: content:
application/json: application/json:
schema: schema:
$ref: >- $ref: specs/assistants.yaml#/components/schemas/ModifyAssistantResponse
specs/assistants.yaml#/components/schemas/ModifyAssistantResponse
x-codeSamples: x-codeSamples:
- lang: cURL - lang: cURL
source: | source: |
@ -710,8 +708,7 @@ paths:
content: content:
application/json: application/json:
schema: schema:
$ref: >- $ref: specs/assistants.yaml#/components/schemas/DeleteAssistantResponse
specs/assistants.yaml#/components/schemas/DeleteAssistantResponse
x-codeSamples: x-codeSamples:
- lang: cURL - lang: cURL
source: | source: |
@ -741,7 +738,7 @@ paths:
content: content:
application/json: application/json:
schema: schema:
$ref: "specs/messages.yaml#/components/schemas/ListMessagesResponse" $ref: specs/messages.yaml#/components/schemas/ListMessagesResponse
x-codeSamples: x-codeSamples:
- lang: cURL - lang: cURL
source: | source: |
@ -794,7 +791,7 @@ paths:
content: content:
application/json: application/json:
schema: schema:
$ref: "specs/messages.yaml#/components/schemas/CreateMessageResponse" $ref: specs/messages.yaml#/components/schemas/CreateMessageResponse
x-codeSamples: x-codeSamples:
- lang: cURL - lang: cURL
source: | source: |
@ -838,12 +835,12 @@ paths:
content: content:
application/json: application/json:
schema: schema:
$ref: "specs/messages.yaml#/components/schemas/GetMessageResponse" $ref: specs/messages.yaml#/components/schemas/GetMessageResponse
x-codeSamples: x-codeSamples:
- lang: cURL - lang: cURL
source: > source: >
curl curl http://localhost:1337/v1/threads/{thread_id}/messages/{message_id}
http://localhost:1337/v1/threads/{thread_id}/messages/{message_id} \ \
-H "Content-Type: application/json" -H "Content-Type: application/json"
"/threads/{thread_id}/messages/{message_id}/files": "/threads/{thread_id}/messages/{message_id}/files":
get: get:
@ -879,8 +876,7 @@ paths:
content: content:
application/json: application/json:
schema: schema:
$ref: >- $ref: specs/messages.yaml#/components/schemas/ListMessageFilesResponse
specs/messages.yaml#/components/schemas/ListMessageFilesResponse
x-codeSamples: x-codeSamples:
- lang: cURL - lang: cURL
source: > source: >
@ -895,8 +891,8 @@ paths:
- Messages - Messages
summary: Retrieve message file summary: Retrieve message file
description: > description: >
Retrieves a file associated with a specific message in a thread. <a Retrieves a file associated with a specific message in a
href = thread. <a href =
"https://platform.openai.com/docs/api-reference/messages/getMessageFile"> "https://platform.openai.com/docs/api-reference/messages/getMessageFile">
Equivalent to OpenAI's retrieve message file. </a> Equivalent to OpenAI's retrieve message file. </a>
parameters: parameters:
@ -930,7 +926,7 @@ paths:
content: content:
application/json: application/json:
schema: schema:
$ref: "specs/messages.yaml#/components/schemas/MessageFileObject" $ref: specs/messages.yaml#/components/schemas/MessageFileObject
x-codeSamples: x-codeSamples:
- lang: cURL - lang: cURL
source: > source: >
@ -953,14 +949,15 @@ x-webhooks:
content: content:
application/json: application/json:
schema: schema:
$ref: "specs/models.yaml#/components/schemas/ModelObject" $ref: specs/models.yaml#/components/schemas/ModelObject
AssistantObject: AssistantObject:
post: post:
summary: The assistant object summary: The assistant object
description: > description: >
Build assistants that can call models and use tools to perform tasks. Build assistants that can call models and use tools to perform
<a href = "https://platform.openai.com/docs/api-reference/assistants"> tasks. <a href =
Equivalent to OpenAI's assistants object. </a> "https://platform.openai.com/docs/api-reference/assistants"> Equivalent
to OpenAI's assistants object. </a>
operationId: AssistantObjects operationId: AssistantObjects
tags: tags:
- Assistants - Assistants
@ -968,7 +965,7 @@ x-webhooks:
content: content:
application/json: application/json:
schema: schema:
$ref: "specs/assistants.yaml#/components/schemas/AssistantObject" $ref: specs/assistants.yaml#/components/schemas/AssistantObject
MessageObject: MessageObject:
post: post:
summary: The message object summary: The message object
@ -983,12 +980,11 @@ x-webhooks:
content: content:
application/json: application/json:
schema: schema:
$ref: "specs/messages.yaml#/components/schemas/MessageObject" $ref: specs/messages.yaml#/components/schemas/MessageObject
ThreadObject: ThreadObject:
post: post:
summary: The thread object summary: The thread object
description: >- description: Represents a thread that contains messages. <a href =
Represents a thread that contains messages. <a href =
"https://platform.openai.com/docs/api-reference/threads/object"> "https://platform.openai.com/docs/api-reference/threads/object">
Equivalent to OpenAI's thread object. </a> Equivalent to OpenAI's thread object. </a>
operationId: ThreadObject operationId: ThreadObject
@ -998,4 +994,4 @@ x-webhooks:
content: content:
application/json: application/json:
schema: schema:
$ref: "specs/threads.yaml#/components/schemas/ThreadObject" $ref: specs/threads.yaml#/components/schemas/ThreadObject

View File

@ -1,3 +1,4 @@
---
components: components:
schemas: schemas:
AssistantObject: AssistantObject:
@ -9,7 +10,7 @@ components:
example: asst_abc123 example: asst_abc123
object: object:
type: string type: string
description: "Type of the object, indicating it's an assistant." description: Type of the object, indicating it's an assistant.
default: assistant default: assistant
version: version:
type: integer type: integer
@ -31,7 +32,7 @@ components:
avatar: avatar:
type: string type: string
description: URL of the assistant's avatar. Jan-specific property. description: URL of the assistant's avatar. Jan-specific property.
example: "https://pic.png" example: https://pic.png
models: models:
type: array type: array
description: List of models associated with the assistant. Jan-specific property. description: List of models associated with the assistant. Jan-specific property.
@ -70,7 +71,7 @@ components:
example: asst_abc123 example: asst_abc123
object: object:
type: string type: string
description: "Type of the object, indicating it's an assistant." description: Type of the object, indicating it's an assistant.
default: assistant default: assistant
version: version:
type: integer type: integer
@ -92,7 +93,7 @@ components:
avatar: avatar:
type: string type: string
description: URL of the assistant's avatar. Jan-specific property. description: URL of the assistant's avatar. Jan-specific property.
example: "https://pic.png" example: https://pic.png
models: models:
type: array type: array
description: List of models associated with the assistant. Jan-specific property. description: List of models associated with the assistant. Jan-specific property.
@ -130,7 +131,7 @@ components:
example: asst_abc123 example: asst_abc123
object: object:
type: string type: string
description: "Type of the object, indicating it's an assistant." description: Type of the object, indicating it's an assistant.
default: assistant default: assistant
version: version:
type: integer type: integer
@ -152,7 +153,7 @@ components:
avatar: avatar:
type: string type: string
description: URL of the assistant's avatar. Jan-specific property. description: URL of the assistant's avatar. Jan-specific property.
example: "https://pic.png" example: https://pic.png
models: models:
type: array type: array
description: List of models associated with the assistant. Jan-specific property. description: List of models associated with the assistant. Jan-specific property.
@ -190,7 +191,7 @@ components:
example: asst_abc123 example: asst_abc123
object: object:
type: string type: string
description: "Type of the object, indicating it's an assistant." description: Type of the object, indicating it's an assistant.
default: assistant default: assistant
version: version:
type: integer type: integer
@ -212,7 +213,7 @@ components:
avatar: avatar:
type: string type: string
description: URL of the assistant's avatar. Jan-specific property. description: URL of the assistant's avatar. Jan-specific property.
example: "https://pic.png" example: https://pic.png
models: models:
type: array type: array
description: List of models associated with the assistant. Jan-specific property. description: List of models associated with the assistant. Jan-specific property.
@ -250,7 +251,7 @@ components:
example: asst_abc123 example: asst_abc123
object: object:
type: string type: string
description: "Type of the object, indicating it's an assistant." description: Type of the object, indicating it's an assistant.
default: assistant default: assistant
version: version:
type: integer type: integer
@ -272,7 +273,7 @@ components:
avatar: avatar:
type: string type: string
description: URL of the assistant's avatar. Jan-specific property. description: URL of the assistant's avatar. Jan-specific property.
example: "https://pic.png" example: https://pic.png
models: models:
type: array type: array
description: List of models associated with the assistant. Jan-specific property. description: List of models associated with the assistant. Jan-specific property.
@ -310,7 +311,7 @@ components:
example: asst_abc123 example: asst_abc123
object: object:
type: string type: string
description: "Type of the object, indicating the assistant has been deleted." description: Type of the object, indicating the assistant has been deleted.
example: assistant.deleted example: assistant.deleted
deleted: deleted:
type: boolean type: boolean

View File

@ -1,3 +1,4 @@
---
components: components:
schemas: schemas:
ChatObject: ChatObject:
@ -15,8 +16,7 @@ components:
stream: stream:
type: boolean type: boolean
default: true default: true
description: >- description: Enables continuous output generation, allowing for streaming of
Enables continuous output generation, allowing for streaming of
model responses. model responses.
model: model:
type: string type: string
@ -25,27 +25,23 @@ components:
max_tokens: max_tokens:
type: number type: number
default: 2048 default: 2048
description: >- description: The maximum number of tokens the model will generate in a single
The maximum number of tokens the model will generate in a single
response. response.
stop: stop:
type: array type: array
example: example:
- hello - hello
description: >- description: Defines specific tokens or phrases at which the model will stop
Defines specific tokens or phrases at which the model will stop
generating further output/ generating further output/
frequency_penalty: frequency_penalty:
type: number type: number
default: 0 default: 0
description: >- description: Adjusts the likelihood of the model repeating words or phrases in
Adjusts the likelihood of the model repeating words or phrases in
its output. its output.
presence_penalty: presence_penalty:
type: number type: number
default: 0 default: 0
description: >- description: Influences the generation of new and varied concepts in the model's
Influences the generation of new and varied concepts in the model's
output. output.
temperature: temperature:
type: number type: number
@ -71,13 +67,13 @@ components:
description: | description: |
Contains input data or prompts for the model to process. Contains input data or prompts for the model to process.
example: example:
- content: "Hello there :wave:" - content: You are a helpful assistant.
role: assistant role: system
- content: Can you write a long story - content: Hello!
role: user role: user
model: model:
type: string type: string
example: model-zephyr-7B example: tinyllama-1.1b
description: | description: |
Specifies the model being used for inference or processing tasks. Specifies the model being used for inference or processing tasks.
stream: stream:
@ -139,7 +135,7 @@ components:
type: string type: string
nullable: true nullable: true
example: null example: null
description: "Reason for finishing the response, if applicable" description: Reason for finishing the response, if applicable
index: index:
type: integer type: integer
example: 0 example: 0

View File

@ -1,3 +1,4 @@
---
components: components:
schemas: schemas:
ListModelsResponse: ListModelsResponse:
@ -27,8 +28,7 @@ components:
description: The version number of the model. description: The version number of the model.
id: id:
type: string type: string
description: >- description: Unique identifier used in chat-completions model_name, matches
Unique identifier used in chat-completions model_name, matches
folder name. folder name.
example: zephyr-7b example: zephyr-7b
name: name:
@ -57,14 +57,13 @@ components:
description: Current state of the model. description: Current state of the model.
format: format:
type: string type: string
description: "State format of the model, distinct from the engine." description: State format of the model, distinct from the engine.
example: ggufv3 example: ggufv3
source_url: source_url:
type: string type: string
format: uri format: uri
description: URL to the source of the model. description: URL to the source of the model.
example: >- example: https://huggingface.co/TheBloke/zephyr-7B-beta-GGUF/blob/main/zephyr-7b-beta.Q4_K_M.gguf
https://huggingface.co/TheBloke/zephyr-7B-beta-GGUF/blob/main/zephyr-7b-beta.Q4_K_M.gguf
settings: settings:
type: object type: object
properties: properties:
@ -152,7 +151,7 @@ components:
example: zephyr-7b example: zephyr-7b
object: object:
type: string type: string
description: "Type of the object, indicating it's a model." description: Type of the object, indicating it's a model.
default: model default: model
created: created:
type: integer type: integer
@ -174,8 +173,7 @@ components:
type: string type: string
format: uri format: uri
description: URL to the source of the model. description: URL to the source of the model.
example: >- example: https://huggingface.co/TheBloke/zephyr-7B-beta-GGUF/blob/main/zephyr-7b-beta.Q4_K_M.gguf
https://huggingface.co/TheBloke/zephyr-7B-beta-GGUF/blob/main/zephyr-7b-beta.Q4_K_M.gguf
engine_parameters: engine_parameters:
type: object type: object
properties: properties:
@ -198,8 +196,7 @@ components:
default: "ASSISTANT: " default: "ASSISTANT: "
ngl: ngl:
type: integer type: integer
description: >- description: Number of neural network layers loaded onto the GPU for
Number of neural network layers loaded onto the GPU for
acceleration. acceleration.
minimum: 0 minimum: 0
maximum: 100 maximum: 100
@ -207,18 +204,16 @@ components:
example: 100 example: 100
ctx_len: ctx_len:
type: integer type: integer
description: >- description: Context length for model operations, varies based on the specific
Context length for model operations, varies based on the model.
specific model.
minimum: 128 minimum: 128
maximum: 4096 maximum: 4096
default: 2048 default: 2048
example: 2048 example: 2048
n_parallel: n_parallel:
type: integer type: integer
description: >- description: Number of parallel operations, relevant when continuous batching is
Number of parallel operations, relevant when continuous batching enabled.
is enabled.
minimum: 1 minimum: 1
maximum: 10 maximum: 10
default: 1 default: 1
@ -269,8 +264,7 @@ components:
example: 4 example: 4
temperature: temperature:
type: number type: number
description: >- description: Controls randomness in model's responses. Higher values lead to
Controls randomness in model's responses. Higher values lead to
more random responses. more random responses.
minimum: 0 minimum: 0
maximum: 2 maximum: 2
@ -278,8 +272,7 @@ components:
example: 0.7 example: 0.7
token_limit: token_limit:
type: integer type: integer
description: >- description: Maximum number of tokens the model can generate in a single
Maximum number of tokens the model can generate in a single
response. response.
minimum: 1 minimum: 1
maximum: 4096 maximum: 4096
@ -287,18 +280,16 @@ components:
example: 2048 example: 2048
top_k: top_k:
type: integer type: integer
description: >- description: Limits the model to consider only the top k most likely next tokens
Limits the model to consider only the top k most likely next at each step.
tokens at each step.
minimum: 0 minimum: 0
maximum: 100 maximum: 100
default: 0 default: 0
example: 0 example: 0
top_p: top_p:
type: number type: number
description: >- description: Nucleus sampling parameter. The model considers the smallest set of
Nucleus sampling parameter. The model considers the smallest set tokens whose cumulative probability exceeds the top_p value.
of tokens whose cumulative probability exceeds the top_p value.
minimum: 0 minimum: 0
maximum: 1 maximum: 1
default: 1 default: 1

View File

@ -1,3 +1,4 @@
---
components: components:
schemas: schemas:
ThreadObject: ThreadObject:
@ -39,13 +40,13 @@ components:
settings: settings:
type: object type: object
description: > description: >
Defaults to and overrides assistant.json's "settings" (and Defaults to and overrides assistant.json's "settings" (and if none,
if none, then model.json "settings") then model.json "settings")
parameters: parameters:
type: object type: object
description: > description: >
Defaults to and overrides assistant.json's "parameters" Defaults to and overrides assistant.json's "parameters" (and if
(and if none, then model.json "parameters") none, then model.json "parameters")
created: created:
type: integer type: integer
format: int64 format: int64
@ -141,7 +142,7 @@ components:
example: thread_abc123 example: thread_abc123
object: object:
type: string type: string
description: "Type of the object, indicating it's a thread." description: Type of the object, indicating it's a thread.
example: thread example: thread
created_at: created_at:
type: integer type: integer
@ -161,7 +162,7 @@ components:
example: thread_abc123 example: thread_abc123
object: object:
type: string type: string
description: "Type of the object, indicating the thread has been deleted." description: Type of the object, indicating the thread has been deleted.
example: thread.deleted example: thread.deleted
deleted: deleted:
type: boolean type: boolean

Binary files added (not shown): 110 KiB, 106 KiB, 111 KiB, 43 KiB, 129 KiB
View File

@ -4,14 +4,17 @@ import reflect from '@alumna/reflect'
import { FileManagerRoute } from '@janhq/core' import { FileManagerRoute } from '@janhq/core'
import { userSpacePath, getResourcePath } from './../utils/path' import { userSpacePath, getResourcePath } from './../utils/path'
import fs from 'fs'
import { join } from 'path'
import { FileStat } from '@janhq/core/.'
/** /**
* Handles file system extensions operations. * Handles file system extensions operations.
*/ */
export function handleFileMangerIPCs() { export function handleFileMangerIPCs() {
// Handles the 'synceFile' IPC event. This event is triggered to synchronize a file from a source path to a destination path. // Handles the 'syncFile' IPC event. This event is triggered to synchronize a file from a source path to a destination path.
ipcMain.handle( ipcMain.handle(
FileManagerRoute.synceFile, FileManagerRoute.syncFile,
async (_event, src: string, dest: string) => { async (_event, src: string, dest: string) => {
return reflect({ return reflect({
src, src,
@ -31,7 +34,33 @@ export function handleFileMangerIPCs() {
) )
// Handles the 'getResourcePath' IPC event. This event is triggered to get the resource path. // Handles the 'getResourcePath' IPC event. This event is triggered to get the resource path.
ipcMain.handle(FileManagerRoute.getResourcePath, async (_event) => { ipcMain.handle(FileManagerRoute.getResourcePath, async (_event) =>
return getResourcePath() getResourcePath()
}) )
// Handles the 'fileStat' IPC event. Resolves the file's stats for a path inside the user space, or undefined if the file does not exist.
ipcMain.handle(
FileManagerRoute.fileStat,
async (_event, path: string): Promise<FileStat | undefined> => {
const normalizedPath = path
.replace(`file://`, '')
.replace(`file:/`, '')
.replace(`file:\\\\`, '')
.replace(`file:\\`, '')
const fullPath = join(userSpacePath, normalizedPath)
const isExist = fs.existsSync(fullPath)
if (!isExist) return undefined
const isDirectory = fs.lstatSync(fullPath).isDirectory()
const size = fs.statSync(fullPath).size
const fileStat: FileStat = {
isDirectory,
size,
}
return fileStat
}
)
} }

View File

@ -1 +1 @@
0.1.32 0.1.34

View File

@ -1,3 +1,3 @@
@echo off @echo off
set /p NITRO_VERSION=<./bin/version.txt set /p NITRO_VERSION=<./bin/version.txt
.\node_modules\.bin\download https://github.com/janhq/nitro/releases/download/v%NITRO_VERSION%/nitro-%NITRO_VERSION%-win-amd64-cuda.tar.gz -e --strip 1 -o ./bin/win-cuda && .\node_modules\.bin\download https://github.com/janhq/nitro/releases/download/v%NITRO_VERSION%/nitro-%NITRO_VERSION%-win-amd64.tar.gz -e --strip 1 -o ./bin/win-cpu .\node_modules\.bin\download https://github.com/janhq/nitro/releases/download/v%NITRO_VERSION%/nitro-%NITRO_VERSION%-win-amd64-cuda-12-0.tar.gz -e --strip 1 -o ./bin/win-cuda-12-0 && .\node_modules\.bin\download https://github.com/janhq/nitro/releases/download/v%NITRO_VERSION%/nitro-%NITRO_VERSION%-win-amd64-cuda-11-4.tar.gz -e --strip 1 -o ./bin/win-cuda-11-4 && .\node_modules\.bin\download https://github.com/janhq/nitro/releases/download/v%NITRO_VERSION%/nitro-%NITRO_VERSION%-win-amd64.tar.gz -e --strip 1 -o ./bin/win-cpu

View File

@@ -8,7 +8,7 @@
   "license": "AGPL-3.0",
   "scripts": {
     "build": "tsc -b . && webpack --config webpack.config.js",
-    "downloadnitro:linux": "NITRO_VERSION=$(cat ./bin/version.txt) && download https://github.com/janhq/nitro/releases/download/v${NITRO_VERSION}/nitro-${NITRO_VERSION}-linux-amd64.tar.gz -e --strip 1 -o ./bin/linux-cpu && chmod +x ./bin/linux-cpu/nitro && download https://github.com/janhq/nitro/releases/download/v${NITRO_VERSION}/nitro-${NITRO_VERSION}-linux-amd64-cuda.tar.gz -e --strip 1 -o ./bin/linux-cuda && chmod +x ./bin/linux-cuda/nitro",
+    "downloadnitro:linux": "NITRO_VERSION=$(cat ./bin/version.txt) && download https://github.com/janhq/nitro/releases/download/v${NITRO_VERSION}/nitro-${NITRO_VERSION}-linux-amd64.tar.gz -e --strip 1 -o ./bin/linux-cpu && chmod +x ./bin/linux-cpu/nitro && download https://github.com/janhq/nitro/releases/download/v${NITRO_VERSION}/nitro-${NITRO_VERSION}-linux-amd64-cuda-12-0.tar.gz -e --strip 1 -o ./bin/linux-cuda-12-0 && chmod +x ./bin/linux-cuda-12-0/nitro && download https://github.com/janhq/nitro/releases/download/v${NITRO_VERSION}/nitro-${NITRO_VERSION}-linux-amd64-cuda-11-4.tar.gz -e --strip 1 -o ./bin/linux-cuda-11-4 && chmod +x ./bin/linux-cuda-11-4/nitro",
     "downloadnitro:darwin": "NITRO_VERSION=$(cat ./bin/version.txt) && download https://github.com/janhq/nitro/releases/download/v${NITRO_VERSION}/nitro-${NITRO_VERSION}-mac-arm64.tar.gz -e --strip 1 -o ./bin/mac-arm64 && chmod +x ./bin/mac-arm64/nitro && download https://github.com/janhq/nitro/releases/download/v${NITRO_VERSION}/nitro-${NITRO_VERSION}-mac-amd64.tar.gz -e --strip 1 -o ./bin/mac-x64 && chmod +x ./bin/mac-x64/nitro",
     "downloadnitro:win32": "download.bat",
     "downloadnitro": "run-script-os",

View File

@@ -30,7 +30,10 @@ export function requestInference(
       signal: controller?.signal,
     })
       .then(async (response) => {
-        if (model.parameters.stream) {
+        if (model.parameters.stream === false) {
+          const data = await response.json();
+          subscriber.next(data.choices[0]?.message?.content ?? "");
+        } else {
           const stream = response.body;
           const decoder = new TextDecoder("utf-8");
           const reader = stream?.getReader();
@@ -54,9 +57,6 @@ export function requestInference(
             }
           }
         }
-        } else {
-          const data = await response.json();
-          subscriber.next(data.choices[0]?.message?.content ?? "");
         }
         subscriber.complete();
       })
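
The second hunk elides the body of the streaming read loop. For orientation, a typical loop over such a response looks roughly like this; a sketch assuming OpenAI-style "data: {...}" lines, not the extension's exact code (a robust version would also buffer lines split across chunks):

// Sketch of an SSE-style read loop (illustrative, not the extension's code).
async function readTokens(
  reader: ReadableStreamDefaultReader<Uint8Array>,
  onToken: (token: string) => void
) {
  const decoder = new TextDecoder("utf-8");
  while (true) {
    const { done, value } = await reader.read();
    if (done) break;
    // Each chunk may contain several "data: {...}" lines.
    for (const line of decoder.decode(value, { stream: true }).split("\n")) {
      const payload = line.replace(/^data: /, "").trim();
      if (!payload || payload === "[DONE]") continue;
      const token = JSON.parse(payload).choices[0]?.delta?.content;
      if (token) onToken(token);
    }
  }
}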

View File

@@ -85,28 +85,40 @@ function checkFileExistenceInPaths(file: string, paths: string[]): boolean {
 }

 function updateCudaExistence() {
-  let files: string[];
+  let filesCuda12: string[];
+  let filesCuda11: string[];
   let paths: string[];
+  let cudaVersion: string = "";

   if (process.platform === "win32") {
-    files = ["cublas64_12.dll", "cudart64_12.dll", "cublasLt64_12.dll"];
+    filesCuda12 = ["cublas64_12.dll", "cudart64_12.dll", "cublasLt64_12.dll"];
+    filesCuda11 = ["cublas64_11.dll", "cudart64_11.dll", "cublasLt64_11.dll"];
     paths = process.env.PATH ? process.env.PATH.split(path.delimiter) : [];
-    const nitro_cuda_path = path.join(__dirname, "bin", "win-cuda");
-    paths.push(nitro_cuda_path);
   } else {
-    files = ["libcudart.so.12", "libcublas.so.12", "libcublasLt.so.12"];
+    filesCuda12 = ["libcudart.so.12", "libcublas.so.12", "libcublasLt.so.12"];
+    filesCuda11 = ["libcudart.so.11.0", "libcublas.so.11", "libcublasLt.so.11"];
     paths = process.env.LD_LIBRARY_PATH
       ? process.env.LD_LIBRARY_PATH.split(path.delimiter)
       : [];
-    const nitro_cuda_path = path.join(__dirname, "bin", "linux-cuda");
-    paths.push(nitro_cuda_path);
     paths.push("/usr/lib/x86_64-linux-gnu/");
   }

-  let cudaExists = files.every(
+  let cudaExists = filesCuda12.every(
     (file) => existsSync(file) || checkFileExistenceInPaths(file, paths)
   );
+  if (!cudaExists) {
+    cudaExists = filesCuda11.every(
+      (file) => existsSync(file) || checkFileExistenceInPaths(file, paths)
+    );
+    if (cudaExists) {
+      cudaVersion = "11";
+    }
+  } else {
+    cudaVersion = "12";
+  }

   let data;
   try {
     data = JSON.parse(readFileSync(NVIDIA_INFO_FILE, "utf-8"));
@@ -115,6 +127,7 @@ function updateCudaExistence() {
   }

   data["cuda"].exist = cudaExists;
+  data["cuda"].version = cudaVersion;
   if (cudaExists) {
     data.run_mode = "gpu";
   }
@@ -376,12 +389,17 @@ function spawnNitroProcess(nitroResourceProbe: any): Promise<any> {
   let cudaVisibleDevices = "";
   let binaryName;
   if (process.platform === "win32") {
-    let nvida_info = JSON.parse(readFileSync(NVIDIA_INFO_FILE, "utf-8"));
-    if (nvida_info["run_mode"] === "cpu") {
+    let nvidiaInfo = JSON.parse(readFileSync(NVIDIA_INFO_FILE, "utf-8"));
+    if (nvidiaInfo["run_mode"] === "cpu") {
       binaryFolder = path.join(binaryFolder, "win-cpu");
     } else {
-      binaryFolder = path.join(binaryFolder, "win-cuda");
-      cudaVisibleDevices = nvida_info["gpu_highest_vram"];
+      if (nvidiaInfo["cuda"].version === "12") {
+        binaryFolder = path.join(binaryFolder, "win-cuda-12-0");
+      } else {
+        binaryFolder = path.join(binaryFolder, "win-cuda-11-4");
+      }
+      cudaVisibleDevices = nvidiaInfo["gpu_highest_vram"];
     }
     binaryName = "nitro.exe";
   } else if (process.platform === "darwin") {
@@ -392,12 +410,17 @@ function spawnNitroProcess(nitroResourceProbe: any): Promise<any> {
     }
     binaryName = "nitro";
   } else {
-    let nvida_info = JSON.parse(readFileSync(NVIDIA_INFO_FILE, "utf-8"));
-    if (nvida_info["run_mode"] === "cpu") {
+    let nvidiaInfo = JSON.parse(readFileSync(NVIDIA_INFO_FILE, "utf-8"));
+    if (nvidiaInfo["run_mode"] === "cpu") {
       binaryFolder = path.join(binaryFolder, "linux-cpu");
     } else {
-      binaryFolder = path.join(binaryFolder, "linux-cuda");
-      cudaVisibleDevices = nvida_info["gpu_highest_vram"];
+      if (nvidiaInfo["cuda"].version === "12") {
+        binaryFolder = path.join(binaryFolder, "linux-cuda-12-0");
+      } else {
+        binaryFolder = path.join(binaryFolder, "linux-cuda-11-4");
+      }
+      cudaVisibleDevices = nvidiaInfo["gpu_highest_vram"];
     }
     binaryName = "nitro";
   }
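
checkFileExistenceInPaths sits above this hunk and is not shown. A plausible implementation consistent with its call sites (an assumption, not the file's actual code):

import { existsSync } from "fs";
import path from "path";

// Plausible sketch: true if `file` can be found in any of the candidate directories.
function checkFileExistenceInPaths(file: string, paths: string[]): boolean {
  return paths.some((dir) => existsSync(path.join(dir, file)));
}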

View File

@@ -46,7 +46,10 @@ export function requestInference(
           subscriber.complete();
           return;
         }
-        if (model.parameters.stream) {
+        if (model.parameters.stream === false) {
+          const data = await response.json();
+          subscriber.next(data.choices[0]?.message?.content ?? "");
+        } else {
           const stream = response.body;
           const decoder = new TextDecoder("utf-8");
           const reader = stream?.getReader();
@@ -70,9 +73,6 @@ export function requestInference(
             }
          }
        }
-        } else {
-          const data = await response.json();
-          subscriber.next(data.choices[0]?.message?.content ?? "");
        }
        subscriber.complete();
      })

View File

@@ -1,6 +1,6 @@
 {
   "name": "@janhq/model-extension",
-  "version": "1.0.17",
+  "version": "1.0.18",
   "description": "Model Management Extension provides model exploration and seamless downloads",
   "main": "dist/index.js",
   "module": "dist/module.js",

View File

@@ -5,11 +5,12 @@ import {
   abortDownload,
   getResourcePath,
   getUserSpace,
+  fileStat,
   InferenceEngine,
   joinPath,
+  ModelExtension,
+  Model,
 } from '@janhq/core'
-import { ModelExtension, Model } from '@janhq/core'
+import { baseName } from '@janhq/core/.'

 /**
  * An extension for models
@@ -21,6 +22,9 @@ export default class JanModelExtension implements ModelExtension {
   private static readonly _incompletedModelFileName = '.download'
   private static readonly _offlineInferenceEngine = InferenceEngine.nitro

+  private static readonly _configDirName = 'config'
+  private static readonly _defaultModelFileName = 'default-model.json'
+
   /**
    * Implements type from JanExtension.
    * @override
@@ -46,17 +50,16 @@ export default class JanModelExtension implements ModelExtension {
   private async copyModelsToHomeDir() {
     try {
-      // list all of the files under the home directory
-      if (await fs.existsSync(JanModelExtension._homeDir)) {
-        // ignore if the model is already downloaded
+      // Check for migration conditions
+      if (
+        localStorage.getItem(`${EXTENSION_NAME}-version`) === VERSION &&
+        (await fs.existsSync(JanModelExtension._homeDir))
+      ) {
+        // ignore if there is no need to migrate
         console.debug('Models already persisted.')
         return
       }

-      // Get available models
-      const readyModels = (await this.getDownloadedModels()).map((e) => e.id)
-
       // copy models folder from resources to home directory
       const resourePath = await getResourcePath()
       const srcPath = await joinPath([resourePath, 'models'])
@@ -68,18 +71,7 @@ export default class JanModelExtension implements ModelExtension {
       console.debug('Finished syncing models')

-      const reconfigureModels = (await this.getConfiguredModels()).filter((e) =>
-        readyModels.includes(e.id)
-      )
-      console.debug('Finished updating downloaded models')
-
-      // update back the status
-      await Promise.all(
-        reconfigureModels.map(async (model) => this.saveModel(model))
-      )
-
       // Finished migration
       localStorage.setItem(`${EXTENSION_NAME}-version`, VERSION)
     } catch (err) {
       console.error(err)
@@ -199,7 +191,7 @@ export default class JanModelExtension implements ModelExtension {
   ): Promise<Model[]> {
     try {
       if (!(await fs.existsSync(JanModelExtension._homeDir))) {
-        console.debug('model folder not found')
+        console.debug('Model folder not found')
         return []
       }
@@ -220,6 +212,9 @@ export default class JanModelExtension implements ModelExtension {
           dirName,
           JanModelExtension._modelMetadataFileName,
         ])
+
+        if (await fs.existsSync(jsonPath)) {
+          // if we have the model.json file, read it
           let model = await this.readModelMetadata(jsonPath)
           model = typeof model === 'object' ? model : JSON.parse(model)
@@ -227,6 +222,12 @@ export default class JanModelExtension implements ModelExtension {
             return
           }
           return model
+        } else {
+          // otherwise, generate our own model file
+          // TODO: there may be more than one binary file here; this will be
+          // addressed by the new version of the Model file (see Hiro's PR)
+          return this.generateModelMetadata(dirName)
+        }
       })
       const results = await Promise.allSettled(readJsonPromises)
       const modelData = results.map((result) => {
@@ -254,6 +255,84 @@ export default class JanModelExtension implements ModelExtension {
     return fs.readFileSync(path, 'utf-8')
   }

+  /**
+   * Handles the case where a model directory exists without a corresponding
+   * model.json file.
+   *
+   * This function creates a model.json file for the model.
+   *
+   * @param dirName the directory that resides in ~/jan/models but has no model.json file.
+   */
+  private async generateModelMetadata(dirName: string): Promise<Model> {
+    const files: string[] = await fs.readdirSync(
+      await joinPath([JanModelExtension._homeDir, dirName])
+    )
+
+    // sort files by name
+    files.sort()
+
+    // find the first file which is not a directory
+    let binaryFileName: string | undefined = undefined
+    let binaryFileSize: number | undefined = undefined
+
+    for (const file of files) {
+      if (file.endsWith(JanModelExtension._incompletedModelFileName)) continue
+      if (file.endsWith('.json')) continue
+
+      const path = await joinPath([JanModelExtension._homeDir, dirName, file])
+      const fileStats = await fileStat(path)
+      if (fileStats.isDirectory) continue
+      binaryFileSize = fileStats.size
+      binaryFileName = file
+      break
+    }
+
+    if (!binaryFileName) {
+      console.warn(`Unable to find binary file for model ${dirName}`)
+      return
+    }
+
+    const defaultModel = await this.getDefaultModel()
+    if (!defaultModel) {
+      console.error('Unable to find default model')
+      return
+    }
+
+    const model: Model = {
+      ...defaultModel,
+      id: dirName,
+      name: dirName,
+      created: Date.now(),
+      description: `${dirName} - user self import model`,
+    }
+
+    const modelFilePath = await joinPath([
+      JanModelExtension._homeDir,
+      dirName,
+      JanModelExtension._modelMetadataFileName,
+    ])
+
+    await fs.writeFileSync(modelFilePath, JSON.stringify(model, null, 2))
+    return model
+  }
+
+  private async getDefaultModel(): Promise<Model | undefined> {
+    const defaultModelPath = await joinPath([
+      JanModelExtension._homeDir,
+      JanModelExtension._configDirName,
+      JanModelExtension._defaultModelFileName,
+    ])
+
+    if (!(await fs.existsSync(defaultModelPath))) {
+      return undefined
+    }
+
+    const model = await this.readModelMetadata(defaultModelPath)
+    return typeof model === 'object' ? model : JSON.parse(model)
+  }
+
   /**
    * Gets all available models.
    * @returns A Promise that resolves with an array of all models.
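
One detail worth noting: binaryFileSize is collected above but never written into the generated metadata. For orientation, this is roughly what the fallback produces for a hypothetical folder ~/jan/models/my-model/ containing only my-model.gguf (a sketch of the merge above, not new behavior):

import { Model } from '@janhq/core'

// Sketch: the generated model.json is the default-model.json template
// (shown in the next file) with these fields overridden.
function sketchGeneratedModel(defaultModel: Model, dirName: string): Model {
  return {
    ...defaultModel,
    id: dirName, // e.g. 'my-model' (hypothetical)
    name: dirName,
    created: Date.now(),
    description: `${dirName} - user self import model`,
  }
}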

View File

@@ -0,0 +1,35 @@
{
"object": "model",
"version": 1,
"format": "gguf",
"source_url": "N/A",
"id": "N/A",
"name": "N/A",
"created": 0,
"description": "User self import model",
"settings": {
"ctx_len": 4096,
"ngl": 0,
"embedding": false,
"n_parallel": 0,
"cpu_threads": 0,
"prompt_template": ""
},
"parameters": {
"temperature": 0,
"token_limit": 0,
"top_k": 0,
"top_p": 0,
"stream": false,
"max_tokens": 4096,
"stop": [],
"frequency_penalty": 0,
"presence_penalty": 0
},
"metadata": {
"author": "User",
"tags": [],
"size": 0
},
"engine": "nitro"
}

View File

@@ -0,0 +1,22 @@
{
"source_url": "https://huggingface.co/TheBloke/dolphin-2.7-mixtral-8x7b-GGUF/resolve/main/dolphin-2.7-mixtral-8x7b.Q4_K_M.gguf",
"id": "dolphin-2.7-mixtral-8x7b",
"object": "model",
"name": "Dolphin 8x7B Q4",
"version": "1.0",
"description": "This model is an uncensored model based on Mixtral-8x7b. Dolphin is really good at coding",
"format": "gguf",
"settings": {
"ctx_len": 4096,
"prompt_template": "<|im_start|>system\n{system_message}<|im_end|>\n<|im_start|>user\n{prompt}<|im_end|>\n<|im_start|>assistant"
},
"parameters": {
"max_tokens": 4096
},
"metadata": {
"author": "Cognitive Computations, TheBloke",
"tags": ["70B", "Fintuned"],
"size": 26440000000
},
"engine": "nitro"
}

View File

@@ -1,23 +0,0 @@
{
"source_url": "https://huggingface.co/TheBloke/lzlv_70B-GGUF/resolve/main/lzlv_70b_fp16_hf.Q5_K_M.gguf",
"id": "lzlv-70b",
"object": "model",
"name": "Lzlv 70B Q4",
"version": "1.0",
"description": "lzlv_70B is a sophisticated AI model designed for roleplaying and creative tasks. This merge aims to combine intelligence with creativity, seemingly outperforming its individual components in complex scenarios and creative outputs.",
"format": "gguf",
"settings": {
"ctx_len": 4096,
"prompt_template": "USER:\n{prompt}\nASSISTANT:"
},
"parameters": {
"max_tokens": 4096
},
"metadata": {
"author": "Lizpreciatior, The Bloke",
"tags": ["70B", "Finetuned"],
"size": 48750000000
},
"engine": "nitro"
}

View File

@@ -1,5 +1,5 @@
 {
-  "source_url": "https://huggingface.co/TheBloke/Mistral-7B-Instruct-v0.1-GGUF/resolve/main/mistral-7b-instruct-v0.1.Q4_K_M.gguf",
+  "source_url": "https://huggingface.co/TheBloke/Mistral-7B-Instruct-v0.2-GGUF/resolve/main/mistral-7b-instruct-v0.2.Q4_K_M.gguf",
   "id": "mistral-ins-7b-q4",
   "object": "model",
   "name": "Mistral Instruct 7B Q4",
@@ -8,9 +8,6 @@
   "format": "gguf",
   "settings": {
     "ctx_len": 4096,
-    "system_prompt": "",
-    "user_prompt": "<s>[INST]",
-    "ai_prompt": "[/INST]",
     "prompt_template": "<s>[INST]{prompt}\n[/INST]"
   },
   "parameters": {

View File

@@ -0,0 +1,22 @@
{
"source_url": "https://huggingface.co/TheBloke/openchat-3.5-1210-GGUF/resolve/main/openchat-3.5-1210.Q4_K_M.gguf",
"id": "openchat-3.5-7b",
"object": "model",
"name": "Openchat-3.5 7B Q4",
"version": "1.0",
"description": "The performance of this open-source model surpasses that of ChatGPT-3.5 and Grok-1 across various benchmarks.",
"format": "gguf",
"settings": {
"ctx_len": 4096,
"prompt_template": "GPT4 Correct User: {prompt}<|end_of_turn|>GPT4 Correct Assistant:"
},
"parameters": {
"max_tokens": 4096
},
"metadata": {
"author": "Openchat",
"tags": ["7B", "Fintuned", "Featured"],
"size": 4370000000
},
"engine": "nitro"
}

View File

@@ -1,5 +1,5 @@
 {
-  "source_url": "https://huggingface.co/janhq/stealth-v1.2-GGUF/resolve/main/stealth-v1.2.Q4_K_M.gguf",
+  "source_url": "https://huggingface.co/janhq/stealth-v1.3-GGUF/resolve/main/stealth-v1.3.Q4_K_M.gguf",
   "id": "stealth-v1.2-7b",
   "object": "model",
   "name": "Stealth-v1.2 7B Q4",

View File

@@ -1,5 +1,5 @@
 {
-  "source_url": "https://huggingface.co/TinyLlama/TinyLlama-1.1B-Chat-v0.6/resolve/main/ggml-model-q4_0.gguf",
+  "source_url": "https://huggingface.co/TheBloke/TinyLlama-1.1B-Chat-v1.0-GGUF/resolve/main/tinyllama-1.1b-chat-v1.0.Q4_K_M.gguf",
   "id": "tinyllama-1.1b",
   "object": "model",
   "name": "TinyLlama Chat 1.1B Q4",
@@ -16,7 +16,7 @@
   "metadata": {
     "author": "TinyLlama",
     "tags": ["Tiny", "Foundation Model"],
-    "size": 637000000
+    "size": 669000000
   },
   "engine": "nitro"
 }

Binary file not shown (before: 1.7 MiB)

View File

@@ -1,22 +0,0 @@
{
"source_url": "https://huggingface.co/janhq/trinity-v1-GGUF/resolve/main/trinity-v1.Q4_K_M.gguf",
"id": "trinity-v1-7b",
"object": "model",
"name": "Trinity-v1 7B Q4",
"version": "1.0",
"description": "Please use the latest version Trinity v1.2 for the best experience. Trinity is an experimental model merge of GreenNodeLM & LeoScorpius using the Slerp method. Recommended for daily assistance purposes.",
"format": "gguf",
"settings": {
"ctx_len": 4096,
"prompt_template": "{system_message}\n### Instruction:\n{prompt}\n### Response:"
},
"parameters": {
"max_tokens": 4096
},
"metadata": {
"author": "Jan",
"tags": ["7B", "Merged"],
"size": 4370000000
},
"engine": "nitro"
}

View File

@@ -0,0 +1,22 @@
{
"source_url": "https://huggingface.co/TheBloke/tulu-2-dpo-70B-GGUF/resolve/main/tulu-2-dpo-70b.Q4_K_M.gguf",
"id": "tulu-2-70b",
"object": "model",
"name": "Tulu 2 70B Q4",
"version": "1.0",
"description": "Tulu V2 DPO 70B is a fine-tuned version of Llama 2 using (DPO). This model is a strong alternative to Llama 2 70b Chat to act as helpful assistants.",
"format": "gguf",
"settings": {
"ctx_len": 4096,
"prompt_template": "<|user|>\n{prompt}\n<|assistant|>"
},
"parameters": {
"max_tokens": 4096
},
"metadata": {
"author": "Lizpreciatior, The Bloke",
"tags": ["70B", "Finetuned"],
"size": 41400000000
},
"engine": "nitro"
}

View File

@@ -0,0 +1,23 @@
{
"source_url": "https://huggingface.co/TheBloke/Yarn-Mistral-7B-128k-GGUF/resolve/main/yarn-mistral-7b-128k.Q4_K_M.gguf",
"id": "yarn-mistral-7b",
"object": "model",
"name": "Yarn Mistral 7B Q4",
"version": "1.0",
"description": "Yarn Mistral 7B is a language model for long context and supports a 128k token context window.",
"format": "gguf",
"settings": {
"ctx_len": 4096,
"prompt_template": "{prompt}"
},
"parameters": {
"max_tokens": 4096
},
"metadata": {
"author": "NousResearch, The Bloke",
"tags": ["7B","Finetuned"],
"size": 4370000000
},
"engine": "nitro"
}

View File

@@ -17,6 +17,7 @@
     "build": "tsc"
   },
   "dependencies": {
+    "@alumna/reflect": "^1.1.3",
     "@fastify/cors": "^8.4.2",
     "@fastify/static": "^6.12.0",
     "@fastify/swagger": "^8.13.0",

View File

@@ -36,28 +36,23 @@ const GPUDriverPrompt: React.FC = () => {
     <Modal open={showNotification} onOpenChange={openChanged}>
       <ModalContent>
         <ModalHeader>
-          <ModalTitle>Missing Nvidia Driver and Cuda Toolkit</ModalTitle>
+          <ModalTitle>
+            Your machine does not meet the requirements to run in GPU mode.
+          </ModalTitle>
         </ModalHeader>
         <p>
-          It seems like you are missing Nvidia Driver or Cuda Toolkit or both.
-          Please follow the instructions on the{' '}
+          It appears that you are missing some dependencies required to run in
+          GPU mode. Please follow the instructions in the{' '}
           <span
             className="cursor-pointer text-blue-600"
             onClick={() =>
-              openExternalUrl('https://developer.nvidia.com/cuda-toolkit')
+              openExternalUrl(
+                'https://github.com/janhq/jan/blob/main/USAGE.md'
+              )
             }
           >
-            NVidia Cuda Toolkit Installation Page
+            Jan running mode documentation
           </span>{' '}
-          and the{' '}
-          <span
-            className="cursor-pointer text-blue-600"
-            onClick={() =>
-              openExternalUrl('https://www.nvidia.com/Download/index.aspx')
-            }
-          >
-            Nvidia Driver Installation Page
-          </span>
           .
         </p>
         <div className="flex items-center space-x-2">

View File

@@ -22,8 +22,12 @@ export default function EventListenerWrapper({ children }: PropsWithChildren) {
   const modelsRef = useRef(models)
   const { setDownloadedModels, downloadedModels } = useGetDownloadedModels()

-  const { setDownloadState, setDownloadStateSuccess, setDownloadStateFailed } =
-    useDownloadState()
+  const {
+    setDownloadState,
+    setDownloadStateSuccess,
+    setDownloadStateFailed,
+    setDownloadStateCancelled,
+  } = useDownloadState()
   const downloadedModelRef = useRef(downloadedModels)

   useEffect(() => {
@@ -52,13 +56,18 @@ export default function EventListenerWrapper({ children }: PropsWithChildren) {
     window.electronAPI.onFileDownloadError(
       async (_event: string, state: any) => {
-        if (state.err?.message !== 'aborted')
-          console.error('Download error', state)
         const modelName = await baseName(state.fileName)
         const model = modelsRef.current.find(
           (model) => modelBinFileName(model) === modelName
         )
-        if (model) setDownloadStateFailed(model.id)
+        if (model) {
+          if (state.err?.message !== 'aborted') {
+            console.error('Download error', state)
+            setDownloadStateFailed(model.id, state.err.message)
+          } else {
+            setDownloadStateCancelled(model.id)
+          }
+        }
       }
     )

View File

@@ -29,7 +29,28 @@ const setDownloadStateSuccessAtom = atom(null, (get, set, modelId: string) => {
   })
 })

-const setDownloadStateFailedAtom = atom(null, (get, set, modelId: string) => {
+const setDownloadStateFailedAtom = atom(
+  null,
+  (get, set, modelId: string, error: string) => {
+    const currentState = { ...get(modelDownloadStateAtom) }
+    const state = currentState[modelId]
+    if (!state) {
+      console.debug(`Cannot find download state for ${modelId}`)
+      return
+    }
+
+    toaster({
+      title: 'Download Failed',
+      description: `Model ${modelId} download failed: ${error}`,
+      type: 'error',
+    })
+
+    delete currentState[modelId]
+    set(modelDownloadStateAtom, currentState)
+  }
+)
+
+const setDownloadStateCancelledAtom = atom(
+  null,
+  (get, set, modelId: string) => {
     const currentState = { ...get(modelDownloadStateAtom) }
     const state = currentState[modelId]
     if (!state) {
@@ -38,17 +59,20 @@ const setDownloadStateFailedAtom = atom(null, (get, set, modelId: string) => {
       title: 'Cancel Download',
       description: `Model ${modelId} cancel download`,
     })
     return
   }

   delete currentState[modelId]
   set(modelDownloadStateAtom, currentState)
-})
+  }
+)

 export function useDownloadState() {
   const modelDownloadState = useAtomValue(modelDownloadStateAtom)

   const setDownloadState = useSetAtom(setDownloadStateAtom)
   const setDownloadStateSuccess = useSetAtom(setDownloadStateSuccessAtom)
   const setDownloadStateFailed = useSetAtom(setDownloadStateFailedAtom)
+  const setDownloadStateCancelled = useSetAtom(setDownloadStateCancelledAtom)

   const downloadStates: DownloadState[] = []
   for (const [, value] of Object.entries(modelDownloadState)) {
@@ -61,6 +85,7 @@ export function useDownloadState() {
     setDownloadState,
     setDownloadStateSuccess,
     setDownloadStateFailed,
+    setDownloadStateCancelled,
     downloadStates,
   }
 }
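
Combined with the listener change earlier in this commit, a caller now distinguishes a user-initiated abort from a real failure. A minimal sketch inside a component (the err shape is assumed from the listener above):

// Sketch: routing a download error to the right setter.
const { setDownloadStateFailed, setDownloadStateCancelled } = useDownloadState()

const onDownloadError = (modelId: string, err?: Error) => {
  if (err?.message === 'aborted') {
    setDownloadStateCancelled(modelId) // quiet cleanup with a "Cancel Download" toast
  } else {
    setDownloadStateFailed(modelId, err?.message ?? 'unknown error') // error toast
  }
}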

View File

@@ -72,7 +72,7 @@ export const presetConfiguration: Record<string, SettingComponentData> = {
   stream: {
     name: 'stream',
     title: 'Stream',
-    description: 'Stream',
+    description: 'Enable real-time data processing for faster predictions.',
     controllerType: 'checkbox',
     controllerData: {
       checked: false,

View File

@@ -19,7 +19,7 @@ import DropdownListSidebar, {
 import { useCreateNewThread } from '@/hooks/useCreateNewThread'

 import { getConfigurationsData } from '@/utils/componentSettings'
-import { toSettingParams } from '@/utils/model_param'
+import { toRuntimeParams, toSettingParams } from '@/utils/model_param'

 import EngineSetting from '../EngineSetting'
 import ModelSetting from '../ModelSetting'
@@ -44,7 +44,9 @@ const Sidebar: React.FC = () => {
   const threadStates = useAtomValue(threadStatesAtom)

   const modelEngineParams = toSettingParams(activeModelParams)
+  const modelRuntimeParams = toRuntimeParams(activeModelParams)
   const componentDataEngineSetting = getConfigurationsData(modelEngineParams)
+  const componentDataRuntimeSetting = getConfigurationsData(modelRuntimeParams)

   const onReviewInFinderClick = async (type: string) => {
     if (!activeThread) return
@@ -224,6 +226,7 @@ const Sidebar: React.FC = () => {
               <DropdownListSidebar />
             </div>

+            {componentDataRuntimeSetting.length !== 0 && (
               <div className="mt-6">
                 <CardSidebar title="Inference Parameters" asChild>
                   <div className="px-2 py-4">
@@ -231,7 +234,11 @@ const Sidebar: React.FC = () => {
                   </div>
                 </CardSidebar>
               </div>
+            )}

+            {componentDataEngineSetting.filter(
+              (x) => x.name === 'prompt_template'
+            ).length !== 0 && (
               <div className="mt-4">
                 <CardSidebar title="Model Parameters" asChild>
                   <div className="px-2 py-4">
@@ -239,7 +246,9 @@ const Sidebar: React.FC = () => {
                   </div>
                 </CardSidebar>
               </div>
+            )}

+            {componentDataEngineSetting.length !== 0 && (
               <div className="my-4">
                 <CardSidebar
                   title="Engine Parameters"
@@ -252,6 +261,7 @@ const Sidebar: React.FC = () => {
                   </div>
                 </CardSidebar>
               </div>
+            )}
           </div>
         </CardSidebar>
       </div>
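
A small aside on the presence check: filter(...).length !== 0 walks the whole array, while Array.some expresses the same test and short-circuits (a stylistic alternative, not what the commit ships):

// Equivalent, short-circuiting presence check.
const hasPromptTemplate = componentDataEngineSetting.some(
  (x) => x.name === 'prompt_template'
)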

View File

@@ -5,7 +5,7 @@ import { ChatCompletionRole, MessageStatus, ThreadMessage } from '@janhq/core'
 import hljs from 'highlight.js'
 import { useAtomValue } from 'jotai'
-import { Marked } from 'marked'
+import { Marked, Renderer } from 'marked'
 import { markedHighlight } from 'marked-highlight'
@@ -30,7 +30,7 @@ const SimpleTextMessage: React.FC<ThreadMessage> = (props) => {
   }
   const clipboard = useClipboard({ timeout: 1000 })

-  const marked = new Marked(
+  const marked: Marked = new Marked(
     markedHighlight({
       langPrefix: 'hljs',
       highlight(code, lang) {
@@ -46,6 +46,11 @@ const SimpleTextMessage: React.FC<ThreadMessage> = (props) => {
     }),
     {
       renderer: {
+        link: (href, title, text) => {
+          return Renderer.prototype.link
+            ?.apply(this, [href, title, text])
+            .replace('<a', "<a target='_blank'")
+        },
         code(code, lang, escaped) {
           return `
             <div class="relative code-block group/item">

View File

@@ -5,6 +5,7 @@ import {
   ModalTrigger,
   ModalClose,
   ModalFooter,
+  ModalPortal,
   ModalContent,
   ModalHeader,
   ModalTitle,
@@ -89,7 +90,9 @@ export default function ThreadList() {
             className={twMerge(
               `group/message relative mb-1 flex cursor-pointer flex-col transition-all hover:rounded-lg hover:bg-gray-100 hover:dark:bg-secondary/50`
             )}
-            onClick={() => onThreadClick(thread)}
+            onClick={() => {
+              onThreadClick(thread)
+            }}
           >
             <div className="relative z-10 p-4 py-4">
               <div className="flex justify-between">
@@ -111,7 +114,7 @@ export default function ThreadList() {
                 <MoreVerticalIcon />
                 <div className="invisible absolute right-0 z-20 w-40 overflow-hidden rounded-lg border border-border bg-background shadow-lg group-hover/icon:visible">
                   <Modal>
-                    <ModalTrigger asChild>
+                    <ModalTrigger asChild onClick={(e) => e.stopPropagation()}>
                       <div className="flex cursor-pointer items-center space-x-2 px-4 py-2 hover:bg-secondary">
                         <Paintbrush
                           size={16}
@@ -122,6 +125,7 @@ export default function ThreadList() {
                         </span>
                       </div>
                     </ModalTrigger>
+                    <ModalPortal />
                     <ModalContent>
                       <ModalHeader>
                         <ModalTitle>Clean Thread</ModalTitle>
@@ -129,13 +133,19 @@ export default function ThreadList() {
                       <p>Are you sure you want to clean this thread?</p>
                       <ModalFooter>
                         <div className="flex gap-x-2">
-                          <ModalClose asChild>
+                          <ModalClose
+                            asChild
+                            onClick={(e) => e.stopPropagation()}
+                          >
                             <Button themes="ghost">No</Button>
                           </ModalClose>
                           <ModalClose asChild>
                             <Button
                               themes="danger"
-                              onClick={() => cleanThread(thread.id)}
+                              onClick={(e) => {
+                                e.stopPropagation()
+                                cleanThread(thread.id)
+                              }}
                               autoFocus
                             >
                               Yes
@@ -145,9 +155,8 @@ export default function ThreadList() {
                         </ModalFooter>
                       </ModalContent>
                     </Modal>
-
                   <Modal>
-                    <ModalTrigger asChild>
+                    <ModalTrigger asChild onClick={(e) => e.stopPropagation()}>
                       <div className="flex cursor-pointer items-center space-x-2 px-4 py-2 hover:bg-secondary">
                         <Trash2Icon
                           size={16}
@@ -158,6 +167,7 @@ export default function ThreadList() {
                         </span>
                       </div>
                     </ModalTrigger>
+                    <ModalPortal />
                     <ModalContent>
                       <ModalHeader>
                         <ModalTitle>Delete Thread</ModalTitle>
@@ -168,14 +178,20 @@ export default function ThreadList() {
                       </p>
                       <ModalFooter>
                         <div className="flex gap-x-2">
-                          <ModalClose asChild>
+                          <ModalClose
+                            asChild
+                            onClick={(e) => e.stopPropagation()}
+                          >
                             <Button themes="ghost">No</Button>
                           </ModalClose>
                           <ModalClose asChild>
                             <Button
                               autoFocus
                               themes="danger"
-                              onClick={() => deleteThread(thread.id)}
+                              onClick={(e) => {
+                                e.stopPropagation()
+                                deleteThread(thread.id)
+                              }}
                             >
                               Yes
                             </Button>

View File

@@ -1,4 +1,3 @@
-/* eslint-disable @typescript-eslint/naming-convention */
 import { Model } from '@janhq/core'

 import ExploreModelItem from '@/screens/ExploreModels/ExploreModelItem'
@@ -8,33 +7,45 @@ type Props = {
 }

 const ExploreModelList: React.FC<Props> = ({ models }) => {
-  const sortOrder: Record<string, number> = {
-    '7b': 1,
-    '13b': 2,
-    '34b': 3,
-    '70b': 4,
-    '120b': 5,
-    'tiny': 6,
-  }
-  const sortedModels = models?.sort((a, b) => {
-    const aIsFeatured = a.metadata.tags.includes('Featured')
-    const bIsFeatured = b.metadata.tags.includes('Featured')
-    const aIsRecommended = a.metadata.tags.includes('Recommended')
-    const bIsRecommended = b.metadata.tags.includes('Recommended')
-    const aNumericTag =
-      a.metadata.tags.find((tag) => !!sortOrder[tag.toLowerCase()]) ?? 'Tiny'
-    const bNumericTag =
-      b.metadata.tags.find((tag) => !!sortOrder[tag.toLowerCase()]) ?? 'Tiny'
-
-    if (aIsFeatured !== bIsFeatured) return aIsFeatured ? -1 : 1
-    if (aNumericTag !== bNumericTag)
-      return (
-        sortOrder[aNumericTag.toLowerCase()] -
-        sortOrder[bNumericTag.toLowerCase()]
-      )
-    if (aIsRecommended !== bIsRecommended) return aIsRecommended ? -1 : 1
-    return a.metadata.size - b.metadata.size
-  })
+  const takenModelIds: string[] = []
+
+  const featuredModels = models
+    .filter((m) => {
+      if (m.metadata.tags.includes('Featured')) {
+        takenModelIds.push(m.id)
+        return m
+      }
+    })
+    .sort((m1, m2) => m1.metadata.size - m2.metadata.size)
+
+  const recommendedModels = models
+    .filter((m) => {
+      if (m.metadata.tags.includes('Recommended')) {
+        takenModelIds.push(m.id)
+        return m
+      }
+    })
+    .sort((m1, m2) => m1.metadata.size - m2.metadata.size)
+
+  const openAiModels = models
+    .filter((m) => {
+      if (m.engine === 'openai') {
+        takenModelIds.push(m.id)
+        return m
+      }
+    })
+    .sort((m1: Model, m2: Model) => m1.name.localeCompare(m2.name))
+
+  const remainingModels = models
+    .filter((m) => !takenModelIds.includes(m.id))
+    .sort((m1, m2) => m1.metadata.size - m2.metadata.size)
+
+  const sortedModels: Model[] = [
+    ...featuredModels,
+    ...recommendedModels,
+    ...openAiModels,
+    ...remainingModels,
+  ]

   return (
     <div className="relative h-full w-full flex-shrink-0">
       {sortedModels?.map((model) => (
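
A note on the grouping pattern (observation only): the filter callbacks both record IDs and return the model object itself, leaning on object truthiness as the predicate. An equivalent formulation with pure predicates and a Set lookup:

// Equivalent grouping with pure predicates (sketch).
const featured = models.filter((m) => m.metadata.tags.includes('Featured'))
const recommended = models.filter((m) => m.metadata.tags.includes('Recommended'))
const openAi = models.filter((m) => m.engine === 'openai')

const taken = new Set([...featured, ...recommended, ...openAi].map((m) => m.id))
const remaining = models.filter((m) => !taken.has(m.id))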

View File

@@ -1,5 +1,6 @@
 import { useState } from 'react'

+import { openExternalUrl } from '@janhq/core'
 import {
   Input,
   ScrollArea,
@@ -44,6 +45,10 @@ const ExploreModelsScreen = () => {
     }
   })

+  const onHowToImportModelClick = () => {
+    openExternalUrl('https://jan.ai/guides/using-models/import-manually/')
+  }
+
   if (loading) return <Loader description="loading ..." />

   return (
@@ -72,13 +77,12 @@ const ExploreModelsScreen = () => {
             />
           </div>
           <div className="mt-2 text-center">
-            <a
-              href="https://jan.ai/guides/using-models/import-manually/"
-              target="_blank"
-              className="font-semibold text-white underline"
+            <p
+              onClick={onHowToImportModelClick}
+              className="cursor-pointer font-semibold text-white underline"
             >
               How to manually import models
-            </a>
+            </p>
           </div>
         </div>
       </div>