Merge branch 'main' into update-model-hub
This commit is contained in:
commit
d9e18f5a3f
60
docs/docs/specs/engineering/engine.md
Normal file
60
docs/docs/specs/engineering/engine.md
Normal file
@ -0,0 +1,60 @@
|
|||||||
|
---
|
||||||
|
title: Engine
|
||||||
|
slug: /specs/engine
|
||||||
|
---
|
||||||
|
|
||||||
|
:::caution
|
||||||
|
|
||||||
|
Currently Under Development
|
||||||
|
|
||||||
|
:::
|
||||||
|
|
||||||
|
## Overview
|
||||||
|
|
||||||
|
In the Jan application, engines serve as primary entities with the following capabilities:
|
||||||
|
|
||||||
|
- Engines will be installed through `inference-extensions`.
|
||||||
|
- Models will depend on engines to do [inference](https://en.wikipedia.org/wiki/Inference_engine).
|
||||||
|
- Engine configuration and required metadata will be stored in a JSON file.
|
||||||
|
|
||||||
|
## Folder Structure
|
||||||
|
|
||||||
|
- Default parameters for engines are stored in JSON files located in the `/engines` folder.
|
||||||
|
- Each parameter file is named after its unique `engine_id` (for example, `nitro.json`).
|
||||||
|
- Engines are referenced directly using `engine_id` in the `model.json` file.
|
||||||
|
|
||||||
|
```yaml
|
||||||
|
jan/
|
||||||
|
engines/
|
||||||
|
nitro.json
|
||||||
|
openai.json
|
||||||
|
.....
|
||||||
|
```
|
||||||
|
|
||||||
|
## Engine Default Parameter Files
|
||||||
|
|
||||||
|
- Each inference engine requires default parameters to function in cases where user-provided parameters are absent.
|
||||||
|
- These parameters are stored in JSON files, structured as simple key-value pairs.
|
||||||
|
|
||||||
|
### Example
|
||||||
|
|
||||||
|
Here is an example of an engine file for `engine_id` `nitro`:
|
||||||
|
|
||||||
|
```js
|
||||||
|
{
|
||||||
|
"ctx_len": 512,
|
||||||
|
"ngl": 100,
|
||||||
|
"embedding": false,
|
||||||
|
"n_parallel": 1,
|
||||||
|
"cont_batching": false,
|
||||||
|
"prompt_template": "<|im_start|>system\n{system_message}<|im_end|>\n<|im_start|>user\n{prompt}<|im_end|>\n<|im_start|>assistant"
|
||||||
|
}
|
||||||
|
```
|
||||||
|
|
||||||
|
For detailed engine parameters, refer to: [Nitro's Model Settings](https://nitro.jan.ai/features/load-unload#table-of-parameters)
|
||||||
|
|
||||||
|
## Adding an Engine
|
||||||
|
|
||||||
|
- Engine parameter files are automatically generated upon installing an `inference-extension` in the Jan application.
|
||||||
|
|
||||||
|
---
|
||||||
@ -53,9 +53,9 @@ jan/ # Jan root folder
|
|||||||
|
|
||||||
Here's a standard example `model.json` for a GGUF model.
|
Here's a standard example `model.json` for a GGUF model.
|
||||||
|
|
||||||
- `source_url`: https://huggingface.co/TheBloke/zephyr-7B-beta-GGUF/.
|
|
||||||
|
|
||||||
```js
|
```js
|
||||||
|
{
|
||||||
"id": "zephyr-7b", // Defaults to foldername
|
"id": "zephyr-7b", // Defaults to foldername
|
||||||
"object": "model", // Defaults to "model"
|
"object": "model", // Defaults to "model"
|
||||||
"source_url": "https://huggingface.co/TheBloke/zephyr-7B-beta-GGUF/blob/main/zephyr-7b-beta.Q4_K_M.gguf",
|
"source_url": "https://huggingface.co/TheBloke/zephyr-7B-beta-GGUF/blob/main/zephyr-7b-beta.Q4_K_M.gguf",
|
||||||
@ -64,15 +64,16 @@ Here's a standard example `model.json` for a GGUF model.
|
|||||||
"version": "1", // Defaults to 1
|
"version": "1", // Defaults to 1
|
||||||
"created": 1231231, // Defaults to file creation time
|
"created": 1231231, // Defaults to file creation time
|
||||||
"description": null, // Defaults to null
|
"description": null, // Defaults to null
|
||||||
"state": enum[null, "downloading", "ready", "starting", "stopping", ...]
|
"state": enum[null, "ready"]
|
||||||
"format": "ggufv3", // Defaults to "ggufv3"
|
"format": "ggufv3", // Defaults to "ggufv3"
|
||||||
"settings": { // Models are initialized with settings
|
"engine": "nitro", // engine_id specified in jan/engines folder
|
||||||
"ctx_len": 2048,
|
"engine_parameters": { // Engine parameters inside model.json can override
|
||||||
|
"ctx_len": 2048, // the value inside the base engine.json
|
||||||
"ngl": 100,
|
"ngl": 100,
|
||||||
"embedding": true,
|
"embedding": true,
|
||||||
"n_parallel": 4,
|
"n_parallel": 4,
|
||||||
},
|
},
|
||||||
"parameters": { // Models are called parameters
|
"model_parameters": { // Models are called with parameters
|
||||||
"stream": true,
|
"stream": true,
|
||||||
"max_tokens": 2048,
|
"max_tokens": 2048,
|
||||||
"stop": ["<endofstring>"], // This usually can be left blank, only used with specific need from model author
|
"stop": ["<endofstring>"], // This usually can be left blank, only used with specific need from model author
|
||||||
@ -85,9 +86,10 @@ Here's a standard example `model.json` for a GGUF model.
|
|||||||
"assets": [ // Defaults to current dir
|
"assets": [ // Defaults to current dir
|
||||||
"file://.../zephyr-7b-q4_k_m.bin",
|
"file://.../zephyr-7b-q4_k_m.bin",
|
||||||
]
|
]
|
||||||
|
}
|
||||||
```
|
```
|
||||||
|
|
||||||
The model settings in the example can be found at: [Nitro's model settings](https://nitro.jan.ai/features/load-unload#table-of-parameters)
|
The engine parameters in the example can be found at: [Nitro's model settings](https://nitro.jan.ai/features/load-unload#table-of-parameters)
|
||||||
|
|
||||||
The model parameters in the example can be found at: [Nitro's model parameters](https://nitro.jan.ai/api-reference#tag/Chat-Completion)
|
The model parameters in the example can be found at: [Nitro's model parameters](https://nitro.jan.ai/api-reference#tag/Chat-Completion)
|
||||||
|
|
||||||
|
|||||||
@ -81,6 +81,7 @@ const sidebars = {
|
|||||||
items: [
|
items: [
|
||||||
"specs/engineering/chats",
|
"specs/engineering/chats",
|
||||||
"specs/engineering/models",
|
"specs/engineering/models",
|
||||||
|
"specs/engineering/engine",
|
||||||
"specs/engineering/threads",
|
"specs/engineering/threads",
|
||||||
"specs/engineering/messages",
|
"specs/engineering/messages",
|
||||||
"specs/engineering/assistants",
|
"specs/engineering/assistants",
|
||||||
|
|||||||
@ -1,4 +1,4 @@
|
|||||||
@echo off
|
@echo off
|
||||||
set /p NITRO_VERSION=<./nitro/version.txt
|
set /p NITRO_VERSION=<./nitro/version.txt
|
||||||
.\node_modules\.bin\download https://github.com/janhq/nitro/releases/download/v%NITRO_VERSION%/nitro-%NITRO_VERSION%-win-amd64-cuda.zip -e --strip 1 -o ./nitro/win-cuda
|
.\node_modules\.bin\download https://github.com/janhq/nitro/releases/download/v%NITRO_VERSION%/nitro-%NITRO_VERSION%-win-amd64-cuda.tar.gz -e --strip 1 -o ./nitro/win-cuda
|
||||||
.\node_modules\.bin\download https://github.com/janhq/nitro/releases/download/v%NITRO_VERSION%/nitro-%NITRO_VERSION%-win-amd64.zip -e --strip 1 -o ./nitro/win-cpu
|
.\node_modules\.bin\download https://github.com/janhq/nitro/releases/download/v%NITRO_VERSION%/nitro-%NITRO_VERSION%-win-amd64.tar.gz -e --strip 1 -o ./nitro/win-cpu
|
||||||
|
|||||||
Loading…
x
Reference in New Issue
Block a user