docs: update API Reference page

This commit is contained in:
hieu-jan 2024-01-24 01:40:08 +09:00
parent fe1baed116
commit 0afdee4a98
2 changed files with 150 additions and 275 deletions

View File

@ -67,21 +67,32 @@ paths:
x-codeSamples: x-codeSamples:
- lang: cURL - lang: cURL
source: | source: |
curl http://localhost:1337/v1/chat/completions \ curl -X 'POST' \
-H "Content-Type: application/json" \ 'http://127.0.0.1:1337/v1/chat/completions' \
-H 'accept: application/json' \
-H 'Content-Type: application/json' \
-d '{ -d '{
"model": "tinyllama-1.1b", "messages": [
"messages": [ {
{ "content": "You are a helpful assistant.",
"role": "system", "role": "system"
"content": "You are a helpful assistant." },
}, {
{ "content": "Hello!",
"role": "user", "role": "user"
"content": "Hello!" }
} ],
] "model": "tinyllama-1.1b",
}' "stream": true,
"max_tokens": 2048,
"stop": [
"hello"
],
"frequency_penalty": 0,
"presence_penalty": 0,
"temperature": 0.7,
"top_p": 0.95
}'
/models: /models:
get: get:
operationId: listModels operationId: listModels
@ -103,7 +114,9 @@ paths:
x-codeSamples: x-codeSamples:
- lang: cURL - lang: cURL
source: | source: |
curl http://localhost:1337/v1/models curl -X 'GET' \
'http://127.0.0.1:1337/v1/models' \
-H 'accept: application/json'
"/models/download/{model_id}": "/models/download/{model_id}":
get: get:
operationId: downloadModel operationId: downloadModel
@ -131,7 +144,9 @@ paths:
x-codeSamples: x-codeSamples:
- lang: cURL - lang: cURL
source: | source: |
curl -X POST http://localhost:1337/v1/models/download/{model_id} curl -X 'GET' \
'http://127.0.0.1:1337/v1/models/download/{model_id}' \
-H 'accept: application/json'
"/models/{model_id}": "/models/{model_id}":
get: get:
operationId: retrieveModel operationId: retrieveModel
@ -162,7 +177,9 @@ paths:
x-codeSamples: x-codeSamples:
- lang: cURL - lang: cURL
source: | source: |
curl http://localhost:1337/v1/models/{model_id} curl -X 'GET' \
'http://127.0.0.1:1337/v1/models/{model_id}' \
-H 'accept: application/json'
delete: delete:
operationId: deleteModel operationId: deleteModel
tags: tags:
@ -191,7 +208,9 @@ paths:
x-codeSamples: x-codeSamples:
- lang: cURL - lang: cURL
source: | source: |
curl -X DELETE http://localhost:1337/v1/models/{model_id} curl -X 'DELETE' \
'http://127.0.0.1:1337/v1/models/{model_id}' \
-H 'accept: application/json'
/threads: /threads:
post: post:
operationId: createThread operationId: createThread

View File

@ -18,106 +18,77 @@ components:
Model: Model:
type: object type: object
properties: properties:
type:
type: string
default: model
description: The type of the object.
version:
type: string
default: "1"
description: The version number of the model.
id:
type: string
description: Unique identifier used in chat-completions model_name, matches
folder name.
example: zephyr-7b
name:
type: string
description: Name of the model.
example: Zephyr 7B
owned_by:
type: string
description: Compatibility field for OpenAI.
default: ""
created:
type: integer
format: int64
description: Unix timestamp representing the creation time.
description:
type: string
description: Description of the model.
state:
type: string
enum:
- null
- downloading
- ready
- starting
- stopping
description: Current state of the model.
format:
type: string
description: State format of the model, distinct from the engine.
example: ggufv3
source_url: source_url:
type: string type: string
format: uri format: uri
description: URL to the source of the model. description: URL to the source of the model.
example: https://huggingface.co/TheBloke/zephyr-7B-beta-GGUF/blob/main/zephyr-7b-beta.Q4_K_M.gguf example: https://huggingface.co/janhq/trinity-v1.2-GGUF/resolve/main/trinity-v1.2.Q4_K_M.gguf
id:
type: string
description:
Unique identifier used in chat-completions model_name, matches
folder name.
example: trinity-v1.2-7b
object:
type: string
example: model
name:
type: string
description: Name of the model.
example: Trinity-v1.2 7B Q4
version:
type: string
default: "1.0"
description: The version number of the model.
description:
type: string
description: Description of the model.
example: Trinity is an experimental model merge using the Slerp method. Recommended for daily assistance purposes.
format:
type: string
description: State format of the model, distinct from the engine.
example: gguf
settings: settings:
type: object type: object
properties: properties:
ctx_len: ctx_len:
type: string type: integer
description: Context length. description: Context length.
example: "2048" example: 4096
ngl: prompt_template:
type: string type: string
description: Number of layers. example: "<|im_start|>system\n{system_message}<|im_end|>\n<|im_start|>user\n{prompt}<|im_end|>\n<|im_start|>assistant"
example: "100"
embedding:
type: string
description: Indicates if embedding is enabled.
example: "true"
n_parallel:
type: string
description: Number of parallel processes.
example: "4"
additionalProperties: false additionalProperties: false
parameters: parameters:
type: object type: object
properties: properties:
temperature: temperature:
type: string example: 0.7
description: Temperature setting for the model.
example: "0.7"
token_limit:
type: string
description: Token limit for the model.
example: "2048"
top_k:
type: string
description: Top-k setting for the model.
example: "0"
top_p: top_p:
type: string example: 0.95
description: Top-p setting for the model.
example: "1"
stream: stream:
type: string example: true
description: Indicates if streaming is enabled. max_tokens:
example: "true" example: 4096
stop:
example: []
frequency_penalty:
example: 0
presence_penalty:
example: 0
additionalProperties: false additionalProperties: false
metadata: metadata:
type: object author:
description: Additional metadata.
assets:
type: array
items:
type: string type: string
description: List of assets related to the model. example: Jan
required: tags:
- source_url example: ["7B", "Merged", "Featured"]
size:
example: 4370000000
cover:
example: "https://raw.githubusercontent.com/janhq/jan/main/models/trinity-v1.2-7b/cover.png"
engine:
example: nitro
ModelObject: ModelObject:
type: object type: object
properties: properties:
@ -125,7 +96,7 @@ components:
type: string type: string
description: | description: |
The identifier of the model. The identifier of the model.
example: zephyr-7b example: trinity-v1.2-7b
object: object:
type: string type: string
description: | description: |
@ -145,197 +116,82 @@ components:
GetModelResponse: GetModelResponse:
type: object type: object
properties: properties:
id:
type: string
description: The identifier of the model.
example: zephyr-7b
object:
type: string
description: Type of the object, indicating it's a model.
default: model
created:
type: integer
format: int64
description: Unix timestamp representing the creation time of the model.
owned_by:
type: string
description: The entity that owns the model.
example: _
state:
type: string
enum:
- not_downloaded
- downloaded
- running
- stopped
description: The current state of the model.
source_url: source_url:
type: string type: string
format: uri format: uri
description: URL to the source of the model. description: URL to the source of the model.
example: https://huggingface.co/TheBloke/zephyr-7B-beta-GGUF/blob/main/zephyr-7b-beta.Q4_K_M.gguf example: https://huggingface.co/TheBloke/Mistral-7B-Instruct-v0.2-GGUF/resolve/main/mistral-7b-instruct-v0.2.Q4_K_M.gguf
engine_parameters: id:
type: object type: string
properties: description:
pre_prompt: Unique identifier used in chat-completions model_name, matches
type: string folder name.
description: Predefined prompt used for setting up internal configurations. example: mistral-ins-7b-q4
default: "" object:
example: Initial setup complete. type: string
system_prompt: example: model
type: string name:
description: Prefix used for system-level prompts. type: string
default: "SYSTEM: " description: Name of the model.
user_prompt: example: Mistral Instruct 7B Q4
type: string version:
description: Prefix used for user prompts. type: string
default: "USER: " default: "1.0"
ai_prompt: description: The version number of the model.
type: string description:
description: Prefix used for assistant prompts. type: string
default: "ASSISTANT: " description: Description of the model.
ngl: example: Trinity is an experimental model merge using the Slerp method. Recommended for daily assistance purposes.
type: integer format:
description: Number of neural network layers loaded onto the GPU for type: string
acceleration. description: State format of the model, distinct from the engine.
minimum: 0 example: gguf
maximum: 100 settings:
default: 100
example: 100
ctx_len:
type: integer
description: Context length for model operations, varies based on the specific
model.
minimum: 128
maximum: 4096
default: 2048
example: 2048
n_parallel:
type: integer
description: Number of parallel operations, relevant when continuous batching is
enabled.
minimum: 1
maximum: 10
default: 1
example: 4
cont_batching:
type: boolean
description: Indicates if continuous batching is used for processing.
default: false
example: false
cpu_threads:
type: integer
description: Number of threads allocated for CPU-based inference.
minimum: 1
example: 8
embedding:
type: boolean
description: Indicates if embedding layers are enabled in the model.
default: true
example: true
model_parameters:
type: object type: object
properties: properties:
ctx_len: ctx_len:
type: integer type: integer
description: Maximum context length the model can handle. description: Context length.
minimum: 0 example: 4096
maximum: 4096 prompt_template:
default: 2048 type: string
example: 2048 example: "[INST] {prompt} [/INST]"
ngl: additionalProperties: false
type: integer parameters:
description: Number of layers in the neural network. type: object
minimum: 1 properties:
maximum: 100
default: 100
example: 100
embedding:
type: boolean
description: Indicates if embedding layers are used.
default: true
example: true
n_parallel:
type: integer
description: Number of parallel processes the model can run.
minimum: 1
maximum: 10
default: 1
example: 4
temperature: temperature:
type: number
description: Controls randomness in model's responses. Higher values lead to
more random responses.
minimum: 0
maximum: 2
default: 0.7
example: 0.7 example: 0.7
token_limit:
type: integer
description: Maximum number of tokens the model can generate in a single
response.
minimum: 1
maximum: 4096
default: 2048
example: 2048
top_k:
type: integer
description: Limits the model to consider only the top k most likely next tokens
at each step.
minimum: 0
maximum: 100
default: 0
example: 0
top_p: top_p:
type: number example: 0.95
description: Nucleus sampling parameter. The model considers the smallest set of stream:
tokens whose cumulative probability exceeds the top_p value. example: true
minimum: 0 max_tokens:
maximum: 1 example: 4096
default: 1 stop:
example: 1 example: []
frequency_penalty:
example: 0
presence_penalty:
example: 0
additionalProperties: false
metadata: metadata:
type: object author:
properties: type: string
engine: example: MistralAI
type: string tags:
description: The engine used by the model. example: ["7B", "Featured", "Foundation Model"]
enum: size:
- nitro example: 4370000000
- openai cover:
- hf_inference example: "https://raw.githubusercontent.com/janhq/jan/main/models/mistral-ins-7b-q4/cover.png"
quantization: engine:
type: string example: nitro
description: Quantization parameter of the model.
example: Q3_K_L
size:
type: string
description: Size of the model.
example: 7B
required:
- id
- object
- created
- owned_by
- state
- source_url
- parameters
- metadata
DeleteModelResponse: DeleteModelResponse:
type: object type: object
properties: properties:
id: message:
type: string example: Not found
description: The identifier of the model that was deleted.
example: model-zephyr-7B
object:
type: string
description: Type of the object, indicating it's a model.
default: model
deleted:
type: boolean
description: Indicates whether the model was successfully deleted.
example: true
StartModelResponse: StartModelResponse:
type: object type: object
properties: properties: