* feat: Support multiple model binaries * fix: Update downloadModel with multiple binaries handler * feat: Add 3 models with multiple binaries * chore: fix model download * fix: model file lookup & model path * chore: add .prettierrc * chore: refactor docs * chore: bump model version * fix(capybara): add filename * fix(codeninja): add file name + llama model path * fix(default): add llama model path * fix(deepseek coder): add filename * fix(deepseek 33B): add filename * fix(dolphin mixtral): add filename * fix(llama2-chat): add filename * fix(llama2-70B): add filename * fix(mistral 7b): add filename + model path * fix(bakllava): correct size model * fix(llava-7b): correct size model * fix(llava-13b): correct size model * fix(mixtral-8x7b): add file name + modelpath * fix(noramaid-7b): add file name + modelpath * fix(openchat-7b): add file name + modelpath * fix(openhermes-7b): add file name + modelpath * fix(phi2-3b): add file name + modelpath * fix(phind): add file name + modelpath * fix(solarslerp): add file name + modelpath * fix(starling): add file name + modelpath * fix(stealth): add file name + modelpath * fix(tinyllama): add file name + modelpath * fix(trinity): add file name + modelpath * fix(tulu): add file name + modelpath * fix(wizardcoder): add file name + modelpath * fix(yi): add file name + modelpath * update from source -> sources Signed-off-by: James <james@jan.ai> --------- Signed-off-by: James <james@jan.ai> Co-authored-by: hiro <vuonghoainam.work@gmail.com> Co-authored-by: hahuyhoang411 <hahuyhoanghhh41@gmail.com> Co-authored-by: James <james@jan.ai>
400 lines
12 KiB
YAML
400 lines
12 KiB
YAML
---
|
|
components:
|
|
schemas:
|
|
# Envelope for a model listing: an `object` marker plus the `data` array.
# Both properties are required.
ListModelsResponse:
  type: object
  required: [object, data]
  properties:
    # Single-value enum: the only permitted value is "list".
    object:
      type: string
      enum: [list]
    # Each element is validated against the Model schema.
    data:
      type: array
      items:
        $ref: '#/components/schemas/Model'
|
|
# A model record. Only `source` is required; every other property is optional.
# `settings` and `parameters` are closed objects (additionalProperties: false)
# whose values are all typed as strings, so their examples are quoted.
Model:
  type: object
  properties:
    type:
      type: string
      default: model
      description: The type of the object.
    version:
      type: string
      # Quoted so the default stays a string instead of the integer 1.
      default: "1"
      description: The version number of the model.
    id:
      type: string
      description: Unique identifier used in chat-completions model_name, matches
        folder name.
      example: zephyr-7b
    name:
      type: string
      description: Name of the model.
      example: Zephyr 7B
    owned_by:
      type: string
      description: Compatibility field for OpenAI.
      default: ""
    created:
      type: integer
      format: int64
      description: Unix timestamp representing the creation time.
    description:
      type: string
      description: Description of the model.
    state:
      type: string
      # The enum contains a YAML null, which `type: string` alone would reject.
      # NOTE(review): `nullable` is the OpenAPI 3.0.x keyword; if this spec is
      # 3.1, use `type: [string, "null"]` instead. Confirm the spec version.
      nullable: true
      enum:
        - null
        - downloading
        - ready
        - starting
        - stopping
      description: Current state of the model.
    format:
      type: string
      description: State format of the model, distinct from the engine.
      example: ggufv3
    source:
      type: array
      items:
        type: object
        properties:
          url:
            # `format` only constrains strings, so the type is stated
            # explicitly, matching the sibling `filename` property.
            type: string
            format: uri
            description: URL to the source of the model.
            example: https://huggingface.co/TheBloke/zephyr-7B-beta-GGUF/blob/main/zephyr-7b-beta.Q4_K_M.gguf
          filename:
            type: string
            description: Filename of the model.
            example: zephyr-7b-beta.Q4_K_M.gguf
    settings:
      type: object
      properties:
        ctx_len:
          type: string
          description: Context length.
          example: "4096"
        ngl:
          type: string
          description: Number of layers.
          example: "100"
        embedding:
          type: string
          description: Indicates if embedding is enabled.
          example: "true"
        n_parallel:
          type: string
          description: Number of parallel processes.
          example: "4"
      additionalProperties: false
    parameters:
      type: object
      properties:
        temperature:
          type: string
          description: Temperature setting for the model.
          example: "0.7"
        token_limit:
          type: string
          description: Token limit for the model.
          example: "4096"
        top_k:
          type: string
          description: Top-k setting for the model.
          example: "0"
        top_p:
          type: string
          description: Top-p setting for the model.
          example: "1"
        stream:
          type: string
          description: Indicates if streaming is enabled.
          example: "true"
      additionalProperties: false
    metadata:
      type: object
      description: Additional metadata.
    assets:
      type: array
      items:
        type: string
      description: List of assets related to the model.
  required:
    - source
|
|
# Compact model descriptor with four properties: id, object, created and
# owned_by. No property is marked as required.
# Each description is a block scalar that keeps one trailing newline.
ModelObject:
  type: object
  properties:
    id:
      type: string
      description: >
        The identifier of the model.
      example: zephyr-7b
    object:
      type: string
      description: >
        The type of the object, indicating it's a model.
      default: model
    created:
      type: integer
      format: int64
      description: >
        Unix timestamp representing the creation time of the model.
      example: 1253935178
    owned_by:
      type: string
      description: >
        The entity that owns the model.
      # Quoted to make explicit that the example is the literal string "_".
      example: '_'
|
|
# Detailed view of a single model: identity fields, lifecycle state, download
# sources, engine-level and model-level parameters, and descriptive metadata.
GetModelResponse:
  type: object
  properties:
    id:
      type: string
      description: The identifier of the model.
      example: zephyr-7b
    object:
      type: string
      description: Type of the object, indicating it's a model.
      default: model
    created:
      type: integer
      format: int64
      description: Unix timestamp representing the creation time of the model.
    owned_by:
      type: string
      description: The entity that owns the model.
      example: _
    state:
      type: string
      enum:
        - not_downloaded
        - downloaded
        - running
        - stopped
      description: The current state of the model.
    source:
      type: array
      items:
        type: object
        properties:
          url:
            # `format` only constrains strings, so the type is stated
            # explicitly, matching the sibling `filename` property.
            type: string
            format: uri
            description: URL to the source of the model.
            example: https://huggingface.co/TheBloke/zephyr-7B-beta-GGUF/blob/main/zephyr-7b-beta.Q4_K_M.gguf
          filename:
            type: string
            description: Filename of the model.
            example: zephyr-7b-beta.Q4_K_M.gguf
    # NOTE(review): ctx_len, ngl, embedding and n_parallel are declared under
    # both engine_parameters and model_parameters with different minimums
    # (ngl 0 vs 1, ctx_len 128 vs 0). Confirm the divergence is intentional.
    engine_parameters:
      type: object
      properties:
        pre_prompt:
          type: string
          description: Predefined prompt used for setting up internal configurations.
          default: ""
          example: Initial setup complete.
        # The three prefix defaults are quoted: each contains ": " and ends
        # with a trailing space that a plain scalar would not preserve.
        system_prompt:
          type: string
          description: Prefix used for system-level prompts.
          default: "SYSTEM: "
        user_prompt:
          type: string
          description: Prefix used for user prompts.
          default: "USER: "
        ai_prompt:
          type: string
          description: Prefix used for assistant prompts.
          default: "ASSISTANT: "
        ngl:
          type: integer
          description: Number of neural network layers loaded onto the GPU for
            acceleration.
          minimum: 0
          maximum: 100
          default: 100
          example: 100
        ctx_len:
          type: integer
          description: Context length for model operations, varies based on the specific
            model.
          minimum: 128
          maximum: 4096
          default: 4096
          example: 4096
        n_parallel:
          type: integer
          description: Number of parallel operations, relevant when continuous batching is
            enabled.
          minimum: 1
          maximum: 10
          default: 1
          example: 4
        cont_batching:
          type: boolean
          description: Indicates if continuous batching is used for processing.
          default: false
          example: false
        cpu_threads:
          type: integer
          description: Number of threads allocated for CPU-based inference.
          minimum: 1
          example: 8
        embedding:
          type: boolean
          description: Indicates if embedding layers are enabled in the model.
          default: true
          example: true
    model_parameters:
      type: object
      properties:
        ctx_len:
          type: integer
          description: Maximum context length the model can handle.
          minimum: 0
          maximum: 4096
          default: 4096
          example: 4096
        ngl:
          type: integer
          description: Number of layers in the neural network.
          minimum: 1
          maximum: 100
          default: 100
          example: 100
        embedding:
          type: boolean
          description: Indicates if embedding layers are used.
          default: true
          example: true
        n_parallel:
          type: integer
          description: Number of parallel processes the model can run.
          minimum: 1
          maximum: 10
          default: 1
          example: 4
        temperature:
          type: number
          description: Controls randomness in model's responses. Higher values lead to
            more random responses.
          minimum: 0
          maximum: 2
          default: 0.7
          example: 0.7
        token_limit:
          type: integer
          description: Maximum number of tokens the model can generate in a single
            response.
          minimum: 1
          maximum: 4096
          default: 4096
          example: 4096
        top_k:
          type: integer
          description: Limits the model to consider only the top k most likely next tokens
            at each step.
          minimum: 0
          maximum: 100
          default: 0
          example: 0
        top_p:
          type: number
          description: Nucleus sampling parameter. The model considers the smallest set of
            tokens whose cumulative probability exceeds the top_p value.
          minimum: 0
          maximum: 1
          default: 1
          example: 1
    metadata:
      type: object
      properties:
        engine:
          type: string
          description: The engine used by the model.
          enum:
            - nitro
            - openai
            - hf_inference
        quantization:
          type: string
          description: Quantization parameter of the model.
          example: Q3_K_L
        size:
          type: string
          description: Size of the model.
          example: 7B
  required:
    - id
    - object
    - created
    - owned_by
    - state
    - source
    # NOTE(review): `parameters` is not declared under `properties` above; only
    # `engine_parameters` and `model_parameters` are. Confirm which name the
    # response really carries before changing this entry.
    - parameters
    - metadata
|
|
# Result of a model deletion: the model id, an object-type marker, and a
# boolean `deleted` flag. No property is marked as required.
DeleteModelResponse:
  type: object
  properties:
    id:
      description: The identifier of the model that was deleted.
      type: string
      example: model-zephyr-7B
    object:
      description: Type of the object, indicating it's a model.
      type: string
      default: model
    deleted:
      description: Indicates whether the model was successfully deleted.
      type: boolean
      example: true
|
|
# Result of starting a model. All three properties (id, object, state) are
# required.
StartModelResponse:
  type: object
  required: [id, object, state]
  properties:
    id:
      description: The identifier of the model that was started.
      type: string
      example: model-zephyr-7B
    object:
      description: Type of the object, indicating it's a model.
      type: string
      default: model
    state:
      description: The current state of the model after the start operation.
      type: string
      example: running
|
|
# Result of stopping a model. All three properties (id, object, state) are
# required. The descriptions refer to the stop operation.
StopModelResponse:
  type: object
  properties:
    id:
      type: string
      description: The identifier of the model that was stopped.
      example: model-zephyr-7B
    object:
      type: string
      description: Type of the object, indicating it's a model.
      default: model
    state:
      type: string
      description: The current state of the model after the stop operation.
      example: stopped
  required:
    - id
    - object
    - state
|
|
# Acknowledgement returned for a download request: one human-readable
# `message` string. No property is marked as required.
DownloadModelResponse:
  type: object
  properties:
    message:
      type: string
      description: Message indicating that Jan has started downloading the
        corresponding model.
      example: Starting download mistral-ins-7b-q4
|