jan/docs/openapi/specs/models.yaml
Louis 0e48be67e8
feat: support multiple model binaries (#1659)
* feat: Support multiple model binaries

* fix: Update downloadModel with multiple binaries handler

* feat: Add 3 models with multiple binaries

* chore: fix model download

* fix: model file lookup & model path

* chore: add .prettierrc

* chore: refactor docs

* chore: bump model version

* fix(capybara): add filename

* fix(codeninja): add file name + llama model path

* fix(default): add llama model path

* fix(deepseek coder): add filename

* fix(deepseek 33B): add filename

* fix(dolphin mixtral): add filename

* fix(llama2-chat): add filename

* fix(llama2-70B): add filename

* fix(mistral 7b): add filename + model path

* fix(bakllava): correct size model

* fix(llava-7b): correct size model

* fix(llava-13b): correct size model

* fix(mixtral-8x7b): add file name + modelpath

* fix(noramaid-7b): add file name + modelpath

* fix(openchat-7b): add file name + modelpath

* fix(openhermes-7b): add file name + modelpath

* fix(phi2-3b): add file name + modelpath

* fix(phind): add file name + modelpath

* fix(solarslerp): add file name + modelpath

* fix(starling): add file name + modelpath

* fix(stealth): add file name + modelpath

* fix(tinyllama): add file name + modelpath

* fix(trinity): add file name + modelpath

* fix(tulu): add file name + modelpath

* fix(wizardcoder): add file name + modelpath

* fix(yi): add file name + modelpath

* update from source -> sources

Signed-off-by: James <james@jan.ai>

---------

Signed-off-by: James <james@jan.ai>
Co-authored-by: hiro <vuonghoainam.work@gmail.com>
Co-authored-by: hahuyhoang411 <hahuyhoanghhh41@gmail.com>
Co-authored-by: James <james@jan.ai>
2024-01-25 14:05:33 +07:00

400 lines
12 KiB
YAML

---
# OpenAPI schema definitions for Jan's model-management endpoints.
components:
  schemas:
    # Envelope returned by the "list models" endpoint.
    ListModelsResponse:
      type: object
      properties:
        object:
          type: string
          # Always the literal "list" (OpenAI-compatible envelope marker).
          enum:
            - list
        data:
          type: array
          items:
            $ref: "#/components/schemas/Model"
      required:
        - object
        - data
Model:
type: object
properties:
type:
type: string
default: model
description: The type of the object.
version:
type: string
default: "1"
description: The version number of the model.
id:
type: string
description: Unique identifier used in chat-completions model_name, matches
folder name.
example: zephyr-7b
name:
type: string
description: Name of the model.
example: Zephyr 7B
owned_by:
type: string
description: Compatibility field for OpenAI.
default: ""
created:
type: integer
format: int64
description: Unix timestamp representing the creation time.
description:
type: string
description: Description of the model.
state:
type: string
enum:
- null
- downloading
- ready
- starting
- stopping
description: Current state of the model.
format:
type: string
description: State format of the model, distinct from the engine.
example: ggufv3
source:
type: array
items:
type: object
properties:
url:
format: uri
description: URL to the source of the model.
example: https://huggingface.co/TheBloke/zephyr-7B-beta-GGUF/blob/main/zephyr-7b-beta.Q4_K_M.gguf
filename:
type: string
description: Filename of the model.
example: zephyr-7b-beta.Q4_K_M.gguf
settings:
type: object
properties:
ctx_len:
type: string
description: Context length.
example: "4096"
ngl:
type: string
description: Number of layers.
example: "100"
embedding:
type: string
description: Indicates if embedding is enabled.
example: "true"
n_parallel:
type: string
description: Number of parallel processes.
example: "4"
additionalProperties: false
parameters:
type: object
properties:
temperature:
type: string
description: Temperature setting for the model.
example: "0.7"
token_limit:
type: string
description: Token limit for the model.
example: "4096"
top_k:
type: string
description: Top-k setting for the model.
example: "0"
top_p:
type: string
description: Top-p setting for the model.
example: "1"
stream:
type: string
description: Indicates if streaming is enabled.
example: "true"
additionalProperties: false
metadata:
type: object
description: Additional metadata.
assets:
type: array
items:
type: string
description: List of assets related to the model.
required:
- source
ModelObject:
type: object
properties:
id:
type: string
description: |
The identifier of the model.
example: zephyr-7b
object:
type: string
description: |
The type of the object, indicating it's a model.
default: model
created:
type: integer
format: int64
description: |
Unix timestamp representing the creation time of the model.
example: 1253935178
owned_by:
type: string
description: |
The entity that owns the model.
example: _
GetModelResponse:
type: object
properties:
id:
type: string
description: The identifier of the model.
example: zephyr-7b
object:
type: string
description: Type of the object, indicating it's a model.
default: model
created:
type: integer
format: int64
description: Unix timestamp representing the creation time of the model.
owned_by:
type: string
description: The entity that owns the model.
example: _
state:
type: string
enum:
- not_downloaded
- downloaded
- running
- stopped
description: The current state of the model.
source:
type: array
items:
type: object
properties:
url:
format: uri
description: URL to the source of the model.
example: https://huggingface.co/TheBloke/zephyr-7B-beta-GGUF/blob/main/zephyr-7b-beta.Q4_K_M.gguf
filename:
type: string
description: Filename of the model.
example: zephyr-7b-beta.Q4_K_M.gguf
engine_parameters:
type: object
properties:
pre_prompt:
type: string
description: Predefined prompt used for setting up internal configurations.
default: ""
example: Initial setup complete.
system_prompt:
type: string
description: Prefix used for system-level prompts.
default: "SYSTEM: "
user_prompt:
type: string
description: Prefix used for user prompts.
default: "USER: "
ai_prompt:
type: string
description: Prefix used for assistant prompts.
default: "ASSISTANT: "
ngl:
type: integer
description: Number of neural network layers loaded onto the GPU for
acceleration.
minimum: 0
maximum: 100
default: 100
example: 100
ctx_len:
type: integer
description: Context length for model operations, varies based on the specific
model.
minimum: 128
maximum: 4096
default: 4096
example: 4096
n_parallel:
type: integer
description: Number of parallel operations, relevant when continuous batching is
enabled.
minimum: 1
maximum: 10
default: 1
example: 4
cont_batching:
type: boolean
description: Indicates if continuous batching is used for processing.
default: false
example: false
cpu_threads:
type: integer
description: Number of threads allocated for CPU-based inference.
minimum: 1
example: 8
embedding:
type: boolean
description: Indicates if embedding layers are enabled in the model.
default: true
example: true
model_parameters:
type: object
properties:
ctx_len:
type: integer
description: Maximum context length the model can handle.
minimum: 0
maximum: 4096
default: 4096
example: 4096
ngl:
type: integer
description: Number of layers in the neural network.
minimum: 1
maximum: 100
default: 100
example: 100
embedding:
type: boolean
description: Indicates if embedding layers are used.
default: true
example: true
n_parallel:
type: integer
description: Number of parallel processes the model can run.
minimum: 1
maximum: 10
default: 1
example: 4
temperature:
type: number
description: Controls randomness in model's responses. Higher values lead to
more random responses.
minimum: 0
maximum: 2
default: 0.7
example: 0.7
token_limit:
type: integer
description: Maximum number of tokens the model can generate in a single
response.
minimum: 1
maximum: 4096
default: 4096
example: 4096
top_k:
type: integer
description: Limits the model to consider only the top k most likely next tokens
at each step.
minimum: 0
maximum: 100
default: 0
example: 0
top_p:
type: number
description: Nucleus sampling parameter. The model considers the smallest set of
tokens whose cumulative probability exceeds the top_p value.
minimum: 0
maximum: 1
default: 1
example: 1
metadata:
type: object
properties:
engine:
type: string
description: The engine used by the model.
enum:
- nitro
- openai
- hf_inference
quantization:
type: string
description: Quantization parameter of the model.
example: Q3_K_L
size:
type: string
description: Size of the model.
example: 7B
required:
- id
- object
- created
- owned_by
- state
- source
- parameters
- metadata
DeleteModelResponse:
type: object
properties:
id:
type: string
description: The identifier of the model that was deleted.
example: model-zephyr-7B
object:
type: string
description: Type of the object, indicating it's a model.
default: model
deleted:
type: boolean
description: Indicates whether the model was successfully deleted.
example: true
StartModelResponse:
type: object
properties:
id:
type: string
description: The identifier of the model that was started.
example: model-zephyr-7B
object:
type: string
description: Type of the object, indicating it's a model.
default: model
state:
type: string
description: The current state of the model after the start operation.
example: running
required:
- id
- object
- state
StopModelResponse:
type: object
properties:
id:
type: string
description: The identifier of the model that was started.
example: model-zephyr-7B
object:
type: string
description: Type of the object, indicating it's a model.
default: model
state:
type: string
description: The current state of the model after the start operation.
example: stopped
required:
- id
- object
- state
DownloadModelResponse:
type: object
properties:
message:
type: string
description: Message indicates Jan starting download corresponding model.
example: Starting download mistral-ins-7b-q4