# OpenAPI component schemas for the models API (list/get/start/stop/download/delete).
components:
  schemas:
ListModelsResponse:
|
|
type: object
|
|
properties:
|
|
object:
|
|
type: string
|
|
enum: [list]
|
|
data:
|
|
type: array
|
|
items:
|
|
$ref: "#/components/schemas/Model"
|
|
required:
|
|
- object
|
|
- data
|
|
|
|
Model:
|
|
type: object
|
|
properties:
|
|
type:
|
|
type: string
|
|
default: "model"
|
|
description: "The type of the object."
|
|
version:
|
|
type: string
|
|
default: "1"
|
|
description: "The version number of the model."
|
|
id:
|
|
type: string
|
|
description: "Unique identifier used in chat-completions model_name, matches folder name."
|
|
example: "zephyr-7b"
|
|
name:
|
|
type: string
|
|
description: "Name of the model."
|
|
example: "Zephyr 7B"
|
|
owned_by:
|
|
type: string
|
|
description: "Compatibility field for OpenAI."
|
|
default: ""
|
|
created:
|
|
type: integer
|
|
format: int64
|
|
description: "Unix timestamp representing the creation time."
|
|
description:
|
|
type: string
|
|
description: "Description of the model."
|
|
state:
|
|
type: string
|
|
enum: [null, "downloading", "ready", "starting", "stopping"]
|
|
description: "Current state of the model."
|
|
format:
|
|
type: string
|
|
description: "State format of the model, distinct from the engine."
|
|
example: "ggufv3"
|
|
source_url:
|
|
type: string
|
|
format: uri
|
|
description: "URL to the source of the model."
|
|
example: "https://huggingface.co/TheBloke/zephyr-7B-beta-GGUF/blob/main/zephyr-7b-beta.Q4_K_M.gguf"
|
|
settings:
|
|
type: object
|
|
properties:
|
|
ctx_len:
|
|
type: string
|
|
description: "Context length."
|
|
example: "2048"
|
|
ngl:
|
|
type: string
|
|
description: "Number of layers."
|
|
example: "100"
|
|
embedding:
|
|
type: string
|
|
description: "Indicates if embedding is enabled."
|
|
example: "true"
|
|
n_parallel:
|
|
type: string
|
|
description: "Number of parallel processes."
|
|
example: "4"
|
|
additionalProperties: false
|
|
parameters:
|
|
type: object
|
|
properties:
|
|
temperature:
|
|
type: string
|
|
description: "Temperature setting for the model."
|
|
example: "0.7"
|
|
token_limit:
|
|
type: string
|
|
description: "Token limit for the model."
|
|
example: "2048"
|
|
top_k:
|
|
type: string
|
|
description: "Top-k setting for the model."
|
|
example: "0"
|
|
top_p:
|
|
type: string
|
|
description: "Top-p setting for the model."
|
|
example: "1"
|
|
stream:
|
|
type: string
|
|
description: "Indicates if streaming is enabled."
|
|
example: "true"
|
|
additionalProperties: false
|
|
metadata:
|
|
type: object
|
|
description: "Additional metadata."
|
|
assets:
|
|
type: array
|
|
items:
|
|
type: string
|
|
description: "List of assets related to the model."
|
|
required:
|
|
- source_url
|
|
|
|
ModelObject:
|
|
type: object
|
|
properties:
|
|
id:
|
|
type: string
|
|
description: |
|
|
"The identifier of the model."
|
|
|
|
example: "zephyr-7b"
|
|
object:
|
|
type: string
|
|
description: |
|
|
"The type of the object, indicating it's a model."
|
|
|
|
default: "model"
|
|
created:
|
|
type: integer
|
|
format: int64
|
|
description: |
|
|
"Unix timestamp representing the creation time of the model."
|
|
|
|
example: "1253935178"
|
|
owned_by:
|
|
type: string
|
|
description: |
|
|
"The entity that owns the model."
|
|
|
|
example: "_"
|
|
|
|
GetModelResponse:
|
|
type: object
|
|
properties:
|
|
id:
|
|
type: string
|
|
description: "The identifier of the model."
|
|
example: "zephyr-7b"
|
|
object:
|
|
type: string
|
|
description: "Type of the object, indicating it's a model."
|
|
default: "model"
|
|
created:
|
|
type: integer
|
|
format: int64
|
|
description: "Unix timestamp representing the creation time of the model."
|
|
owned_by:
|
|
type: string
|
|
description: "The entity that owns the model."
|
|
example: "_"
|
|
state:
|
|
type: string
|
|
enum: [not_downloaded, downloaded, running, stopped]
|
|
description: "The current state of the model."
|
|
source_url:
|
|
type: string
|
|
format: uri
|
|
description: "URL to the source of the model."
|
|
example: "https://huggingface.co/TheBloke/zephyr-7B-beta-GGUF/blob/main/zephyr-7b-beta.Q4_K_M.gguf"
|
|
engine_parameters:
|
|
type: object
|
|
properties:
|
|
pre_prompt:
|
|
type: string
|
|
description: "Predefined prompt used for setting up internal configurations."
|
|
default: ""
|
|
example: "Initial setup complete."
|
|
system_prompt:
|
|
type: string
|
|
description: "Prefix used for system-level prompts."
|
|
default: "SYSTEM: "
|
|
user_prompt:
|
|
type: string
|
|
description: "Prefix used for user prompts."
|
|
default: "USER: "
|
|
ai_prompt:
|
|
type: string
|
|
description: "Prefix used for assistant prompts."
|
|
default: "ASSISTANT: "
|
|
ngl:
|
|
type: integer
|
|
description: "Number of neural network layers loaded onto the GPU for acceleration."
|
|
minimum: 0
|
|
maximum: 100
|
|
default: 100
|
|
example: 100
|
|
ctx_len:
|
|
type: integer
|
|
description: "Context length for model operations, varies based on the specific model."
|
|
minimum: 128
|
|
maximum: 4096
|
|
default: 2048
|
|
example: 2048
|
|
n_parallel:
|
|
type: integer
|
|
description: "Number of parallel operations, relevant when continuous batching is enabled."
|
|
minimum: 1
|
|
maximum: 10
|
|
default: 1
|
|
example: 4
|
|
cont_batching:
|
|
type: boolean
|
|
description: "Indicates if continuous batching is used for processing."
|
|
default: false
|
|
example: false
|
|
cpu_threads:
|
|
type: integer
|
|
description: "Number of threads allocated for CPU-based inference."
|
|
minimum: 1
|
|
example: 8
|
|
embedding:
|
|
type: boolean
|
|
description: "Indicates if embedding layers are enabled in the model."
|
|
default: true
|
|
example: true
|
|
model_parameters:
|
|
type: object
|
|
properties:
|
|
ctx_len:
|
|
type: integer
|
|
description: "Maximum context length the model can handle."
|
|
minimum: 0
|
|
maximum: 4096
|
|
default: 2048
|
|
example: 2048
|
|
ngl:
|
|
type: integer
|
|
description: "Number of layers in the neural network."
|
|
minimum: 1
|
|
maximum: 100
|
|
default: 100
|
|
example: 100
|
|
embedding:
|
|
type: boolean
|
|
description: "Indicates if embedding layers are used."
|
|
default: true
|
|
example: true
|
|
n_parallel:
|
|
type: integer
|
|
description: "Number of parallel processes the model can run."
|
|
minimum: 1
|
|
maximum: 10
|
|
default: 1
|
|
example: 4
|
|
temperature:
|
|
type: number
|
|
description: "Controls randomness in model's responses. Higher values lead to more random responses."
|
|
minimum: 0.0
|
|
maximum: 2.0
|
|
default: 0.7
|
|
example: 0.7
|
|
token_limit:
|
|
type: integer
|
|
description: "Maximum number of tokens the model can generate in a single response."
|
|
minimum: 1
|
|
maximum: 4096
|
|
default: 2048
|
|
example: 2048
|
|
top_k:
|
|
type: integer
|
|
description: "Limits the model to consider only the top k most likely next tokens at each step."
|
|
minimum: 0
|
|
maximum: 100
|
|
default: 0
|
|
example: 0
|
|
top_p:
|
|
type: number
|
|
description: "Nucleus sampling parameter. The model considers the smallest set of tokens whose cumulative probability exceeds the top_p value."
|
|
minimum: 0.0
|
|
maximum: 1.0
|
|
default: 1.0
|
|
example: 1.0
|
|
|
|
metadata:
|
|
type: object
|
|
properties:
|
|
engine:
|
|
type: string
|
|
description: "The engine used by the model."
|
|
example: "llamacpp"
|
|
quantization:
|
|
type: string
|
|
description: "Quantization parameter of the model."
|
|
example: "Q3_K_L"
|
|
size:
|
|
type: string
|
|
description: "Size of the model."
|
|
example: "7B"
|
|
required:
|
|
- id
|
|
- object
|
|
- created
|
|
- owned_by
|
|
- state
|
|
- source_url
|
|
- parameters
|
|
- metadata
|
|
|
|
DeleteModelResponse:
|
|
type: object
|
|
properties:
|
|
id:
|
|
type: string
|
|
description: "The identifier of the model that was deleted."
|
|
example: "model-zephyr-7B"
|
|
object:
|
|
type: string
|
|
description: "Type of the object, indicating it's a model."
|
|
default: "model"
|
|
deleted:
|
|
type: boolean
|
|
description: "Indicates whether the model was successfully deleted."
|
|
example: true
|
|
|
|
|
|
StartModelResponse:
|
|
type: object
|
|
properties:
|
|
id:
|
|
type: string
|
|
description: "The identifier of the model that was started."
|
|
example: "model-zephyr-7B"
|
|
object:
|
|
type: string
|
|
description: "Type of the object, indicating it's a model."
|
|
default: "model"
|
|
state:
|
|
type: string
|
|
description: "The current state of the model after the start operation."
|
|
example: "running"
|
|
required:
|
|
- id
|
|
- object
|
|
- state
|
|
|
|
StopModelResponse:
|
|
type: object
|
|
properties:
|
|
id:
|
|
type: string
|
|
description: "The identifier of the model that was started."
|
|
example: "model-zephyr-7B"
|
|
object:
|
|
type: string
|
|
description: "Type of the object, indicating it's a model."
|
|
default: "model"
|
|
state:
|
|
type: string
|
|
description: "The current state of the model after the start operation."
|
|
example: "stopped"
|
|
required:
|
|
- id
|
|
- object
|
|
- state
|
|
|
|
DownloadModelResponse:
|
|
type: object
|
|
properties:
|
|
id:
|
|
type: string
|
|
description: "The identifier of the model that was started."
|
|
example: "model-zephyr-7B"
|
|
object:
|
|
type: string
|
|
description: "Type of the object, indicating it's a model."
|
|
default: "model"
|
|
state:
|
|
type: string
|
|
description: "The current state of the model after the start operation."
|
|
example: "downloaded"
|
|
|