components:
  schemas:
    # Envelope for a model listing: `object` is fixed to "list" and `data`
    # holds the individual Model entries.
    ListModelsResponse:
      type: object
      properties:
        object:
          type: string
          enum: [list]
        data:
          type: array
          items:
            $ref: "#/components/schemas/Model"
      required:
        - object
        - data

    # Full model record: identity, state, source location, plus the nested
    # `settings` and `parameters` objects. Only `source_url` is required.
    Model:
      type: object
      properties:
        type:
          type: string
          default: "model"
          description: "The type of the object."
        version:
          type: string
          default: "1"
          description: "The version number of the model."
        id:
          type: string
          description: "Unique identifier used in chat-completions model_name, matches folder name."
          example: "zephyr-7b"
        name:
          type: string
          description: "Name of the model."
          example: "Zephyr 7B"
        owned_by:
          type: string
          description: "Compatibility field for OpenAI."
          default: ""
        created:
          type: integer
          format: int64
          description: "Unix timestamp representing the creation time."
        description:
          type: string
          description: "Description of the model."
        state:
          type: string
          # `null` is one of the listed values. With an explicit `type`,
          # OpenAPI 3.0 only admits null when `nullable: true` is set.
          nullable: true
          enum: [null, "downloading", "ready", "starting", "stopping"]
          description: "Current state of the model."
        format:
          type: string
          description: "State format of the model, distinct from the engine."
          example: "ggufv3"
        source_url:
          type: string
          format: uri
          description: "URL to the source of the model."
          example: "https://huggingface.co/TheBloke/zephyr-7B-beta-GGUF/blob/main/zephyr-7b-beta.Q4_K_M.gguf"
        # All settings values are declared as strings, including the
        # numeric- and boolean-looking ones; unknown keys are rejected.
        settings:
          type: object
          properties:
            ctx_len:
              type: string
              description: "Context length."
              example: "2048"
            ngl:
              type: string
              description: "Number of layers."
              example: "100"
            embedding:
              type: string
              description: "Indicates if embedding is enabled."
              example: "true"
            n_parallel:
              type: string
              description: "Number of parallel processes."
              example: "4"
          additionalProperties: false
        # Parameter values are likewise declared as strings; unknown keys
        # are rejected.
        parameters:
          type: object
          properties:
            temperature:
              type: string
              description: "Temperature setting for the model."
              example: "0.7"
            token_limit:
              type: string
              description: "Token limit for the model."
              example: "2048"
            top_k:
              type: string
              description: "Top-k setting for the model."
              example: "0"
            top_p:
              type: string
              description: "Top-p setting for the model."
              example: "1"
            stream:
              type: string
              description: "Indicates if streaming is enabled."
              example: "true"
          additionalProperties: false
        metadata:
          type: object
          description: "Additional metadata."
        assets:
          type: array
          items:
            type: string
          description: "List of assets related to the model."
      required:
        - source_url

    # Minimal model descriptor: id, object type, creation time and owner.
    ModelObject:
      type: object
      properties:
        id:
          type: string
          description: "The identifier of the model."
          example: "zephyr-7b"
        object:
          type: string
          description: "The type of the object, indicating it's a model."
          default: "model"
        created:
          type: integer
          format: int64
          description: "Unix timestamp representing the creation time of the model."
          # Integer example so it agrees with the declared integer type.
          example: 1253935178
        owned_by:
          type: string
          description: "The entity that owns the model."
          example: "_"

    # Response for fetching a single model. Every top-level property listed
    # here is required.
    GetModelResponse:
      type: object
      properties:
        id:
          type: string
          description: "The identifier of the model."
          example: "zephyr-7b"
        object:
          type: string
          description: "Type of the object, indicating it's a model."
          default: "model"
        created:
          type: integer
          format: int64
          description: "Unix timestamp representing the creation time of the model."
        owned_by:
          type: string
          description: "The entity that owns the model."
          example: "_"
        state:
          type: string
          enum: [not_downloaded, downloaded, running, stopped]
          description: "The current state of the model."
        source_url:
          type: string
          format: uri
          description: "URL to the source of the model."
          example: "https://huggingface.co/TheBloke/zephyr-7B-beta-GGUF/blob/main/zephyr-7b-beta.Q4_K_M.gguf"
        parameters:
          type: object
          properties:
            ctx_len:
              type: integer
              description: "Context length."
              example: 2048
            ngl:
              type: integer
              description: "Number of layers."
              example: 100
            embedding:
              type: boolean
              description: "Indicates if embedding is enabled."
              example: true
            n_parallel:
              type: integer
              description: "Number of parallel processes."
              example: 4
            # Prompt-template fields below are disabled (left commented out).
            # pre_prompt:
            #   type: string
            #   description: "Predefined prompt for initiating the chat."
            #   example: "A chat between a curious user and an artificial intelligence"
            # user_prompt:
            #   type: string
            #   description: "Format of user's prompt."
            #   example: "USER: "
            # ai_prompt:
            #   type: string
            #   description: "Format of AI's response."
            #   example: "ASSISTANT: "
            temperature:
              type: string
              description: "Temperature setting for the model."
              example: "0.7"
            token_limit:
              type: string
              description: "Token limit for the model."
              example: "2048"
            top_k:
              type: string
              description: "Top-k setting for the model."
              example: "0"
            top_p:
              type: string
              description: "Top-p setting for the model."
              example: "1"
        metadata:
          type: object
          properties:
            engine:
              type: string
              description: "The engine used by the model."
              example: "llamacpp"
            quantization:
              type: string
              description: "Quantization parameter of the model."
              example: "Q3_K_L"
            size:
              type: string
              description: "Size of the model."
              example: "7B"
      required:
        - id
        - object
        - created
        - owned_by
        - state
        - source_url
        - parameters
        - metadata

    # Response for deleting a model; `deleted` reports whether it succeeded.
    DeleteModelResponse:
      type: object
      properties:
        id:
          type: string
          description: "The identifier of the model that was deleted."
          example: "model-zephyr-7B"
        object:
          type: string
          description: "Type of the object, indicating it's a model."
          default: "model"
        deleted:
          type: boolean
          description: "Indicates whether the model was successfully deleted."
          example: true

    # Response for starting a model; all three properties are required.
    StartModelResponse:
      type: object
      properties:
        id:
          type: string
          description: "The identifier of the model that was started."
          example: "model-zephyr-7B"
        object:
          type: string
          description: "Type of the object, indicating it's a model."
          default: "model"
        state:
          type: string
          description: "The current state of the model after the start operation."
          example: "running"
      required:
        - id
        - object
        - state

    # Response for stopping a model; all three properties are required.
    StopModelResponse:
      type: object
      properties:
        id:
          type: string
          description: "The identifier of the model that was stopped."
          example: "model-zephyr-7B"
        object:
          type: string
          description: "Type of the object, indicating it's a model."
          default: "model"
        state:
          type: string
          description: "The current state of the model after the stop operation."
          example: "stopped"
      required:
        - id
        - object
        - state

    # Response for downloading a model. No properties are marked required.
    DownloadModelResponse:
      type: object
      properties:
        id:
          type: string
          description: "The identifier of the model that was downloaded."
          example: "model-zephyr-7B"
        object:
          type: string
          description: "Type of the object, indicating it's a model."
          default: "model"
        state:
          type: string
          description: "The current state of the model after the download operation."
          example: "downloaded"