Remove the inference-parameters spec page and add its engine/model parameters to the retrieve-model schema
This commit is contained in:
parent
f99bf0f008
commit
6ac5b0c5f0
@ -1,171 +0,0 @@
|
|||||||
---
|
|
||||||
title: "Inference Parameters"
|
|
||||||
slug: /specs/inference-parameters
|
|
||||||
description: Exhaustive list of JSON Schemas for engine and model parameters
|
|
||||||
---
|
|
||||||
|
|
||||||
# model_parameters
|
|
||||||
|
|
||||||
```js
|
|
||||||
|
|
||||||
{
|
|
||||||
"$schema": "http://json-schema.org/draft-07/schema#",
|
|
||||||
"type": "object",
|
|
||||||
"required": ["messages"],
|
|
||||||
"properties": {
|
|
||||||
"messages": {
|
|
||||||
"type": "array",
|
|
||||||
"items": {
|
|
||||||
"type": "object"
|
|
||||||
}
|
|
||||||
},
|
|
||||||
"model": {
|
|
||||||
"type": "string"
|
|
||||||
},
|
|
||||||
"frequency_penalty": {
|
|
||||||
"type": ["number", "null"],
|
|
||||||
"minimum": -2.0,
|
|
||||||
"maximum": 2.0,
|
|
||||||
"default": 0
|
|
||||||
},
|
|
||||||
"logit_bias": {
|
|
||||||
"type": ["object", "null"],
|
|
||||||
"additionalProperties": {
|
|
||||||
"type": "number",
|
|
||||||
"minimum": -100,
|
|
||||||
"maximum": 100
|
|
||||||
},
|
|
||||||
"default": null
|
|
||||||
},
|
|
||||||
"max_tokens": {
|
|
||||||
"type": ["integer", "null"]
|
|
||||||
},
|
|
||||||
"n": {
|
|
||||||
"type": ["integer", "null"],
|
|
||||||
"default": 1
|
|
||||||
},
|
|
||||||
"presence_penalty": {
|
|
||||||
"type": ["number", "null"],
|
|
||||||
"minimum": -2.0,
|
|
||||||
"maximum": 2.0,
|
|
||||||
"default": 0
|
|
||||||
},
|
|
||||||
"response_format": {
|
|
||||||
"type": ["object", "null"],
|
|
||||||
"properties": {
|
|
||||||
"type": {
|
|
||||||
"type": "string"
|
|
||||||
}
|
|
||||||
}
|
|
||||||
},
|
|
||||||
"seed": {
|
|
||||||
"type": ["integer", "null"]
|
|
||||||
},
|
|
||||||
"stop": {
|
|
||||||
"type": ["string", "array", "null"],
|
|
||||||
"items": {
|
|
||||||
"type": "string"
|
|
||||||
}
|
|
||||||
},
|
|
||||||
"stream": {
|
|
||||||
"type": ["boolean", "null"],
|
|
||||||
"default": false
|
|
||||||
},
|
|
||||||
"temperature": {
|
|
||||||
"type": ["number", "null"],
|
|
||||||
"minimum": 0,
|
|
||||||
"maximum": 2,
|
|
||||||
"default": 1
|
|
||||||
},
|
|
||||||
"top_p": {
|
|
||||||
"type": ["number", "null"],
|
|
||||||
"minimum": 0,
|
|
||||||
"maximum": 1,
|
|
||||||
"default": 1
|
|
||||||
},
|
|
||||||
"tools": {
|
|
||||||
"type": ["array", "null"],
|
|
||||||
"items": {
|
|
||||||
"type": "object"
|
|
||||||
}
|
|
||||||
},
|
|
||||||
"tool_choice": {
|
|
||||||
"type": ["string", "object", "null"]
|
|
||||||
},
|
|
||||||
"user": {
|
|
||||||
"type": ["string", "null"]
|
|
||||||
},
|
|
||||||
"function_call": {
|
|
||||||
"type": ["string", "object", "null"],
|
|
||||||
"deprecated": true
|
|
||||||
},
|
|
||||||
"functions": {
|
|
||||||
"type": ["array", "null"],
|
|
||||||
"items": {
|
|
||||||
"type": "object"
|
|
||||||
},
|
|
||||||
"deprecated": true
|
|
||||||
}
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
```
|
|
||||||
|
|
||||||
# nitro engine_parameters
|
|
||||||
|
|
||||||
```js
|
|
||||||
{
|
|
||||||
"$schema": "http://json-schema.org/draft-07/schema#",
|
|
||||||
"type": "object",
|
|
||||||
"properties": {
|
|
||||||
"pre_prompt": {
|
|
||||||
"type": "string",
|
|
||||||
"description": "The prompt to use for internal configuration."
|
|
||||||
},
|
|
||||||
"system_prompt": {
|
|
||||||
"type": "string",
|
|
||||||
"description": "The prefix for system prompt."
|
|
||||||
},
|
|
||||||
"user_prompt": {
|
|
||||||
"type": "string",
|
|
||||||
"description": "The prefix for user prompt."
|
|
||||||
},
|
|
||||||
"ai_prompt": {
|
|
||||||
"type": "string",
|
|
||||||
"description": "The prefix for assistant prompt."
|
|
||||||
},
|
|
||||||
"ngl": {
|
|
||||||
"type": "integer",
|
|
||||||
"default": 100,
|
|
||||||
"minimum": 0,
|
|
||||||
"maximum": 100,
|
|
||||||
"description": "The number of layers to load onto the GPU for acceleration."
|
|
||||||
},
|
|
||||||
"ctx_len": {
|
|
||||||
"type": "integer",
|
|
||||||
"default": 2048,
|
|
||||||
"minimum": 128,
|
|
||||||
"maximum": 4096,
|
|
||||||
"description": "The context length for model operations varies; the maximum depends on the specific model used."
|
|
||||||
},
|
|
||||||
"n_parallel": {
|
|
||||||
"type": "integer",
|
|
||||||
"default": 1,
|
|
||||||
"description": "The number of parallel operations. Only set this when continuous batching is enabled."
|
|
||||||
},
|
|
||||||
"cont_batching": {
|
|
||||||
"type": "boolean",
|
|
||||||
"default": false,
|
|
||||||
"description": "Whether to use continuous batching."
|
|
||||||
},
|
|
||||||
"cpu_threads": {
|
|
||||||
"type": "integer",
|
|
||||||
"description": "The number of threads for CPU-based inference."
|
|
||||||
},
|
|
||||||
"embedding": {
|
|
||||||
"type": "boolean",
|
|
||||||
"description": "Whether to enable embedding."
|
|
||||||
}
|
|
||||||
}
|
|
||||||
}
|
|
||||||
```
|
|
||||||
@ -169,53 +169,120 @@ components:
|
|||||||
format: uri
|
format: uri
|
||||||
description: "URL to the source of the model."
|
description: "URL to the source of the model."
|
||||||
example: "https://huggingface.co/TheBloke/zephyr-7B-beta-GGUF/blob/main/zephyr-7b-beta.Q4_K_M.gguf"
|
example: "https://huggingface.co/TheBloke/zephyr-7B-beta-GGUF/blob/main/zephyr-7b-beta.Q4_K_M.gguf"
|
||||||
parameters:
|
engine_parameters:
|
||||||
|
type: object
|
||||||
|
properties:
|
||||||
|
pre_prompt:
|
||||||
|
type: string
|
||||||
|
description: "Predefined prompt used for setting up internal configurations."
|
||||||
|
default: ""
|
||||||
|
example: "Initial setup complete."
|
||||||
|
system_prompt:
|
||||||
|
type: string
|
||||||
|
description: "Prefix used for system-level prompts."
|
||||||
|
default: "SYSTEM: "
|
||||||
|
user_prompt:
|
||||||
|
type: string
|
||||||
|
description: "Prefix used for user prompts."
|
||||||
|
default: "USER: "
|
||||||
|
ai_prompt:
|
||||||
|
type: string
|
||||||
|
description: "Prefix used for assistant prompts."
|
||||||
|
default: "ASSISTANT: "
|
||||||
|
ngl:
|
||||||
|
type: integer
|
||||||
|
description: "Number of neural network layers loaded onto the GPU for acceleration."
|
||||||
|
minimum: 0
|
||||||
|
maximum: 100
|
||||||
|
default: 100
|
||||||
|
example: 100
|
||||||
|
ctx_len:
|
||||||
|
type: integer
|
||||||
|
description: "Context length for model operations, varies based on the specific model."
|
||||||
|
minimum: 128
|
||||||
|
maximum: 4096
|
||||||
|
default: 2048
|
||||||
|
example: 2048
|
||||||
|
n_parallel:
|
||||||
|
type: integer
|
||||||
|
description: "Number of parallel operations, relevant when continuous batching is enabled."
|
||||||
|
minimum: 1
|
||||||
|
maximum: 10
|
||||||
|
default: 1
|
||||||
|
example: 4
|
||||||
|
cont_batching:
|
||||||
|
type: boolean
|
||||||
|
description: "Indicates if continuous batching is used for processing."
|
||||||
|
default: false
|
||||||
|
example: false
|
||||||
|
cpu_threads:
|
||||||
|
type: integer
|
||||||
|
description: "Number of threads allocated for CPU-based inference."
|
||||||
|
minimum: 1
|
||||||
|
example: 8
|
||||||
|
embedding:
|
||||||
|
type: boolean
|
||||||
|
description: "Indicates if embedding layers are enabled in the model."
|
||||||
|
default: true
|
||||||
|
example: true
|
||||||
|
model_parameters:
|
||||||
type: object
|
type: object
|
||||||
properties:
|
properties:
|
||||||
ctx_len:
|
ctx_len:
|
||||||
type: integer
|
type: integer
|
||||||
description: "Context length."
|
description: "Maximum context length the model can handle."
|
||||||
|
minimum: 0
|
||||||
|
maximum: 4096
|
||||||
|
default: 2048
|
||||||
example: 2048
|
example: 2048
|
||||||
ngl:
|
ngl:
|
||||||
type: integer
|
type: integer
|
||||||
description: "Number of layers."
|
description: "Number of neural network layers loaded onto the GPU."
|
||||||
|
minimum: 1
|
||||||
|
maximum: 100
|
||||||
|
default: 100
|
||||||
example: 100
|
example: 100
|
||||||
embedding:
|
embedding:
|
||||||
type: boolean
|
type: boolean
|
||||||
description: "Indicates if embedding is enabled."
|
description: "Indicates if embedding layers are used."
|
||||||
|
default: true
|
||||||
example: true
|
example: true
|
||||||
n_parallel:
|
n_parallel:
|
||||||
type: integer
|
type: integer
|
||||||
description: "Number of parallel processes."
|
description: "Number of parallel processes the model can run."
|
||||||
|
minimum: 1
|
||||||
|
maximum: 10
|
||||||
|
default: 1
|
||||||
example: 4
|
example: 4
|
||||||
# pre_prompt:
|
|
||||||
# type: string
|
|
||||||
# description: "Predefined prompt for initiating the chat."
|
|
||||||
# example: "A chat between a curious user and an artificial intelligence"
|
|
||||||
# user_prompt:
|
|
||||||
# type: string
|
|
||||||
# description: "Format of user's prompt."
|
|
||||||
# example: "USER: "
|
|
||||||
# ai_prompt:
|
|
||||||
# type: string
|
|
||||||
# description: "Format of AI's response."
|
|
||||||
# example: "ASSISTANT: "
|
|
||||||
temperature:
|
temperature:
|
||||||
type: string
|
type: number
|
||||||
description: "Temperature setting for the model."
|
description: "Controls randomness in model's responses. Higher values lead to more random responses."
|
||||||
example: "0.7"
|
minimum: 0.0
|
||||||
|
maximum: 2.0
|
||||||
|
default: 0.7
|
||||||
|
example: 0.7
|
||||||
token_limit:
|
token_limit:
|
||||||
type: string
|
type: integer
|
||||||
description: "Token limit for the model."
|
description: "Maximum number of tokens the model can generate in a single response."
|
||||||
example: "2048"
|
minimum: 1
|
||||||
|
maximum: 4096
|
||||||
|
default: 2048
|
||||||
|
example: 2048
|
||||||
top_k:
|
top_k:
|
||||||
type: string
|
type: integer
|
||||||
description: "Top-k setting for the model."
|
description: "Limits the model to consider only the top k most likely next tokens at each step."
|
||||||
example: "0"
|
minimum: 0
|
||||||
|
maximum: 100
|
||||||
|
default: 0
|
||||||
|
example: 0
|
||||||
top_p:
|
top_p:
|
||||||
type: string
|
type: number
|
||||||
description: "Top-p setting for the model."
|
description: "Nucleus sampling parameter. The model considers the smallest set of tokens whose cumulative probability exceeds the top_p value."
|
||||||
example: "1"
|
minimum: 0.0
|
||||||
|
maximum: 1.0
|
||||||
|
default: 1.0
|
||||||
|
example: 1.0
|
||||||
|
|
||||||
metadata:
|
metadata:
|
||||||
type: object
|
type: object
|
||||||
properties:
|
properties:
|
||||||
|
|||||||
@ -82,7 +82,6 @@ const sidebars = {
|
|||||||
"specs/engineering/chats",
|
"specs/engineering/chats",
|
||||||
"specs/engineering/models",
|
"specs/engineering/models",
|
||||||
"specs/engineering/engine",
|
"specs/engineering/engine",
|
||||||
"specs/engineering/inference-parameters",
|
|
||||||
"specs/engineering/threads",
|
"specs/engineering/threads",
|
||||||
"specs/engineering/messages",
|
"specs/engineering/messages",
|
||||||
"specs/engineering/assistants",
|
"specs/engineering/assistants",
|
||||||
|
|||||||
Loading…
x
Reference in New Issue
Block a user