remove and add to retrieve model

tikikun 2023-12-05 18:53:29 +07:00
parent f99bf0f008
commit 6ac5b0c5f0
3 changed files with 96 additions and 201 deletions

View File

@@ -1,171 +0,0 @@
---
title: "Inference Parameters"
slug: /specs/inference-parameters
description: Exhaustive JSON-schema reference for engine and model parameters
---
# model_parameters
```js
{
"$schema": "http://json-schema.org/draft-07/schema#",
"type": "object",
"required": ["messages"],
"properties": {
"messages": {
"type": "array",
"items": {
"type": "object"
}
},
"model": {
"type": "string"
},
"frequency_penalty": {
"type": ["number", "null"],
"minimum": -2.0,
"maximum": 2.0,
"default": 0
},
"logit_bias": {
"type": ["object", "null"],
"additionalProperties": {
"type": "number",
"minimum": -100,
"maximum": 100
},
"default": null
},
"max_tokens": {
"type": ["integer", "null"]
},
"n": {
"type": ["integer", "null"],
"default": 1
},
"presence_penalty": {
"type": ["number", "null"],
"minimum": -2.0,
"maximum": 2.0,
"default": 0
},
"response_format": {
"type": ["object", "null"],
"properties": {
"type": {
"type": "string"
}
}
},
"seed": {
"type": ["integer", "null"]
},
"stop": {
"type": ["string", "array", "null"],
"items": {
"type": "string"
}
},
"stream": {
"type": ["boolean", "null"],
"default": false
},
"temperature": {
"type": ["number", "null"],
"minimum": 0,
"maximum": 2,
"default": 1
},
"top_p": {
"type": ["number", "null"],
"minimum": 0,
"maximum": 1,
"default": 1
},
"tools": {
"type": ["array", "null"],
"items": {
"type": "object"
}
},
"tool_choice": {
"type": ["string", "object", "null"]
},
"user": {
"type": ["string", "null"]
},
"function_call": {
"type": ["string", "object", "null"],
"deprecated": true
},
"functions": {
"type": ["array", "null"],
"items": {
"type": "object"
},
"deprecated": true
}
}
}
```
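For orientation, a request body that would validate against this schema might look like the following sketch; the model name and message contents are illustrative placeholders, not values prescribed by the spec:
```js
{
  // "messages" is the only required property.
  "messages": [
    { "role": "system", "content": "You are a helpful assistant." },
    { "role": "user", "content": "Hello!" }
  ],
  "model": "zephyr-7b-beta.Q4_K_M",
  // Optional sampling parameters, all within the schema's bounds.
  "temperature": 0.7,
  "top_p": 1,
  "max_tokens": 512,
  "stop": ["</s>"],
  "stream": false
}
```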
# nitro engine_parameters
```js
{
"$schema": "http://json-schema.org/draft-07/schema#",
"type": "object",
"properties": {
"pre_prompt": {
"type": "string",
"description": "The prompt to use for internal configuration."
},
"system_prompt": {
"type": "string",
"description": "The prefix for system prompt."
},
"user_prompt": {
"type": "string",
"description": "The prefix for user prompt."
},
"ai_prompt": {
"type": "string",
"description": "The prefix for assistant prompt."
},
"ngl": {
"type": "integer",
"default": 100,
"minimum": 0,
"maximum": 100,
"description": "The number of layers to load onto the GPU for acceleration."
},
"ctx_len": {
"type": "integer",
"default": 2048,
"minimum": 128,
"maximum": 4096,
"description": "The context length for model operations varies; the maximum depends on the specific model used."
},
"n_parallel": {
"type": "integer",
"default": 1,
"description": "The number of parallel operations. Only set when enable continuous batching."
},
"cont_batching": {
"type": "boolean",
"default": false,
"description": "Whether to use continuous batching."
},
"cpu_threads": {
"type": "integer",
"description": "The number of threads for CPU-based inference."
},
"embedding": {
"type": "boolean",
"description": "Whether to enable embedding."
}
}
}
```
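A settings object that satisfies this engine schema might look like the sketch below; the prompt prefixes and thread count are illustrative assumptions, not required values:
```js
{
  // Prompt template pieces the engine prepends to each turn.
  "pre_prompt": "A chat between a curious user and an artificial intelligence assistant.",
  "system_prompt": "SYSTEM: ",
  "user_prompt": "USER: ",
  "ai_prompt": "ASSISTANT: ",
  // Hardware and batching configuration within the schema's bounds.
  "ngl": 100,
  "ctx_len": 2048,
  "n_parallel": 1,
  "cont_batching": false,
  "cpu_threads": 8,
  "embedding": true
}
```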

View File

```diff
@@ -169,53 +169,120 @@ components:
           format: uri
           description: "URL to the source of the model."
           example: "https://huggingface.co/TheBloke/zephyr-7B-beta-GGUF/blob/main/zephyr-7b-beta.Q4_K_M.gguf"
-        parameters:
+        engine_parameters:
+          type: object
+          properties:
+            pre_prompt:
+              type: string
+              description: "Predefined prompt used for setting up internal configurations."
+              default: ""
+              example: "Initial setup complete."
+            system_prompt:
+              type: string
+              description: "Prefix used for system-level prompts."
+              default: "SYSTEM: "
+            user_prompt:
+              type: string
+              description: "Prefix used for user prompts."
+              default: "USER: "
+            ai_prompt:
+              type: string
+              description: "Prefix used for assistant prompts."
+              default: "ASSISTANT: "
+            ngl:
+              type: integer
+              description: "Number of neural network layers loaded onto the GPU for acceleration."
+              minimum: 0
+              maximum: 100
+              default: 100
+              example: 100
+            ctx_len:
+              type: integer
+              description: "Context length for model operations; varies based on the specific model."
+              minimum: 128
+              maximum: 4096
+              default: 2048
+              example: 2048
+            n_parallel:
+              type: integer
+              description: "Number of parallel operations, relevant when continuous batching is enabled."
+              minimum: 1
+              maximum: 10
+              default: 1
+              example: 4
+            cont_batching:
+              type: boolean
+              description: "Indicates if continuous batching is used for processing."
+              default: false
+              example: false
+            cpu_threads:
+              type: integer
+              description: "Number of threads allocated for CPU-based inference."
+              minimum: 1
+              example: 8
+            embedding:
+              type: boolean
+              description: "Indicates if embedding layers are enabled in the model."
+              default: true
+              example: true
+        model_parameters:
           type: object
           properties:
             ctx_len:
               type: integer
-              description: "Context length."
+              description: "Maximum context length the model can handle."
+              minimum: 0
+              maximum: 4096
+              default: 2048
               example: 2048
             ngl:
               type: integer
-              description: "Number of layers."
+              description: "Number of layers in the neural network."
+              minimum: 1
+              maximum: 100
+              default: 100
               example: 100
             embedding:
               type: boolean
-              description: "Indicates if embedding is enabled."
+              description: "Indicates if embedding layers are used."
+              default: true
               example: true
             n_parallel:
               type: integer
-              description: "Number of parallel processes."
+              description: "Number of parallel processes the model can run."
+              minimum: 1
+              maximum: 10
+              default: 1
               example: 4
-            # pre_prompt:
-            #   type: string
-            #   description: "Predefined prompt for initiating the chat."
-            #   example: "A chat between a curious user and an artificial intelligence"
-            # user_prompt:
-            #   type: string
-            #   description: "Format of user's prompt."
-            #   example: "USER: "
-            # ai_prompt:
-            #   type: string
-            #   description: "Format of AI's response."
-            #   example: "ASSISTANT: "
             temperature:
-              type: string
-              description: "Temperature setting for the model."
-              example: "0.7"
+              type: number
+              description: "Controls randomness in the model's responses. Higher values lead to more random responses."
+              minimum: 0.0
+              maximum: 2.0
+              default: 0.7
+              example: 0.7
             token_limit:
-              type: string
-              description: "Token limit for the model."
-              example: "2048"
+              type: integer
+              description: "Maximum number of tokens the model can generate in a single response."
+              minimum: 1
+              maximum: 4096
+              default: 2048
+              example: 2048
             top_k:
-              type: string
-              description: "Top-k setting for the model."
-              example: "0"
+              type: integer
+              description: "Limits the model to consider only the top k most likely next tokens at each step."
+              minimum: 0
+              maximum: 100
+              default: 0
+              example: 0
             top_p:
-              type: string
-              description: "Top-p setting for the model."
-              example: "1"
+              type: number
+              description: "Nucleus sampling parameter. The model considers the smallest set of tokens whose cumulative probability exceeds the top_p value."
+              minimum: 0.0
+              maximum: 1.0
+              default: 1.0
+              example: 1.0
             metadata:
               type: object
               properties:
```

View File

```diff
@@ -82,7 +82,6 @@ const sidebars = {
       "specs/engineering/chats",
       "specs/engineering/models",
       "specs/engineering/engine",
-      "specs/engineering/inference-parameters",
       "specs/engineering/threads",
       "specs/engineering/messages",
       "specs/engineering/assistants",
```