jan/website/public/openapi/openapi.json
2025-07-25 15:45:11 +10:00

1460 lines
53 KiB
JSON

{
"openapi": "3.1.0",
"info": { "title": "👋Jan API", "version": "0.3.14" },
"paths": {
"/v1/completions": {
"post": {
"tags": ["OpenAI V1"],
"summary": "Completion",
"operationId": "create_completion_v1_completions_post",
"requestBody": {
"content": {
"application/json": {
"schema": {
"$ref": "#/components/schemas/CreateCompletionRequest"
}
}
},
"required": true
},
"responses": {
"200": {
"description": "Successful Response",
"content": {
"application/json": {
"schema": {
"anyOf": [
{ "$ref": "#/components/schemas/CreateCompletionResponse" },
{ "type": "string" },
{ "$ref": "#/components/schemas/CreateCompletionResponse" }
],
"title": "Completion response, when stream=False"
}
},
"text/event-stream": {
"schema": {
"type": "string",
"title": "Server Side Streaming response, when stream=True. See SSE format: https://developer.mozilla.org/en-US/docs/Web/API/Server-sent_events/Using_server-sent_events#Event_stream_format",
"example": "data: {... see CreateCompletionResponse ...} \\n\\n data: ... \\n\\n ... data: [DONE]"
}
}
}
},
"422": {
"description": "Validation Error",
"content": {
"application/json": {
"schema": { "$ref": "#/components/schemas/HTTPValidationError" }
}
}
}
},
"security": [{ "HTTPBearer": [] }]
}
},
"/v1/embeddings": {
"post": {
"tags": ["OpenAI V1"],
"summary": "Embedding",
"operationId": "create_embedding_v1_embeddings_post",
"requestBody": {
"content": {
"application/json": {
"schema": {
"$ref": "#/components/schemas/CreateEmbeddingRequest"
}
}
},
"required": true
},
"responses": {
"200": {
"description": "Successful Response",
"content": { "application/json": { "schema": {} } }
},
"422": {
"description": "Validation Error",
"content": {
"application/json": {
"schema": { "$ref": "#/components/schemas/HTTPValidationError" }
}
}
}
},
"security": [{ "HTTPBearer": [] }]
}
},
"/v1/chat/completions": {
"post": {
"tags": ["OpenAI V1"],
"summary": "Chat",
"operationId": "create_chat_completion_v1_chat_completions_post",
"requestBody": {
"content": {
"application/json": {
"schema": {
"$ref": "#/components/schemas/CreateChatCompletionRequest"
},
"examples": {
"normal": {
"summary": "Chat Completion",
"value": {
"model": "gpt-3.5-turbo",
"messages": [
{
"role": "system",
"content": "You are a helpful assistant."
},
{
"role": "user",
"content": "What is the capital of France?"
}
]
}
},
"json_mode": {
"summary": "JSON Mode",
"value": {
"model": "gpt-3.5-turbo",
"messages": [
{
"role": "system",
"content": "You are a helpful assistant."
},
{
"role": "user",
"content": "Who won the world series in 2020"
}
],
"response_format": { "type": "json_object" }
}
},
"tool_calling": {
"summary": "Tool Calling",
"value": {
"model": "gpt-3.5-turbo",
"messages": [
{
"role": "system",
"content": "You are a helpful assistant."
},
{
"role": "user",
"content": "Extract Jason is 30 years old."
}
],
"tools": [
{
"type": "function",
"function": {
"name": "User",
"description": "User record",
"parameters": {
"type": "object",
"properties": {
"name": { "type": "string" },
"age": { "type": "number" }
},
"required": ["name", "age"]
}
}
}
],
"tool_choice": {
"type": "function",
"function": { "name": "User" }
}
}
},
"logprobs": {
"summary": "Logprobs",
"value": {
"model": "gpt-3.5-turbo",
"messages": [
{
"role": "system",
"content": "You are a helpful assistant."
},
{
"role": "user",
"content": "What is the capital of France?"
}
],
"logprobs": true,
"top_logprobs": 10
}
}
}
}
},
"required": true
},
"responses": {
"200": {
"description": "Successful Response",
"content": {
"application/json": {
"schema": {
"anyOf": [
{
"$ref": "#/components/schemas/CreateChatCompletionResponse"
},
{ "type": "string" },
{
"$ref": "#/components/schemas/CreateChatCompletionResponse"
}
],
"title": "Completion response, when stream=False"
}
},
"text/event-stream": {
"schema": {
"type": "string",
"title": "Server Side Streaming response, when stream=True. See SSE format: https://developer.mozilla.org/en-US/docs/Web/API/Server-sent_events/Using_server-sent_events#Event_stream_format",
"example": "data: {... see CreateChatCompletionResponse ...} \\n\\n data: ... \\n\\n ... data: [DONE]"
}
}
}
},
"422": {
"description": "Validation Error",
"content": {
"application/json": {
"schema": { "$ref": "#/components/schemas/HTTPValidationError" }
}
}
}
},
"security": [{ "HTTPBearer": [] }]
}
},
"/v1/models": {
"get": {
"tags": ["OpenAI V1"],
"summary": "Models",
"operationId": "get_models_v1_models_get",
"responses": {
"200": {
"description": "Successful Response",
"content": {
"application/json": {
"schema": { "$ref": "#/components/schemas/ModelList" }
}
}
}
},
"security": [{ "HTTPBearer": [] }]
}
},
"/extras/tokenize": {
"post": {
"tags": ["Extras"],
"summary": "Tokenize",
"operationId": "tokenize_extras_tokenize_post",
"requestBody": {
"content": {
"application/json": {
"schema": { "$ref": "#/components/schemas/TokenizeInputRequest" }
}
},
"required": true
},
"responses": {
"200": {
"description": "Successful Response",
"content": {
"application/json": {
"schema": {
"$ref": "#/components/schemas/TokenizeInputResponse"
}
}
}
},
"422": {
"description": "Validation Error",
"content": {
"application/json": {
"schema": { "$ref": "#/components/schemas/HTTPValidationError" }
}
}
}
},
"security": [{ "HTTPBearer": [] }]
}
},
"/extras/tokenize/count": {
"post": {
"tags": ["Extras"],
"summary": "Tokenize Count",
"operationId": "count_query_tokens_extras_tokenize_count_post",
"requestBody": {
"content": {
"application/json": {
"schema": { "$ref": "#/components/schemas/TokenizeInputRequest" }
}
},
"required": true
},
"responses": {
"200": {
"description": "Successful Response",
"content": {
"application/json": {
"schema": {
"$ref": "#/components/schemas/TokenizeInputCountResponse"
}
}
}
},
"422": {
"description": "Validation Error",
"content": {
"application/json": {
"schema": { "$ref": "#/components/schemas/HTTPValidationError" }
}
}
}
},
"security": [{ "HTTPBearer": [] }]
}
},
"/extras/detokenize": {
"post": {
"tags": ["Extras"],
"summary": "Detokenize",
"operationId": "detokenize_extras_detokenize_post",
"requestBody": {
"content": {
"application/json": {
"schema": {
"$ref": "#/components/schemas/DetokenizeInputRequest"
}
}
},
"required": true
},
"responses": {
"200": {
"description": "Successful Response",
"content": {
"application/json": {
"schema": {
"$ref": "#/components/schemas/DetokenizeInputResponse"
}
}
}
},
"422": {
"description": "Validation Error",
"content": {
"application/json": {
"schema": { "$ref": "#/components/schemas/HTTPValidationError" }
}
}
}
},
"security": [{ "HTTPBearer": [] }]
}
}
},
"components": {
"schemas": {
"ChatCompletionFunction": {
"properties": {
"name": { "type": "string", "title": "Name" },
"description": { "type": "string", "title": "Description" },
"parameters": {
"additionalProperties": {
"anyOf": [
{ "type": "integer" },
{ "type": "string" },
{ "type": "boolean" },
{ "items": {}, "type": "array" },
{ "additionalProperties": true, "type": "object" },
{ "type": "null" }
]
},
"type": "object",
"title": "Parameters"
}
},
"type": "object",
"required": ["name", "parameters"],
"title": "ChatCompletionFunction"
},
"ChatCompletionLogprobToken": {
"properties": {
"token": { "type": "string", "title": "Token" },
"logprob": { "type": "number", "title": "Logprob" },
"bytes": {
"anyOf": [
{ "items": { "type": "integer" }, "type": "array" },
{ "type": "null" }
],
"title": "Bytes"
},
"top_logprobs": {
"items": {
"$ref": "#/components/schemas/ChatCompletionTopLogprobToken"
},
"type": "array",
"title": "Top Logprobs"
}
},
"type": "object",
"required": ["token", "logprob", "bytes", "top_logprobs"],
"title": "ChatCompletionLogprobToken"
},
"ChatCompletionLogprobs": {
"properties": {
"content": {
"anyOf": [
{
"items": {
"$ref": "#/components/schemas/ChatCompletionLogprobToken"
},
"type": "array"
},
{ "type": "null" }
],
"title": "Content"
},
"refusal": {
"anyOf": [
{
"items": {
"$ref": "#/components/schemas/ChatCompletionLogprobToken"
},
"type": "array"
},
{ "type": "null" }
],
"title": "Refusal"
}
},
"type": "object",
"required": ["content", "refusal"],
"title": "ChatCompletionLogprobs"
},
"ChatCompletionMessageToolCall": {
"properties": {
"id": { "type": "string", "title": "Id" },
"type": { "type": "string", "const": "function", "title": "Type" },
"function": {
"$ref": "#/components/schemas/ChatCompletionMessageToolCallFunction"
}
},
"type": "object",
"required": ["id", "type", "function"],
"title": "ChatCompletionMessageToolCall"
},
"ChatCompletionMessageToolCallFunction": {
"properties": {
"name": { "type": "string", "title": "Name" },
"arguments": { "type": "string", "title": "Arguments" }
},
"type": "object",
"required": ["name", "arguments"],
"title": "ChatCompletionMessageToolCallFunction"
},
"ChatCompletionNamedToolChoice": {
"properties": {
"type": { "type": "string", "const": "function", "title": "Type" },
"function": {
"$ref": "#/components/schemas/ChatCompletionNamedToolChoiceFunction"
}
},
"type": "object",
"required": ["type", "function"],
"title": "ChatCompletionNamedToolChoice"
},
"ChatCompletionNamedToolChoiceFunction": {
"properties": { "name": { "type": "string", "title": "Name" } },
"type": "object",
"required": ["name"],
"title": "ChatCompletionNamedToolChoiceFunction"
},
"ChatCompletionRequestAssistantMessage": {
"properties": {
"role": { "type": "string", "const": "assistant", "title": "Role" },
"content": { "type": "string", "title": "Content" },
"tool_calls": {
"items": {
"$ref": "#/components/schemas/ChatCompletionMessageToolCall"
},
"type": "array",
"title": "Tool Calls"
},
"function_call": {
"$ref": "#/components/schemas/ChatCompletionRequestAssistantMessageFunctionCall"
}
},
"type": "object",
"required": ["role"],
"title": "ChatCompletionRequestAssistantMessage"
},
"ChatCompletionRequestAssistantMessageFunctionCall": {
"properties": {
"name": { "type": "string", "title": "Name" },
"arguments": { "type": "string", "title": "Arguments" }
},
"type": "object",
"required": ["name", "arguments"],
"title": "ChatCompletionRequestAssistantMessageFunctionCall"
},
"ChatCompletionRequestFunctionCallOption": {
"properties": { "name": { "type": "string", "title": "Name" } },
"type": "object",
"required": ["name"],
"title": "ChatCompletionRequestFunctionCallOption"
},
"ChatCompletionRequestFunctionMessage": {
"properties": {
"role": { "type": "string", "const": "function", "title": "Role" },
"content": {
"anyOf": [{ "type": "string" }, { "type": "null" }],
"title": "Content"
},
"name": { "type": "string", "title": "Name" }
},
"type": "object",
"required": ["role", "content", "name"],
"title": "ChatCompletionRequestFunctionMessage"
},
"ChatCompletionRequestMessageContentPartImage": {
"properties": {
"type": { "type": "string", "const": "image_url", "title": "Type" },
"image_url": {
"anyOf": [
{ "type": "string" },
{
"$ref": "#/components/schemas/ChatCompletionRequestMessageContentPartImageImageUrl"
}
],
"title": "Image Url"
}
},
"type": "object",
"required": ["type", "image_url"],
"title": "ChatCompletionRequestMessageContentPartImage"
},
"ChatCompletionRequestMessageContentPartImageImageUrl": {
"properties": {
"url": { "type": "string", "title": "Url" },
"detail": {
"type": "string",
"enum": ["auto", "low", "high"],
"title": "Detail"
}
},
"type": "object",
"required": ["url"],
"title": "ChatCompletionRequestMessageContentPartImageImageUrl"
},
"ChatCompletionRequestMessageContentPartText": {
"properties": {
"type": { "type": "string", "const": "text", "title": "Type" },
"text": { "type": "string", "title": "Text" }
},
"type": "object",
"required": ["type", "text"],
"title": "ChatCompletionRequestMessageContentPartText"
},
"ChatCompletionRequestResponseFormat": {
"properties": {
"type": {
"type": "string",
"enum": ["text", "json_object"],
"title": "Type"
},
"schema": {
"anyOf": [
{ "type": "integer" },
{ "type": "string" },
{ "type": "boolean" },
{ "items": {}, "type": "array" },
{ "additionalProperties": true, "type": "object" },
{ "type": "null" }
],
"title": "Schema"
}
},
"type": "object",
"required": ["type"],
"title": "ChatCompletionRequestResponseFormat"
},
"ChatCompletionRequestSystemMessage": {
"properties": {
"role": { "type": "string", "const": "system", "title": "Role" },
"content": {
"anyOf": [{ "type": "string" }, { "type": "null" }],
"title": "Content"
}
},
"type": "object",
"required": ["role", "content"],
"title": "ChatCompletionRequestSystemMessage"
},
"ChatCompletionRequestToolMessage": {
"properties": {
"role": { "type": "string", "const": "tool", "title": "Role" },
"content": {
"anyOf": [{ "type": "string" }, { "type": "null" }],
"title": "Content"
},
"tool_call_id": { "type": "string", "title": "Tool Call Id" }
},
"type": "object",
"required": ["role", "content", "tool_call_id"],
"title": "ChatCompletionRequestToolMessage"
},
"ChatCompletionRequestUserMessage": {
"properties": {
"role": { "type": "string", "const": "user", "title": "Role" },
"content": {
"anyOf": [
{ "type": "string" },
{
"items": {
"anyOf": [
{
"$ref": "#/components/schemas/ChatCompletionRequestMessageContentPartText"
},
{
"$ref": "#/components/schemas/ChatCompletionRequestMessageContentPartImage"
}
]
},
"type": "array"
},
{ "type": "null" }
],
"title": "Content"
}
},
"type": "object",
"required": ["role", "content"],
"title": "ChatCompletionRequestUserMessage"
},
"ChatCompletionResponseChoice": {
"properties": {
"index": { "type": "integer", "title": "Index" },
"message": {
"$ref": "#/components/schemas/ChatCompletionResponseMessage"
},
"logprobs": {
"anyOf": [
{ "$ref": "#/components/schemas/ChatCompletionLogprobs" },
{ "type": "null" }
]
},
"finish_reason": {
"anyOf": [{ "type": "string" }, { "type": "null" }],
"title": "Finish Reason"
}
},
"type": "object",
"required": ["index", "message", "logprobs", "finish_reason"],
"title": "ChatCompletionResponseChoice"
},
"ChatCompletionResponseFunctionCall": {
"properties": {
"name": { "type": "string", "title": "Name" },
"arguments": { "type": "string", "title": "Arguments" }
},
"type": "object",
"required": ["name", "arguments"],
"title": "ChatCompletionResponseFunctionCall"
},
"ChatCompletionResponseMessage": {
"properties": {
"content": {
"anyOf": [{ "type": "string" }, { "type": "null" }],
"title": "Content"
},
"tool_calls": {
"items": {
"$ref": "#/components/schemas/ChatCompletionMessageToolCall"
},
"type": "array",
"title": "Tool Calls"
},
"role": {
"type": "string",
"enum": ["assistant", "function"],
"title": "Role"
},
"function_call": {
"$ref": "#/components/schemas/ChatCompletionResponseFunctionCall"
}
},
"type": "object",
"required": ["content", "role"],
"title": "ChatCompletionResponseMessage"
},
"ChatCompletionTool": {
"properties": {
"type": { "type": "string", "const": "function", "title": "Type" },
"function": {
"$ref": "#/components/schemas/ChatCompletionToolFunction"
}
},
"type": "object",
"required": ["type", "function"],
"title": "ChatCompletionTool"
},
"ChatCompletionToolFunction": {
"properties": {
"name": { "type": "string", "title": "Name" },
"description": { "type": "string", "title": "Description" },
"parameters": {
"additionalProperties": {
"anyOf": [
{ "type": "integer" },
{ "type": "string" },
{ "type": "boolean" },
{ "items": {}, "type": "array" },
{ "additionalProperties": true, "type": "object" },
{ "type": "null" }
]
},
"type": "object",
"title": "Parameters"
}
},
"type": "object",
"required": ["name", "parameters"],
"title": "ChatCompletionToolFunction"
},
"ChatCompletionTopLogprobToken": {
"properties": {
"token": { "type": "string", "title": "Token" },
"logprob": { "type": "number", "title": "Logprob" },
"bytes": {
"anyOf": [
{ "items": { "type": "integer" }, "type": "array" },
{ "type": "null" }
],
"title": "Bytes"
}
},
"type": "object",
"required": ["token", "logprob", "bytes"],
"title": "ChatCompletionTopLogprobToken"
},
"CompletionChoice": {
"properties": {
"text": { "type": "string", "title": "Text" },
"index": { "type": "integer", "title": "Index" },
"logprobs": {
"anyOf": [
{ "$ref": "#/components/schemas/CompletionLogprobs" },
{ "type": "null" }
]
},
"finish_reason": {
"anyOf": [
{ "type": "string", "enum": ["stop", "length"] },
{ "type": "null" }
],
"title": "Finish Reason"
}
},
"type": "object",
"required": ["text", "index", "logprobs", "finish_reason"],
"title": "CompletionChoice"
},
"CompletionLogprobs": {
"properties": {
"text_offset": {
"items": { "type": "integer" },
"type": "array",
"title": "Text Offset"
},
"token_logprobs": {
"items": { "anyOf": [{ "type": "number" }, { "type": "null" }] },
"type": "array",
"title": "Token Logprobs"
},
"tokens": {
"items": { "type": "string" },
"type": "array",
"title": "Tokens"
},
"top_logprobs": {
"items": {
"anyOf": [
{
"additionalProperties": { "type": "number" },
"type": "object"
},
{ "type": "null" }
]
},
"type": "array",
"title": "Top Logprobs"
}
},
"type": "object",
"required": ["text_offset", "token_logprobs", "tokens", "top_logprobs"],
"title": "CompletionLogprobs"
},
"CompletionUsage": {
"properties": {
"prompt_tokens": { "type": "integer", "title": "Prompt Tokens" },
"completion_tokens": {
"type": "integer",
"title": "Completion Tokens"
},
"total_tokens": { "type": "integer", "title": "Total Tokens" }
},
"type": "object",
"required": ["prompt_tokens", "completion_tokens", "total_tokens"],
"title": "CompletionUsage"
},
"CreateChatCompletionRequest": {
"properties": {
"messages": {
"items": {
"anyOf": [
{
"$ref": "#/components/schemas/ChatCompletionRequestSystemMessage"
},
{
"$ref": "#/components/schemas/ChatCompletionRequestUserMessage"
},
{
"$ref": "#/components/schemas/ChatCompletionRequestAssistantMessage"
},
{
"$ref": "#/components/schemas/ChatCompletionRequestToolMessage"
},
{
"$ref": "#/components/schemas/ChatCompletionRequestFunctionMessage"
}
]
},
"type": "array",
"title": "Messages",
"description": "A list of messages to generate completions for.",
"default": []
},
"functions": {
"anyOf": [
{
"items": {
"$ref": "#/components/schemas/ChatCompletionFunction"
},
"type": "array"
},
{ "type": "null" }
],
"title": "Functions",
"description": "A list of functions to apply to the generated completions."
},
"function_call": {
"anyOf": [
{ "type": "string", "enum": ["none", "auto"] },
{
"$ref": "#/components/schemas/ChatCompletionRequestFunctionCallOption"
},
{ "type": "null" }
],
"title": "Function Call",
"description": "A function to apply to the generated completions."
},
"tools": {
"anyOf": [
{
"items": { "$ref": "#/components/schemas/ChatCompletionTool" },
"type": "array"
},
{ "type": "null" }
],
"title": "Tools",
"description": "A list of tools to apply to the generated completions."
},
"tool_choice": {
"anyOf": [
{ "type": "string", "enum": ["none", "auto", "required"] },
{ "$ref": "#/components/schemas/ChatCompletionNamedToolChoice" },
{ "type": "null" }
],
"title": "Tool Choice",
"description": "A tool to apply to the generated completions."
},
"max_tokens": {
"anyOf": [{ "type": "integer" }, { "type": "null" }],
"title": "Max Tokens",
"description": "The maximum number of tokens to generate. Defaults to infinity (no limit)."
},
"min_tokens": {
"type": "integer",
"minimum": 0.0,
"title": "Min Tokens",
"description": "The minimum number of tokens to generate. It may return fewer tokens if another condition is met (e.g. max_tokens, stop).",
"default": 0
},
"logprobs": {
"anyOf": [{ "type": "boolean" }, { "type": "null" }],
"title": "Logprobs",
"description": "Whether to output the logprobs or not. Default is False",
"default": false
},
"top_logprobs": {
"anyOf": [
{ "type": "integer", "minimum": 0.0 },
{ "type": "null" }
],
"title": "Top Logprobs",
"description": "The number of logprobs to generate. If None, no logprobs are generated. logprobs needs to be set to True."
},
"temperature": {
"type": "number",
"title": "Temperature",
"description": "Adjust the randomness of the generated text.\n\nTemperature is a hyperparameter that controls the randomness of the generated text. It affects the probability distribution of the model's output tokens. A higher temperature (e.g., 1.5) makes the output more random and creative, while a lower temperature (e.g., 0.5) makes the output more focused, deterministic, and conservative. The default value is 0.8, which provides a balance between randomness and determinism. At the extreme, a temperature of 0 will always pick the most likely next token, leading to identical outputs in each run.",
"default": 0.8
},
"top_p": {
"type": "number",
"maximum": 1.0,
"minimum": 0.0,
"title": "Top P",
"description": "Limit the next token selection to a subset of tokens with a cumulative probability above a threshold P.\n\nTop-p sampling, also known as nucleus sampling, is another text generation method that selects the next token from a subset of tokens that together have a cumulative probability of at least p. This method provides a balance between diversity and quality by considering both the probabilities of tokens and the number of tokens to sample from. A higher value for top_p (e.g., 0.95) will lead to more diverse text, while a lower value (e.g., 0.5) will generate more focused and conservative text.",
"default": 0.95
},
"min_p": {
"type": "number",
"maximum": 1.0,
"minimum": 0.0,
"title": "Min P",
"description": "Sets a minimum base probability threshold for token selection.\n\nThe Min-P sampling method was designed as an alternative to Top-P, and aims to ensure a balance of quality and variety. The parameter min_p represents the minimum probability for a token to be considered, relative to the probability of the most likely token. For example, with min_p=0.05 and the most likely token having a probability of 0.9, tokens with a probability less than 0.045 are filtered out.",
"default": 0.05
},
"stop": {
"anyOf": [
{ "type": "string" },
{ "items": { "type": "string" }, "type": "array" },
{ "type": "null" }
],
"title": "Stop",
"description": "A list of tokens at which to stop generation. If None, no stop tokens are used."
},
"stream": {
"type": "boolean",
"title": "Stream",
"description": "Whether to stream the results as they are generated. Useful for chatbots.",
"default": false
},
"presence_penalty": {
"anyOf": [
{ "type": "number", "maximum": 2.0, "minimum": -2.0 },
{ "type": "null" }
],
"title": "Presence Penalty",
"description": "Positive values penalize new tokens based on whether they appear in the text so far, increasing the model's likelihood to talk about new topics.",
"default": 0.0
},
"frequency_penalty": {
"anyOf": [
{ "type": "number", "maximum": 2.0, "minimum": -2.0 },
{ "type": "null" }
],
"title": "Frequency Penalty",
"description": "Positive values penalize new tokens based on their existing frequency in the text so far, decreasing the model's likelihood to repeat the same line verbatim.",
"default": 0.0
},
"logit_bias": {
"anyOf": [
{
"additionalProperties": { "type": "number" },
"type": "object"
},
{ "type": "null" }
],
"title": "Logit Bias"
},
"seed": {
"anyOf": [{ "type": "integer" }, { "type": "null" }],
"title": "Seed"
},
"response_format": {
"anyOf": [
{
"$ref": "#/components/schemas/ChatCompletionRequestResponseFormat"
},
{ "type": "null" }
]
},
"model": {
"anyOf": [{ "type": "string" }, { "type": "null" }],
"title": "Model",
"description": "The model to use for generating completions."
},
"n": {
"anyOf": [{ "type": "integer" }, { "type": "null" }],
"title": "N",
"default": 1
},
"user": {
"anyOf": [{ "type": "string" }, { "type": "null" }],
"title": "User"
},
"top_k": {
"type": "integer",
"minimum": 0.0,
"title": "Top K",
"description": "Limit the next token selection to the K most probable tokens.\n\nTop-k sampling is a text generation method that selects the next token only from the top k most likely tokens predicted by the model. It helps reduce the risk of generating low-probability or nonsensical tokens, but it may also limit the diversity of the output. A higher value for top_k (e.g., 100) will consider more tokens and lead to more diverse text, while a lower value (e.g., 10) will focus on the most probable tokens and generate more conservative text.",
"default": 40
},
"repeat_penalty": {
"type": "number",
"minimum": 0.0,
"title": "Repeat Penalty",
"description": "A penalty applied to each token that is already generated. This helps prevent the model from repeating itself.\n\nRepeat penalty is a hyperparameter used to penalize the repetition of token sequences during text generation. It helps prevent the model from generating repetitive or monotonous text. A higher value (e.g., 1.5) will penalize repetitions more strongly, while a lower value (e.g., 0.9) will be more lenient.",
"default": 1.1
},
"logit_bias_type": {
"anyOf": [
{ "type": "string", "enum": ["input_ids", "tokens"] },
{ "type": "null" }
],
"title": "Logit Bias Type"
},
"mirostat_mode": {
"type": "integer",
"maximum": 2.0,
"minimum": 0.0,
"title": "Mirostat Mode",
"description": "Enable Mirostat constant-perplexity algorithm of the specified version (1 or 2; 0 = disabled)",
"default": 0
},
"mirostat_tau": {
"type": "number",
"maximum": 10.0,
"minimum": 0.0,
"title": "Mirostat Tau",
"description": "Mirostat target entropy, i.e. the target perplexity - lower values produce focused and coherent text, larger values produce more diverse and less coherent text",
"default": 5.0
},
"mirostat_eta": {
"type": "number",
"maximum": 1.0,
"minimum": 0.001,
"title": "Mirostat Eta",
"description": "Mirostat learning rate",
"default": 0.1
},
"grammar": {
"anyOf": [{ "type": "string" }, { "type": "null" }],
"title": "Grammar"
}
},
"type": "object",
"title": "CreateChatCompletionRequest",
"examples": [
{
"messages": [
{ "content": "You are a helpful assistant.", "role": "system" },
{ "content": "What is the capital of France?", "role": "user" }
]
}
]
},
"CreateChatCompletionResponse": {
"properties": {
"id": { "type": "string", "title": "Id" },
"object": {
"type": "string",
"const": "chat.completion",
"title": "Object"
},
"created": { "type": "integer", "title": "Created" },
"model": { "type": "string", "title": "Model" },
"choices": {
"items": {
"$ref": "#/components/schemas/ChatCompletionResponseChoice"
},
"type": "array",
"title": "Choices"
},
"usage": { "$ref": "#/components/schemas/CompletionUsage" }
},
"type": "object",
"required": ["id", "object", "created", "model", "choices", "usage"],
"title": "CreateChatCompletionResponse"
},
"CreateCompletionRequest": {
"properties": {
"prompt": {
"anyOf": [
{ "type": "string" },
{ "items": { "type": "string" }, "type": "array" }
],
"title": "Prompt",
"description": "The prompt to generate completions for.",
"default": ""
},
"suffix": {
"anyOf": [{ "type": "string" }, { "type": "null" }],
"title": "Suffix",
"description": "A suffix to append to the generated text. If None, no suffix is appended. Useful for chatbots."
},
"max_tokens": {
"anyOf": [
{ "type": "integer", "minimum": 0.0 },
{ "type": "null" }
],
"title": "Max Tokens",
"description": "The maximum number of tokens to generate.",
"default": 16
},
"min_tokens": {
"type": "integer",
"minimum": 0.0,
"title": "Min Tokens",
"description": "The minimum number of tokens to generate. It may return fewer tokens if another condition is met (e.g. max_tokens, stop).",
"default": 0
},
"temperature": {
"type": "number",
"title": "Temperature",
"description": "Adjust the randomness of the generated text.\n\nTemperature is a hyperparameter that controls the randomness of the generated text. It affects the probability distribution of the model's output tokens. A higher temperature (e.g., 1.5) makes the output more random and creative, while a lower temperature (e.g., 0.5) makes the output more focused, deterministic, and conservative. The default value is 0.8, which provides a balance between randomness and determinism. At the extreme, a temperature of 0 will always pick the most likely next token, leading to identical outputs in each run.",
"default": 0.8
},
"top_p": {
"type": "number",
"maximum": 1.0,
"minimum": 0.0,
"title": "Top P",
"description": "Limit the next token selection to a subset of tokens with a cumulative probability above a threshold P.\n\nTop-p sampling, also known as nucleus sampling, is another text generation method that selects the next token from a subset of tokens that together have a cumulative probability of at least p. This method provides a balance between diversity and quality by considering both the probabilities of tokens and the number of tokens to sample from. A higher value for top_p (e.g., 0.95) will lead to more diverse text, while a lower value (e.g., 0.5) will generate more focused and conservative text.",
"default": 0.95
},
"min_p": {
"type": "number",
"maximum": 1.0,
"minimum": 0.0,
"title": "Min P",
"description": "Sets a minimum base probability threshold for token selection.\n\nThe Min-P sampling method was designed as an alternative to Top-P, and aims to ensure a balance of quality and variety. The parameter min_p represents the minimum probability for a token to be considered, relative to the probability of the most likely token. For example, with min_p=0.05 and the most likely token having a probability of 0.9, tokens with a probability less than 0.045 are filtered out.",
"default": 0.05
},
"echo": {
"type": "boolean",
"title": "Echo",
"description": "Whether to echo the prompt in the generated text. Useful for chatbots.",
"default": false
},
"stop": {
"anyOf": [
{ "type": "string" },
{ "items": { "type": "string" }, "type": "array" },
{ "type": "null" }
],
"title": "Stop",
"description": "A list of tokens at which to stop generation. If None, no stop tokens are used."
},
"stream": {
"type": "boolean",
"title": "Stream",
"description": "Whether to stream the results as they are generated. Useful for chatbots.",
"default": false
},
"logprobs": {
"anyOf": [
{ "type": "integer", "minimum": 0.0 },
{ "type": "null" }
],
"title": "Logprobs",
"description": "The number of logprobs to generate. If None, no logprobs are generated."
},
"presence_penalty": {
"anyOf": [
{ "type": "number", "maximum": 2.0, "minimum": -2.0 },
{ "type": "null" }
],
"title": "Presence Penalty",
"description": "Positive values penalize new tokens based on whether they appear in the text so far, increasing the model's likelihood to talk about new topics.",
"default": 0.0
},
"frequency_penalty": {
"anyOf": [
{ "type": "number", "maximum": 2.0, "minimum": -2.0 },
{ "type": "null" }
],
"title": "Frequency Penalty",
"description": "Positive values penalize new tokens based on their existing frequency in the text so far, decreasing the model's likelihood to repeat the same line verbatim.",
"default": 0.0
},
"logit_bias": {
"anyOf": [
{
"additionalProperties": { "type": "number" },
"type": "object"
},
{ "type": "null" }
],
"title": "Logit Bias"
},
"seed": {
"anyOf": [{ "type": "integer" }, { "type": "null" }],
"title": "Seed"
},
"model": {
"anyOf": [{ "type": "string" }, { "type": "null" }],
"title": "Model",
"description": "The model to use for generating completions."
},
"n": {
"anyOf": [{ "type": "integer" }, { "type": "null" }],
"title": "N",
"default": 1
},
"best_of": {
"anyOf": [{ "type": "integer" }, { "type": "null" }],
"title": "Best Of",
"default": 1
},
"user": {
"anyOf": [{ "type": "string" }, { "type": "null" }],
"title": "User"
},
"top_k": {
"type": "integer",
"minimum": 0.0,
"title": "Top K",
"description": "Limit the next token selection to the K most probable tokens.\n\nTop-k sampling is a text generation method that selects the next token only from the top k most likely tokens predicted by the model. It helps reduce the risk of generating low-probability or nonsensical tokens, but it may also limit the diversity of the output. A higher value for top_k (e.g., 100) will consider more tokens and lead to more diverse text, while a lower value (e.g., 10) will focus on the most probable tokens and generate more conservative text.",
"default": 40
},
"repeat_penalty": {
"type": "number",
"minimum": 0.0,
"title": "Repeat Penalty",
"description": "A penalty applied to each token that is already generated. This helps prevent the model from repeating itself.\n\nRepeat penalty is a hyperparameter used to penalize the repetition of token sequences during text generation. It helps prevent the model from generating repetitive or monotonous text. A higher value (e.g., 1.5) will penalize repetitions more strongly, while a lower value (e.g., 0.9) will be more lenient.",
"default": 1.1
},
"logit_bias_type": {
"anyOf": [
{ "type": "string", "enum": ["input_ids", "tokens"] },
{ "type": "null" }
],
"title": "Logit Bias Type"
},
"mirostat_mode": {
"type": "integer",
"maximum": 2.0,
"minimum": 0.0,
"title": "Mirostat Mode",
"description": "Enable Mirostat constant-perplexity algorithm of the specified version (1 or 2; 0 = disabled)",
"default": 0
},
"mirostat_tau": {
"type": "number",
"maximum": 10.0,
"minimum": 0.0,
"title": "Mirostat Tau",
"description": "Mirostat target entropy, i.e. the target perplexity - lower values produce focused and coherent text, larger values produce more diverse and less coherent text",
"default": 5.0
},
"mirostat_eta": {
"type": "number",
"maximum": 1.0,
"minimum": 0.001,
"title": "Mirostat Eta",
"description": "Mirostat learning rate",
"default": 0.1
},
"grammar": {
"anyOf": [{ "type": "string" }, { "type": "null" }],
"title": "Grammar"
}
},
"type": "object",
"title": "CreateCompletionRequest",
"examples": [
{
"prompt": "\n\n### Instructions:\nWhat is the capital of France?\n\n### Response:\n",
"stop": ["\n", "###"]
}
]
},
"CreateCompletionResponse": {
"properties": {
"id": { "type": "string", "title": "Id" },
"object": {
"type": "string",
"const": "text_completion",
"title": "Object"
},
"created": { "type": "integer", "title": "Created" },
"model": { "type": "string", "title": "Model" },
"choices": {
"items": { "$ref": "#/components/schemas/CompletionChoice" },
"type": "array",
"title": "Choices"
},
"usage": { "$ref": "#/components/schemas/CompletionUsage" }
},
"type": "object",
"required": ["id", "object", "created", "model", "choices"],
"title": "CreateCompletionResponse"
},
"CreateEmbeddingRequest": {
"properties": {
"model": {
"anyOf": [{ "type": "string" }, { "type": "null" }],
"title": "Model",
"description": "The model to use for generating completions."
},
"input": {
"anyOf": [
{ "type": "string" },
{ "items": { "type": "string" }, "type": "array" }
],
"title": "Input",
"description": "The input to embed."
},
"user": {
"anyOf": [{ "type": "string" }, { "type": "null" }],
"title": "User"
}
},
"type": "object",
"required": ["input"],
"title": "CreateEmbeddingRequest",
"examples": [{ "input": "The food was delicious and the waiter..." }]
},
"DetokenizeInputRequest": {
"properties": {
"model": {
"anyOf": [{ "type": "string" }, { "type": "null" }],
"title": "Model",
"description": "The model to use for generating completions."
},
"tokens": {
"items": { "type": "integer" },
"type": "array",
"title": "Tokens",
"description": "A list of toekns to detokenize."
}
},
"type": "object",
"required": ["tokens"],
"title": "DetokenizeInputRequest",
"example": [{ "tokens": [123, 321, 222] }]
},
"DetokenizeInputResponse": {
"properties": {
"text": {
"type": "string",
"title": "Text",
"description": "The detokenized text."
}
},
"type": "object",
"required": ["text"],
"title": "DetokenizeInputResponse",
"example": { "text": "How many tokens in this query?" }
},
"HTTPValidationError": {
"properties": {
"detail": {
"items": { "$ref": "#/components/schemas/ValidationError" },
"type": "array",
"title": "Detail"
}
},
"type": "object",
"title": "HTTPValidationError"
},
"ModelData": {
"properties": {
"id": { "type": "string", "title": "Id" },
"object": { "type": "string", "const": "model", "title": "Object" },
"owned_by": { "type": "string", "title": "Owned By" },
"permissions": {
"items": { "type": "string" },
"type": "array",
"title": "Permissions"
}
},
"type": "object",
"required": ["id", "object", "owned_by", "permissions"],
"title": "ModelData"
},
"ModelList": {
"properties": {
"object": { "type": "string", "const": "list", "title": "Object" },
"data": {
"items": { "$ref": "#/components/schemas/ModelData" },
"type": "array",
"title": "Data"
}
},
"type": "object",
"required": ["object", "data"],
"title": "ModelList"
},
"TokenizeInputCountResponse": {
"properties": {
"count": {
"type": "integer",
"title": "Count",
"description": "The number of tokens in the input."
}
},
"type": "object",
"required": ["count"],
"title": "TokenizeInputCountResponse",
"example": { "count": 5 }
},
"TokenizeInputRequest": {
"properties": {
"model": {
"anyOf": [{ "type": "string" }, { "type": "null" }],
"title": "Model",
"description": "The model to use for generating completions."
},
"input": {
"type": "string",
"title": "Input",
"description": "The input to tokenize."
}
},
"type": "object",
"required": ["input"],
"title": "TokenizeInputRequest",
"examples": [{ "input": "How many tokens in this query?" }]
},
"TokenizeInputResponse": {
"properties": {
"tokens": {
"items": { "type": "integer" },
"type": "array",
"title": "Tokens",
"description": "A list of tokens."
}
},
"type": "object",
"required": ["tokens"],
"title": "TokenizeInputResponse",
"example": { "tokens": [123, 321, 222] }
},
"ValidationError": {
"properties": {
"loc": {
"items": { "anyOf": [{ "type": "string" }, { "type": "integer" }] },
"type": "array",
"title": "Location"
},
"msg": { "type": "string", "title": "Message" },
"type": { "type": "string", "title": "Error Type" }
},
"type": "object",
"required": ["loc", "msg", "type"],
"title": "ValidationError"
}
},
"securitySchemes": { "HTTPBearer": { "type": "http", "scheme": "bearer" } }
}
}