diff --git a/docs/openapi/jan.yaml b/docs/openapi/jan.yaml
index 2a04467de..2eb9b815b 100644
--- a/docs/openapi/jan.yaml
+++ b/docs/openapi/jan.yaml
@@ -14,8 +14,10 @@ servers:
 tags:
   - name: Models
     description: List and describe the various models available in the API.
-  - name: Chat Completion
-    description: Given a list of messages comprising a conversation, the model will return a response.
+  - name: Chat
+    description: |
+      Given a list of messages comprising a conversation, the model will return a response.
+
   - name: Messages
     description: |
       Messages capture a conversation's content. This can include the content from LLM responses and other metadata from [chat completions](/specs/chats).
@@ -38,13 +40,51 @@ x-tagGroups:
   - name: Endpoints
     tags:
       - Models
-      - Chat Completion
+      - Chat
   - name: Chat
     tags:
       - Assistants
       - Messages
       - Threads
 paths:
+  /chat/completions:
+    post:
+      operationId: createChatCompletion
+      tags:
+        - Chat
+      summary: Create chat completion
+      description: |
+
+        OpenAI compatible and Jan specified
+
+        Creates a model response for the given chat conversation.
+      requestBody:
+        content:
+          application/json:
+            schema:
+              $ref: "specs/chat.yaml#/components/schemas/ChatCompletionRequest"
+      responses:
+        "200":
+          description: OK
+          content:
+            application/json:
+              schema:
+                $ref: "specs/chat.yaml#/components/schemas/ChatCompletionResponse"
+      x-codeSamples:
+        - lang: "curl"
+          source: |
+            curl -X POST 'http://localhost:3982/inferences/llamacpp/chat_completion' \
+              -H "Content-Type: application/json" \
+              -d '{
+                "llama_model_path": "/path/to/your/model.gguf",
+                "messages": [
+                  {
+                    "role": "user",
+                    "content": "hello"
+                  }
+                ]
+              }'
+
   ### MODELS
   /models:
     get:
@@ -54,10 +94,9 @@ paths:
       summary: List models
       description: |
-
+
         Lists the currently available models, and provides basic information about each one such as the owner and availability.
-
       responses:
         "200":
           description: OK
@@ -96,7 +135,7 @@
         - Models
       summary: Retrieve model
       description: |
-
+
         Get a model instance, providing basic information about the model
         such as the owner and permissioning.
       parameters:
@@ -128,7 +167,7 @@
         - Models
       summary: Delete model
       description: |
-
+
         Delete a model.
       parameters:
@@ -192,7 +231,7 @@
       summary: Stop model
       description: |
         Jan
-
+
         Stop an imported model.
       parameters:
         - in: path
@@ -589,12 +628,12 @@
                 'write:pets': modify pets in your account
                 'read:pets': read your pets
       description: |
-
+
         Returns a list of message files.
-
+
       parameters:
@@ -675,7 +714,7 @@ x-webhooks:
   post:
     summary: The model object
     description: |
-
+
       Describe a model offering that can be used with the API.
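For reviewers who want to sanity-check the new route: a minimal sketch of an OpenAI-style request against the `/chat/completions` path added above, using only fields defined in the referenced `ChatCompletionRequest` schema. Note the `x-codeSamples` entry in the diff still targets the legacy `/inferences/llamacpp/chat_completion` route; the base URL below is an assumption carried over from that sample, and the real host and port come from the spec's `servers:` block, which this diff does not touch.

```sh
# Hedged example, not part of the diff: exercises the new OpenAI-compatible
# route with fields from ChatCompletionRequest. The base URL
# (localhost:3982) is assumed from the existing code sample.
curl -X POST 'http://localhost:3982/chat/completions' \
  -H "Content-Type: application/json" \
  -d '{
    "model": "model-zephyr-7B",
    "messages": [
      { "role": "user", "content": "hello" }
    ],
    "stream": false,
    "max_tokens": 2048,
    "temperature": 0.7
  }'
```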
diff --git a/docs/openapi/specs/chat.yaml b/docs/openapi/specs/chat.yaml
new file mode 100644
index 000000000..7aef0cfe0
--- /dev/null
+++ b/docs/openapi/specs/chat.yaml
@@ -0,0 +1,197 @@
+components:
+  schemas:
+    ChatObject:
+      type: object
+      properties:
+        messages:
+          type: array
+          description: |
+            Contains input data or prompts for the model to process
+          example:
+            [
+              { "content": "Hello there :wave:", "role": "assistant" },
+              { "content": "Can you write a long story", "role": "user" }
+            ]
+        stream:
+          type: boolean
+          default: true
+          description: Enables continuous output generation, allowing for streaming of model responses
+        model:
+          type: string
+          example: "gpt-3.5-turbo"
+          description: Specifies the model being used for inference or processing tasks
+        max_tokens:
+          type: number
+          default: 2048
+          description: The maximum number of tokens the model will generate in a single response
+        stop:
+          type: array
+          example: ["hello"]
+          description: Defines specific tokens or phrases at which the model will stop generating further output
+        frequency_penalty:
+          type: number
+          default: 0
+          description: Adjusts the likelihood of the model repeating words or phrases in its output
+        presence_penalty:
+          type: number
+          default: 0
+          description: Influences the generation of new and varied concepts in the model's output
+        temperature:
+          type: number
+          default: 0.7
+          minimum: 0
+          maximum: 1
+          description: Controls the randomness of the model's output
+        top_p:
+          type: number
+          default: 0.95
+          minimum: 0
+          maximum: 1
+          description: Sets the probability threshold for more relevant outputs
+        cache_prompt:
+          type: boolean
+          default: true
+          description: Optimizes performance for repeated or similar requests.
+    ChatCompletionRequest:
+      type: object
+      properties:
+        messages:
+          type: array
+          description: |
+            Contains input data or prompts for the model to process
+
+            OpenAI compatible
+          example:
+            [
+              { "content": "Hello there :wave:", "role": "assistant" },
+              { "content": "Can you write a long story", "role": "user" }
+            ]
+        model:
+          type: string
+          example: model-zephyr-7B
+          description: |
+            Specifies the model being used for inference or processing tasks
+
+            OpenAI compatible
+        stream:
+          type: boolean
+          default: true
+          description: |
+            Enables continuous output generation, allowing for streaming of model responses
+
+            OpenAI compatible
+        max_tokens:
+          type: number
+          default: 2048
+          description: |
+            The maximum number of tokens the model will generate in a single response
+
+            OpenAI compatible
+        stop:
+          type: array
+          example: ["hello"]
+          description: |
+            Defines specific tokens or phrases at which the model will stop generating further output
+
+            OpenAI compatible
+        frequency_penalty:
+          type: number
+          default: 0
+          description: |
+            Adjusts the likelihood of the model repeating words or phrases in its output
+
+            OpenAI compatible
+        presence_penalty:
+          type: number
+          default: 0
+          description: |
+            Influences the generation of new and varied concepts in the model's output
+
+            OpenAI compatible
+        temperature:
+          type: number
+          default: 0.7
+          minimum: 0
+          maximum: 1
+          description: |
+            Controls the randomness of the model's output
+
+            OpenAI compatible
+        top_p:
+          type: number
+          default: 0.95
+          minimum: 0
+          maximum: 1
+          description: |
+            Sets the probability threshold for more relevant outputs
+
+            OpenAI compatible
+
+    ChatCompletionResponse:
+      type: object
+      description: The response structure returned for a chat completion request
+      properties:
+        choices:
+          type: array
+          description: Array of choice objects
+          items:
+            type: object
+            properties:
+              finish_reason:
+                type: string
+                nullable: true
+                example: null
+                description: Reason for finishing the response, if applicable
+              index:
+                type: integer
+                example: 0
+                description: Index of the choice
+              message:
+                type: object
+                properties:
+                  content:
+                    type: string
+                    example: "Hello user. What can I help you with?"
+                    description: Content of the message
+                  role:
+                    type: string
+                    example: assistant
+                    description: Role of the sender
+        created:
+          type: integer
+          example: 1700193928
+          description: Timestamp of when the response was created
+        id:
+          type: string
+          example: ebwd2niJvJB1Q2Whyvkz
+          description: Unique identifier of the response
+        model:
+          type: string
+          nullable: true
+          example: _
+          description: Model used for generating the response
+        object:
+          type: string
+          example: chat.completion
+          description: Type of the response object
+        system_fingerprint:
+          type: string
+          nullable: true
+          example: _
+          description: System fingerprint
+        usage:
+          type: object
+          description: Information about the usage of tokens
+          properties:
+            completion_tokens:
+              type: integer
+              example: 500
+              description: Number of tokens used for completion
+            prompt_tokens:
+              type: integer
+              example: 33
+              description: Number of tokens used in the prompt
+            total_tokens:
+              type: integer
+              example: 533
+              description: Total number of tokens used
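Read together, the per-field examples above assemble into a response body roughly like the following. This is a sketch for reviewers, not spec content: `model` and `system_fingerprint` are rendered as `null` here because the schema marks them nullable and their `_` placeholder examples are not meaningful values.

```json
{
  "choices": [
    {
      "finish_reason": null,
      "index": 0,
      "message": {
        "content": "Hello user. What can I help you with?",
        "role": "assistant"
      }
    }
  ],
  "created": 1700193928,
  "id": "ebwd2niJvJB1Q2Whyvkz",
  "model": null,
  "object": "chat.completion",
  "system_fingerprint": null,
  "usage": {
    "completion_tokens": 500,
    "prompt_tokens": 33,
    "total_tokens": 533
  }
}
```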