diff --git a/docs/openapi/jan.yaml b/docs/openapi/jan.yaml index bfff0ad73..864c80fdf 100644 --- a/docs/openapi/jan.yaml +++ b/docs/openapi/jan.yaml @@ -67,20 +67,31 @@ paths: x-codeSamples: - lang: cURL source: | - curl http://localhost:1337/v1/chat/completions \ - -H "Content-Type: application/json" \ + curl -X 'POST' \ + 'http://localhost:1337/v1/chat/completions' \ + -H 'accept: application/json' \ + -H 'Content-Type: application/json' \ -d '{ - "model": "tinyllama-1.1b", "messages": [ { - "role": "system", - "content": "You are a helpful assistant." + "content": "You are a helpful assistant.", + "role": "system" }, { - "role": "user", - "content": "Hello!" + "content": "Hello!", + "role": "user" } - ] + ], + "model": "tinyllama-1.1b", + "stream": true, + "max_tokens": 2048, + "stop": [ + "hello" + ], + "frequency_penalty": 0, + "presence_penalty": 0, + "temperature": 0.7, + "top_p": 0.95 }' /models: get: @@ -103,7 +114,9 @@ paths: x-codeSamples: - lang: cURL source: | - curl http://localhost:1337/v1/models + curl -X 'GET' \ + 'http://localhost:1337/v1/models' \ + -H 'accept: application/json' "/models/download/{model_id}": get: operationId: downloadModel @@ -131,7 +144,9 @@ paths: x-codeSamples: - lang: cURL source: | - curl -X POST http://localhost:1337/v1/models/download/{model_id} + curl -X 'GET' \ + 'http://localhost:1337/v1/models/download/{model_id}' \ + -H 'accept: application/json' "/models/{model_id}": get: operationId: retrieveModel @@ -162,7 +177,9 @@ paths: x-codeSamples: - lang: cURL source: | - curl http://localhost:1337/v1/models/{model_id} + curl -X 'GET' \ + 'http://localhost:1337/v1/models/{model_id}' \ + -H 'accept: application/json' delete: operationId: deleteModel tags: @@ -191,7 +208,9 @@ paths: x-codeSamples: - lang: cURL source: | - curl -X DELETE http://localhost:1337/v1/models/{model_id} + curl -X 'DELETE' \ + 'http://localhost:1337/v1/models/{model_id}' \ + -H 'accept: application/json' /threads: post: operationId: createThread diff --git a/docs/openapi/specs/assistants.yaml b/docs/openapi/specs/assistants.yaml index d784c315a..5db1f6a97 100644 --- a/docs/openapi/specs/assistants.yaml +++ b/docs/openapi/specs/assistants.yaml @@ -316,4 +316,4 @@ components: deleted: type: boolean description: Indicates whether the assistant was successfully deleted. - example: true \ No newline at end of file + example: true diff --git a/docs/openapi/specs/chat.yaml b/docs/openapi/specs/chat.yaml index b324501a8..cfa391598 100644 --- a/docs/openapi/specs/chat.yaml +++ b/docs/openapi/specs/chat.yaml @@ -188,4 +188,4 @@ components: total_tokens: type: integer example: 533 - description: Total number of tokens used \ No newline at end of file + description: Total number of tokens used diff --git a/docs/openapi/specs/messages.yaml b/docs/openapi/specs/messages.yaml index d9d7d87a4..6f5fe1a58 100644 --- a/docs/openapi/specs/messages.yaml +++ b/docs/openapi/specs/messages.yaml @@ -1,3 +1,4 @@ +--- components: schemas: MessageObject: @@ -75,7 +76,7 @@ components: example: msg_abc123 object: type: string - description: "Type of the object, indicating it's a thread message." + description: Type of the object, indicating it's a thread message. default: thread.message created_at: type: integer @@ -88,7 +89,7 @@ components: example: thread_abc123 role: type: string - description: "Role of the sender, either 'user' or 'assistant'." + description: Role of the sender, either 'user' or 'assistant'. example: user content: type: array @@ -97,7 +98,7 @@ components: properties: type: type: string - description: "Type of content, e.g., 'text'." + description: Type of content, e.g., 'text'. example: text text: type: object @@ -110,21 +111,21 @@ components: type: array items: type: string - description: "Annotations for the text content, if any." + description: Annotations for the text content, if any. example: [] file_ids: type: array items: type: string - description: "Array of file IDs associated with the message, if any." + description: Array of file IDs associated with the message, if any. example: [] assistant_id: type: string - description: "Identifier of the assistant involved in the message, if applicable." + description: Identifier of the assistant involved in the message, if applicable. example: null run_id: type: string - description: "Run ID associated with the message, if applicable." + description: Run ID associated with the message, if applicable. example: null metadata: type: object @@ -139,7 +140,7 @@ components: example: msg_abc123 object: type: string - description: "Type of the object, indicating it's a thread message." + description: Type of the object, indicating it's a thread message. example: thread.message created_at: type: integer @@ -152,7 +153,7 @@ components: example: thread_abc123 role: type: string - description: "Role of the sender, either 'user' or 'assistant'." + description: Role of the sender, either 'user' or 'assistant'. example: user content: type: array @@ -161,7 +162,7 @@ components: properties: type: type: string - description: "Type of content, e.g., 'text'." + description: Type of content, e.g., 'text'. example: text text: type: object @@ -174,21 +175,21 @@ components: type: array items: type: string - description: "Annotations for the text content, if any." + description: Annotations for the text content, if any. example: [] file_ids: type: array items: type: string - description: "Array of file IDs associated with the message, if any." + description: Array of file IDs associated with the message, if any. example: [] assistant_id: type: string - description: "Identifier of the assistant involved in the message, if applicable." + description: Identifier of the assistant involved in the message, if applicable. example: null run_id: type: string - description: "Run ID associated with the message, if applicable." + description: Run ID associated with the message, if applicable. example: null metadata: type: object @@ -199,7 +200,7 @@ components: properties: object: type: string - description: "Type of the object, indicating it's a list." + description: Type of the object, indicating it's a list. default: list data: type: array @@ -226,7 +227,7 @@ components: example: msg_abc123 object: type: string - description: "Type of the object, indicating it's a thread message." + description: Type of the object, indicating it's a thread message. example: thread.message created_at: type: integer @@ -239,7 +240,7 @@ components: example: thread_abc123 role: type: string - description: "Role of the sender, either 'user' or 'assistant'." + description: Role of the sender, either 'user' or 'assistant'. example: user content: type: array @@ -248,7 +249,7 @@ components: properties: type: type: string - description: "Type of content, e.g., 'text'." + description: Type of content, e.g., 'text'. text: type: object properties: @@ -260,20 +261,20 @@ components: type: array items: type: string - description: "Annotations for the text content, if any." + description: Annotations for the text content, if any. file_ids: type: array items: type: string - description: "Array of file IDs associated with the message, if any." + description: Array of file IDs associated with the message, if any. example: [] assistant_id: type: string - description: "Identifier of the assistant involved in the message, if applicable." + description: Identifier of the assistant involved in the message, if applicable. example: null run_id: type: string - description: "Run ID associated with the message, if applicable." + description: Run ID associated with the message, if applicable. example: null metadata: type: object @@ -309,4 +310,4 @@ components: data: type: array items: - $ref: "#/components/schemas/MessageFileObject" \ No newline at end of file + $ref: "#/components/schemas/MessageFileObject" diff --git a/docs/openapi/specs/models.yaml b/docs/openapi/specs/models.yaml index 8113f3ab8..40e6abaaf 100644 --- a/docs/openapi/specs/models.yaml +++ b/docs/openapi/specs/models.yaml @@ -18,114 +18,82 @@ components: Model: type: object properties: - type: + source_url: type: string - default: model - description: The type of the object. - version: - type: string - default: "1" - description: The version number of the model. + format: uri + description: URL to the source of the model. + example: https://huggingface.co/janhq/trinity-v1.2-GGUF/resolve/main/trinity-v1.2.Q4_K_M.gguf id: type: string - description: Unique identifier used in chat-completions model_name, matches + description: + Unique identifier used in chat-completions model_name, matches folder name. - example: zephyr-7b + example: trinity-v1.2-7b + object: + type: string + example: model name: type: string description: Name of the model. - example: Zephyr 7B - owned_by: + example: Trinity-v1.2 7B Q4 + version: type: string - description: Compatibility field for OpenAI. - default: "" - created: - type: integer - format: int64 - description: Unix timestamp representing the creation time. + default: "1.0" + description: The version number of the model. description: type: string description: Description of the model. - state: - type: string - enum: - - null - - downloading - - ready - - starting - - stopping - description: Current state of the model. + example: + Trinity is an experimental model merge using the Slerp method. + Recommended for daily assistance purposes. format: type: string description: State format of the model, distinct from the engine. - example: ggufv3 - source: - type: array - items: - type: object - properties: - url: - format: uri - description: URL to the source of the model. - example: https://huggingface.co/TheBloke/zephyr-7B-beta-GGUF/blob/main/zephyr-7b-beta.Q4_K_M.gguf - filename: - type: string - description: Filename of the model. - example: zephyr-7b-beta.Q4_K_M.gguf + example: gguf settings: type: object properties: ctx_len: - type: string + type: integer description: Context length. - example: "4096" - ngl: + example: 4096 + prompt_template: type: string - description: Number of layers. - example: "100" - embedding: - type: string - description: Indicates if embedding is enabled. - example: "true" - n_parallel: - type: string - description: Number of parallel processes. - example: "4" + example: "<|im_start|>system\n{system_message}<|im_end|>\n<|im_start|>user\n{prompt}<|im_end|>\n<|im_start|>assistant" additionalProperties: false parameters: type: object properties: temperature: - type: string - description: Temperature setting for the model. - example: "0.7" - token_limit: - type: string - description: Token limit for the model. - example: "4096" - top_k: - type: string - description: Top-k setting for the model. - example: "0" + example: 0.7 top_p: - type: string - description: Top-p setting for the model. - example: "1" + example: 0.95 stream: - type: string - description: Indicates if streaming is enabled. - example: "true" + example: true + max_tokens: + example: 4096 + stop: + example: [] + frequency_penalty: + example: 0 + presence_penalty: + example: 0 additionalProperties: false metadata: - type: object - description: Additional metadata. - assets: - type: array - items: + author: type: string - description: List of assets related to the model. - required: - - source + example: Jan + tags: + example: + - 7B + - Merged + - Featured + size: + example: 4370000000, + cover: + example: https://raw.githubusercontent.com/janhq/jan/main/models/trinity-v1.2-7b/cover.png + engine: + example: nitro ModelObject: type: object properties: @@ -133,7 +101,7 @@ components: type: string description: | The identifier of the model. - example: zephyr-7b + example: trinity-v1.2-7b object: type: string description: | @@ -153,197 +121,89 @@ components: GetModelResponse: type: object properties: + source_url: + type: string + format: uri + description: URL to the source of the model. + example: https://huggingface.co/TheBloke/Mistral-7B-Instruct-v0.2-GGUF/resolve/main/mistral-7b-instruct-v0.2.Q4_K_M.gguf id: type: string - description: The identifier of the model. - example: zephyr-7b + description: + Unique identifier used in chat-completions model_name, matches + folder name. + example: mistral-ins-7b-q4 object: type: string - description: Type of the object, indicating it's a model. - default: model - created: - type: integer - format: int64 - description: Unix timestamp representing the creation time of the model. - owned_by: + example: model + name: type: string - description: The entity that owns the model. - example: _ - state: + description: Name of the model. + example: Mistral Instruct 7B Q4 + version: type: string - enum: - - not_downloaded - - downloaded - - running - - stopped - description: The current state of the model. - source: - type: array - items: - type: object - properties: - url: - format: uri - description: URL to the source of the model. - example: https://huggingface.co/TheBloke/zephyr-7B-beta-GGUF/blob/main/zephyr-7b-beta.Q4_K_M.gguf - filename: - type: string - description: Filename of the model. - example: zephyr-7b-beta.Q4_K_M.gguf - engine_parameters: - type: object - properties: - pre_prompt: - type: string - description: Predefined prompt used for setting up internal configurations. - default: "" - example: Initial setup complete. - system_prompt: - type: string - description: Prefix used for system-level prompts. - default: "SYSTEM: " - user_prompt: - type: string - description: Prefix used for user prompts. - default: "USER: " - ai_prompt: - type: string - description: Prefix used for assistant prompts. - default: "ASSISTANT: " - ngl: - type: integer - description: Number of neural network layers loaded onto the GPU for - acceleration. - minimum: 0 - maximum: 100 - default: 100 - example: 100 - ctx_len: - type: integer - description: Context length for model operations, varies based on the specific - model. - minimum: 128 - maximum: 4096 - default: 4096 - example: 4096 - n_parallel: - type: integer - description: Number of parallel operations, relevant when continuous batching is - enabled. - minimum: 1 - maximum: 10 - default: 1 - example: 4 - cont_batching: - type: boolean - description: Indicates if continuous batching is used for processing. - default: false - example: false - cpu_threads: - type: integer - description: Number of threads allocated for CPU-based inference. - minimum: 1 - example: 8 - embedding: - type: boolean - description: Indicates if embedding layers are enabled in the model. - default: true - example: true - model_parameters: + default: "1.0" + description: The version number of the model. + description: + type: string + description: Description of the model. + example: + Trinity is an experimental model merge using the Slerp method. + Recommended for daily assistance purposes. + format: + type: string + description: State format of the model, distinct from the engine. + example: gguf + settings: type: object properties: ctx_len: type: integer - description: Maximum context length the model can handle. - minimum: 0 - maximum: 4096 - default: 4096 + description: Context length. example: 4096 - ngl: - type: integer - description: Number of layers in the neural network. - minimum: 1 - maximum: 100 - default: 100 - example: 100 - embedding: - type: boolean - description: Indicates if embedding layers are used. - default: true - example: true - n_parallel: - type: integer - description: Number of parallel processes the model can run. - minimum: 1 - maximum: 10 - default: 1 - example: 4 + prompt_template: + type: string + example: "[INST] {prompt} [/INST]" + additionalProperties: false + parameters: + type: object + properties: temperature: - type: number - description: Controls randomness in model's responses. Higher values lead to - more random responses. - minimum: 0 - maximum: 2 - default: 0.7 example: 0.7 - token_limit: - type: integer - description: Maximum number of tokens the model can generate in a single - response. - minimum: 1 - maximum: 4096 - default: 4096 - example: 4096 - top_k: - type: integer - description: Limits the model to consider only the top k most likely next tokens - at each step. - minimum: 0 - maximum: 100 - default: 0 - example: 0 top_p: - type: number - description: Nucleus sampling parameter. The model considers the smallest set of - tokens whose cumulative probability exceeds the top_p value. - minimum: 0 - maximum: 1 - default: 1 - example: 1 + example: 0.95 + stream: + example: true + max_tokens: + example: 4096 + stop: + example: [] + frequency_penalty: + example: 0 + presence_penalty: + example: 0 + additionalProperties: false metadata: - type: object - properties: - engine: - type: string - description: The engine used by the model. - enum: - - nitro - - openai - - hf_inference - quantization: - type: string - description: Quantization parameter of the model. - example: Q3_K_L - size: - type: string - description: Size of the model. - example: 7B - required: - - id - - object - - created - - owned_by - - state - - source - - parameters - - metadata + author: + type: string + example: MistralAI + tags: + example: + - 7B + - Featured + - Foundation Model + size: + example: 4370000000, + cover: + example: https://raw.githubusercontent.com/janhq/jan/main/models/mistral-ins-7b-q4/cover.png + engine: + example: nitro DeleteModelResponse: type: object properties: id: type: string description: The identifier of the model that was deleted. - example: model-zephyr-7B + example: mistral-ins-7b-q4 object: type: string description: Type of the object, indicating it's a model. diff --git a/docs/openapi/specs/threads.yaml b/docs/openapi/specs/threads.yaml index fe00f7588..40b2463fa 100644 --- a/docs/openapi/specs/threads.yaml +++ b/docs/openapi/specs/threads.yaml @@ -142,7 +142,7 @@ components: example: Jan instructions: type: string - description: | + description: > The instruction of assistant, defaults to "Be my grammar corrector" model: type: object @@ -224,4 +224,4 @@ components: deleted: type: boolean description: Indicates whether the thread was successfully deleted. - example: true \ No newline at end of file + example: true