From df883a7cb852ff0babc29523750a5c8907fd16f5 Mon Sep 17 00:00:00 2001 From: hiro Date: Sat, 18 Nov 2023 17:04:51 +0700 Subject: [PATCH] fix(spec): model spec update --- docs/docs/docs/specs/models.md | 241 +++++++++++++++++++++++---------- 1 file changed, 166 insertions(+), 75 deletions(-) diff --git a/docs/docs/docs/specs/models.md b/docs/docs/docs/specs/models.md index d04d2b2c3..053c99d3d 100644 --- a/docs/docs/docs/specs/models.md +++ b/docs/docs/docs/specs/models.md @@ -46,7 +46,7 @@ _Users can override run settings at runtime_ | `object` | enum: `model`, `assistant`, `thread`, `message` | Type of the Jan Object. Always `model` | Defaults to "model" | | `name` | string | A vanity name | Defaults to filename | | `description` | string | A vanity description of the model | Defaults to "" | -| `state` | enum[`running` , `stopped`, `not-downloaded` , `downloading`] | Needs more thought | Defaults to `not-downloaded` | +| `state` | enum[`to_download` , `downloading`, `ready` , `running`] | Needs more thought | Defaults to `to_download` | | `parameters` | map | Defines default model run parameters used by any assistant. | Defaults to `{}` | | `metadata` | map | Stores additional structured information about the model. | Defaults to `{}` | | `metadata.engine` | enum: `llamacpp`, `api`, `tensorrt` | The model backend used to run model. | Defaults to "llamacpp" | @@ -83,10 +83,10 @@ Additionally, Jan supports importing popular formats. 
For example, if you provid Supported URL formats with custom importers: -- `huggingface/thebloke`: `TODO: URL here` +- `huggingface/thebloke`: [Link](https://huggingface.co/TheBloke/Llama-2-7B-GGUF) - `janhq`: `TODO: put URL here` -- `azure_openai`: `TODO: put URL here` -- `openai`: `TODO: put URL here` +- `azure_openai`: `https://docs-test-001.openai.azure.com/openai.azure.com/docs-test-001/gpt4-turbo` +- `openai`: `api.openai.com` ### Generic Example @@ -98,52 +98,66 @@ Supported URL formats with custom importers: // Note: Default fields omitted for brevity "source_url": "https://huggingface.co/TheBloke/zephyr-7B-beta-GGUF/blob/main/zephyr-7b-beta.Q4_K_M.gguf", "parameters": { - "ctx_len": 2048, - "ngl": 100, - "embedding": true, - "n_parallel": 4, + "init": { + "ctx_len": "2048", + "ngl": "100", + "embedding": "true", + "n_parallel": "4", "pre_prompt": "A chat between a curious user and an artificial intelligence", "user_prompt": "USER: ", "ai_prompt": "ASSISTANT: " + }, + "runtime": { "temperature": "0.7", "token_limit": "2048", - "top_k": "..", - "top_p": "..", + "top_k": "0", + "top_p": "1", + "stream": "true" + } }, "metadata": { - "quantization": "..", - "size": "..", + "engine": "llamacpp", + "quantization": "Q3_K_L", + "size": "7B" } ``` ### Example: multiple binaries -- Model has multiple binaries +- Model has multiple binaries `model-llava-1.5-ggml.json` - See [source](https://huggingface.co/mys/ggml_llava-v1.5-13b) ```json -"source_url": "https://huggingface.co/mys/ggml_llava-v1.5-13b" +"source_url": "https://huggingface.co/mys/ggml_llava-v1.5-13b", +"parameters": {"init": {}, "runtime": {}}, "metadata": { - "binaries": "..", // TODO: what should this property be + "mmproj_binary": "https://huggingface.co/mys/ggml_llava-v1.5-13b/blob/main/mmproj-model-f16.gguf", + "ggml_binary": "https://huggingface.co/mys/ggml_llava-v1.5-13b/blob/main/ggml-model-q5_k.gguf", + "engine": "llamacpp", + "quantization": "Q5_K" } ``` ### Example: Azure API -- Using a remote
API to access model +- Using a remote API to access model `model-azure-openai-gpt4-turbo.json` - See [source](https://learn.microsoft.com/en-us/azure/ai-services/openai/quickstart?tabs=command-line%2Cpython&pivots=rest-api) ```json "source_url": "https://docs-test-001.openai.azure.com/openai.azure.com/docs-test-001/gpt4-turbo", "parameters": { + "init": { "API-KEY": "", "DEPLOYMENT-NAME": "", - "api-version": "2023-05-15", + "api-version": "2023-05-15" + }, + "runtime": { "temperature": "0.7", "max_tokens": "2048", "presence_penalty": "0", "top_p": "1", "stream": "true" + } } "metadata": { "engine": "api", @@ -155,7 +169,7 @@ Supported URL formats with custom importers: - Everything needed to represent a `model` is packaged into an `Model folder`. - The `folder` is standalone and can be easily zipped, imported, and exported, e.g. to Github. - The `folder` always contains at least one `Model Object`, declared in a `json` format. - - The `folder` and `file` do not have to share the same name +- The `folder` and `file` do not have to share the same name - The model `id` is made up of `folder_name/filename` and is thus always unique. ```sh @@ -170,11 +184,9 @@ Supported URL formats with custom importers: ``` ### Default ./model folder - - Jan ships with a default model folders containing recommended models - Only the Model Object `json` files are included - Users must later explicitly download the model binaries - ```sh models/ mistral-7b/ @@ -182,7 +194,6 @@ models/ hermes-7b/ hermes-7b.json ``` - ### Multiple quantizations - Each quantization has its own `Jan Model Object` file @@ -193,7 +204,6 @@ llama2-7b-gguf/ llama2-7b-gguf-Q3_K_L.json .bin ``` - ### Multiple model partitions - A Model that is partitioned into several binaries use just 1 file @@ -204,8 +214,7 @@ llava-ggml/ .proj ggml ``` - -### ?? whats this example for? +### Your locally fine-tuned model - ??
@@ -214,67 +223,149 @@ llama-70b-finetune/ llama-70b-finetune-q5.json .bin ``` - ## Jan API - ### Model API Object - - The `Jan Model Object` maps into the `OpenAI Model Object`. - Properties marked with `*` are compatible with the [OpenAI `model` object](https://platform.openai.com/docs/api-reference/models) - Note: The `Jan Model Object` has additional properties when retrieved via its API endpoint. -- https://platform.openai.com/docs/api-reference/models/object +> OpenAI Equivalent: https://platform.openai.com/docs/api-reference/models/object -| Property | Type | Public Description | Jan Model Object (`m`) Property | -| ------------- | -------------- | ----------------------------------------------------------- | -------------------------------------------- | -| `id`\* | string | Model uuid; also the file location under `/models` | `folder/filename` | -| `object`\* | string | Always "model" | `m.object` | -| `created`\* | integer | Timestamp when model was created. | `m.json` creation time | -| `owned_by`\* | string | The organization that owns the model. | grep author from `m.source_url` OR $(whoami) | -| `name` | string or null | A display name | `m.name` or filename | -| `description` | string | A vanity description of the model | `m.description` | -| `state` | enum | | | -| `parameters` | map | Defines default model run parameters used by any assistant. | | -| `metadata` | map | Stores additional structured information about the model. 
| | - ### List models - -- https://platform.openai.com/docs/api-reference/models/list - -TODO: @hiro +### Model lifecycle +A model has 4 states (enum): +- `to_download` +- `downloading` +- `ready` +- `running` ### Get Model - -- https://platform.openai.com/docs/api-reference/models/retrieve - -TODO: @hiro - +> OpenAI Equivalent: https://platform.openai.com/docs/api-reference/models/retrieve +- Example request +```shell +curl {JAN_URL}/v1/models/{model_id} +``` +- Example response +```json +{ + "id": "model-zephyr-7B", + "object": "model", + "created_at": 1686935002, + "owned_by": "thebloke", + "state": "running", + "source_url": "https://huggingface.co/TheBloke/zephyr-7B-beta-GGUF/blob/main/zephyr-7b-beta.Q4_K_M.gguf", + "parameters": { + "ctx_len": 2048, + "ngl": 100, + "embedding": true, + "n_parallel": 4, + "pre_prompt": "A chat between a curious user and an artificial intelligence", + "user_prompt": "USER: ", + "ai_prompt": "ASSISTANT: ", + "temperature": "0.7", + "token_limit": "2048", + "top_k": "0", + "top_p": "1" + }, + "metadata": { + "engine": "llamacpp", + "quantization": "Q3_K_L", + "size": "7B" + } +} +``` ### List models +Lists the currently available models, and provides basic information about each one such as the owner and availability.
+> OpenAI Equivalent: https://platform.openai.com/docs/api-reference/models/list +- Example request +```shell +curl {JAN_URL}/v1/models +``` +- Example response +```json +{ + "object": "list", + "data": [ + { + "id": "model-zephyr-7B", + "object": "model", + "created_at": 1686935002, + "owned_by": "thebloke", + "state": "running" + }, + { + "id": "ft-llama-70b-gguf", + "object": "model", + "created_at": 1686935002, + "owned_by": "you", + "state": "ready" + }, + { + "id": "model-azure-openai-gpt4-turbo", + "object": "model", + "created_at": 1686935002, + "owned_by": "azure_openai", + "state": "running" + } + ], + "object": "list" +} +``` ### Delete Model - -- https://platform.openai.com/docs/api-reference/models/delete - -TODO: @hiro - -### Get Model State - -> Jan-only endpoint -> TODO: @hiro - -### Get Model Metadata - -> Jan-only endpoint -> TODO: @hiro - -### Download Model - -> Jan-only endpoint -> TODO: @hiro - +> OpenAI Equivalent: https://platform.openai.com/docs/api-reference/models/delete +- Example request +```shell +curl -X DELETE {JAN_URL}/v1/models/{model_id} +``` +- Example response +```json +{ + "id": "model-zephyr-7B", + "object": "model", + "deleted": true, + "state": "to_download" +} +``` ### Start Model - > Jan-only endpoint -> TODO: @hiro - +The request to start `model` by changing model state from `ready` to `running` +- Example request +```shell +curl -X PUT {JAN_URL}/v1/models/{model_id}/start +``` +- Example response +```json +{ + "id": "model-zephyr-7B", + "object": "model", + "state": "running" +} +``` ### Stop Model - > Jan-only endpoint -> TODO: @hiro +The request to stop `model` by changing model state from `running` to `ready` +- Example request +```shell +curl -X PUT {JAN_URL}/v1/models/{model_id}/stop +``` +- Example response +```json +{ + "id": "model-zephyr-7B", + "object": "model", + "state": "ready" +} +``` +### Download Model +> Jan-only endpoint +The request to download `model` by changing model state from `to_download` to
`downloading` then `ready` once it's done. +- Example request +```shell +curl -X POST {JAN_URL}/v1/models/ +``` +- Example response +```json +{ + "id": "model-zephyr-7B", + "object": "model", + "state": "downloading" +} +``` \ No newline at end of file