From d2b26d480607b0dd2bc7dd424b7afe343f2b1a45 Mon Sep 17 00:00:00 2001 From: hiro Date: Thu, 16 Nov 2023 16:09:22 +0700 Subject: [PATCH 1/7] feat: Quick round 1st draft --- docs/docs/docs/specs/models.md | 179 ++++++++++++++++++++++++--------- 1 file changed, 133 insertions(+), 46 deletions(-) diff --git a/docs/docs/docs/specs/models.md b/docs/docs/docs/specs/models.md index 29b45f6a0..b2de17bf4 100644 --- a/docs/docs/docs/specs/models.md +++ b/docs/docs/docs/specs/models.md @@ -8,37 +8,99 @@ Models are AI models like Llama and Mistral ## Model Object -- `model.json` +- LOCAL MODEL `model-zephyr-7B.json` + - Reference: https://huggingface.co/TheBloke/zephyr-7B-beta-GGUF/ > Equivalent to: https://platform.openai.com/docs/api-reference/models/object -```json -{ - // OpenAI model compatibility - // https://platform.openai.com/docs/api-reference/models) - "id": "llama-2-uuid", - "object": "model", - "created": 1686935002, - "owned_by": "you" +```sh= +# Required - // Model settings (benchmark: Ollama) - // https://github.com/jmorganca/ollama/blob/main/docs/modelfile.md#template - "model_name": "llama2", - "model_path": "ROOT/models/...", - "parameters": { - "temperature": "..", - "token-limit": "..", - "top-k": "..", - "top-p": ".." - }, - "template": "This is a full prompt template", - "system": "This is a system prompt", +"url": TheBloke/zephyr-7B-beta-GGUF + +# Optional - by default use `default`` +import_format: thebloke +# default # downloads the whole thing +# thebloke # custom importer (detects from URL) +# janhq # Custom importers +# openai +"default_download": zephyr-7b-beta.Q2_K.gguf # optional, by default download model with recommended hardware - // Model metadata (benchmark: HuggingFace) - "version": "...", - "author": "...", - "tags": "...", - ... 
+# Optional: OpenAI format +"id": "/huggingface.co/TheBloke/zephyr-7B-beta-GGUF", # Autofilled by Jan with required URL above +"object": "model", +"created": 1686935002, +"owned_by": "TheBloke" + +# Optional: params +"init_parameters": { + "ctx_len": 2048, + "ngl": 100, + "embedding": true, + "n_parallel": 4, + "pre_prompt": "A chat between a curious user and an artificial intelligence", + "user_prompt": "USER: ", + "ai_prompt": "ASSISTANT: " +}, + +"runtime_parameters": { + "temperature": "0.7", + "token_limit": "2048", + "top_k": "", + "top_p": "..", +} + +// Jan specific configs +"metadata": { // @Q: should we put all under "jan" + "engine": "api", // enum[llamacpp,api] +} +``` + +- REMOTE MODEL `model-azure-openai-gpt4-turbo.json` + - Reference: https://learn.microsoft.com/en-us/azure/ai-services/openai/quickstart?tabs=command-line%2Cpython&pivots=rest-api + +> Equivalent to: https://platform.openai.com/docs/api-reference/models/object + +```sh= +# Required + +"url": https://docs-test-001.openai.azure.com/ # This is `api.openai.com` if it's OpenAI platform + +# Optional - by default use `default`` +import_format: azure_openai +# default # downloads the whole thing +# thebloke # custom importer (detects from URL) +# janhq # Custom importers +# azure_openai # Custom importers +# openai # Custom importers +"default_download": zephyr-7b-beta.Q2_K.gguf # optional, by default download model with recommended hardware + +# Optional: OpenAI format +"id": "/openai.azure.com/docs-test-001/gpt4-turbo", # Autofilled by Jan with required URL above +"object": "model", +"created": 1686935002, +"owned_by": "OpenAI Azure" + +# Optional: params +# This is the one model gets configured and cannot be changed by assistant +"init_parameters": { + "API-KEY": "", + "DEPLOYMENT-NAME": "", + "api-version": "2023-05-15" +}, + +# This is the one that assistant can override +"runtime_parameters": { + "temperature": "0.7", + "max_tokens": "2048", + "presence_penalty": "0", + "top_p": "1", + 
"stream": "true" +} + +// Jan specific configs +"metadata": { // @Q: should we put all under "jan" + "engine": "api", // enum[llamacpp,api] } ``` @@ -49,33 +111,58 @@ See [/model](/api/model) - Equivalent to: https://platform.openai.com/docs/api-reference/models ```sh -GET https://localhost:1337/v1/models # List models -GET https://localhost:1337/v1/models/{model} # Get model object -DELETE https://localhost:1337/v1/models/{model} # Delete model +# List models +GET https://localhost:1337/v1/models?filter=[enum](all,running,downloaded,downloading) +List[model_object] + +# Get model object +GET https://localhost:1337/v1/models/{model_id} # json file name as {model_id} model-azure-openai-gpt4-turbo, model-zephyr-7B +model_object + +# Delete model +DELETE https://localhost:1337/v1/models/{model_id} # json file name as {model_id} model-azure-openai-gpt4-turbo, model-zephyr-7B -TODO: -# Start model # Stop model +PUT https://localhost:1337/v1/models/{model_id}/stop # json file name as {model_id} model-azure-openai-gpt4-turbo, model-zephyr-7B + +# Start model +PUT https://localhost:1337/v1/models/{model_id}/start # json file name as {model_id} model-azure-openai-gpt4-turbo, model-zephyr-7B +{ + "id": [string] # The model name to be used in `chat_completion` = model_id + "model_parameters": [jsonPayload], + "engine": [enum](llamacpp) +} ``` ## Model Filesystem How `models` map onto your local filesystem -```sh +```shell= /janroot /models - /modelA - model.json # Default model params - modelA.gguf - modelA.bin - /modelB/* - model.json - modelB.gguf - /assistants - model.json # Defines model, default: looks in `/models` - /models # Optional /models folder that overrides root - /modelA - model.json - modelA.bin -``` + llama2-70b.json + llama2-7b-gguf.json + + huggingface.co/ # Model registries (de-factor open source) + meta-llama/ + llama2-70b-chat-hf/ + llama2-7b-chat/ + thebloke/ + llama2-70b-chat-hf-gguf/ + llama2-7b-chat/ + llama7b_q2_K_L.gguf + llama7b_q3_K_L.gguf + 
model.louis.ai/ # Private model registries + meta-llama/ + llama2-70b-chat-hf-tensorrt-llm/ + llama2-70b-chat-hf-awq/ + model.json + thebloke/ + llava-1-5-gguf/ # Use case with multiple model + mmproj.bin + model-q5.ggml + + llama-70b-finetune.bin + llama-70b-finetune.json +``` \ No newline at end of file From 6ca8ce24db50f077e54d3b5760ddfbfd930d0834 Mon Sep 17 00:00:00 2001 From: hiro Date: Thu, 16 Nov 2023 17:10:11 +0700 Subject: [PATCH 2/7] fix: Update content based on engs sync --- docs/docs/docs/specs/models.md | 186 ++++++++++++++++++++------------- 1 file changed, 114 insertions(+), 72 deletions(-) diff --git a/docs/docs/docs/specs/models.md b/docs/docs/docs/specs/models.md index b2de17bf4..8d95043c2 100644 --- a/docs/docs/docs/specs/models.md +++ b/docs/docs/docs/specs/models.md @@ -7,64 +7,106 @@ Models are AI models like Llama and Mistral > OpenAI Equivalent: https://platform.openai.com/docs/api-reference/models ## Model Object - -- LOCAL MODEL `model-zephyr-7B.json` - - Reference: https://huggingface.co/TheBloke/zephyr-7B-beta-GGUF/ - > Equivalent to: https://platform.openai.com/docs/api-reference/models/object -```sh= -# Required +- LOCAL MODEL - 1 binary `model-zephyr-7B.json` - [Reference](https://huggingface.co/TheBloke/zephyr-7B-beta-GGUF/) -"url": TheBloke/zephyr-7B-beta-GGUF +```json +# Required +"origin": "TheBloke/zephyr-7B-beta-GGUF" # Optional - by default use `default`` -import_format: thebloke +"import_format": "thebloke" # default # downloads the whole thing # thebloke # custom importer (detects from URL) # janhq # Custom importers # openai -"default_download": zephyr-7b-beta.Q2_K.gguf # optional, by default download model with recommended hardware + +# optional, by default download model with recommended hardware +"download_url": "zephyr-7b-beta.Q2_K.gguf" - +# https://huggingface.co/TheBloke/zephyr-7B-beta-GGUF/resolve/main/zephyr-7b-beta.Q2_K.gguf?download=true # Optional: OpenAI format -"id": 
"/huggingface.co/TheBloke/zephyr-7B-beta-GGUF", # Autofilled by Jan with required URL above +"id": {model_file_name}, # No need to specify, only need to return in API "object": "model", -"created": 1686935002, +"created": 1686935002, # Unix timestamp "owned_by": "TheBloke" # Optional: params -"init_parameters": { - "ctx_len": 2048, - "ngl": 100, - "embedding": true, - "n_parallel": 4, - "pre_prompt": "A chat between a curious user and an artificial intelligence", - "user_prompt": "USER: ", - "ai_prompt": "ASSISTANT: " -}, - -"runtime_parameters": { - "temperature": "0.7", - "token_limit": "2048", - "top_k": "", - "top_p": "..", +parameters: { + "init": { + "ctx_len": 2048, + "ngl": 100, + "embedding": true, + "n_parallel": 4, + "pre_prompt": "A chat between a curious user and an artificial intelligence", + "user_prompt": "USER: ", + "ai_prompt": "ASSISTANT: " + }, + "runtime": { + "temperature": "0.7", + "token_limit": "2048", + "top_k": "", + "top_p": "..", + } } // Jan specific configs "metadata": { // @Q: should we put all under "jan" - "engine": "api", // enum[llamacpp,api] + "engine": "llamacpp", // enum[llamacpp,api] } ``` -- REMOTE MODEL `model-azure-openai-gpt4-turbo.json` - - Reference: https://learn.microsoft.com/en-us/azure/ai-services/openai/quickstart?tabs=command-line%2Cpython&pivots=rest-api +- LOCAL MODEL - multiple binaries `model-llava-v1.5-ggml.json` [Reference](https://huggingface.co/mys/ggml_llava-v1.5-13b) -> Equivalent to: https://platform.openai.com/docs/api-reference/models/object - -```sh= +```json # Required -"url": https://docs-test-001.openai.azure.com/ # This is `api.openai.com` if it's OpenAI platform +"origin": "mys/ggml_llava-v1.5-13b" + +# Optional - by default use `default`` +"import_format": "default" +# default # downloads the whole thing +# thebloke # custom importer (detects from URL) +# janhq # Custom importers +# openai + +# Optional: OpenAI format +"id": {model_file_name}, # No need to specify, only need to return in 
API"object": "model", +"created": 1686935002, +"owned_by": "TheBloke" + +# Optional: params +parameters: { + "init": { + "ctx_len": 2048, + "ngl": 100, + "embedding": true, + "n_parallel": 4, + "pre_prompt": "A chat between a curious user and an artificial intelligence", + "user_prompt": "USER: ", + "ai_prompt": "ASSISTANT: " + }, + "runtime": { + "temperature": "0.7", + "token_limit": "2048", + "top_k": "", + "top_p": "..", + } +} + +// Jan specific configs +"metadata": { // @Q: should we put all under "jan" + "engine": "llamacpp", // enum[llamacpp,api] +} +``` + +- REMOTE MODEL `model-azure-openai-gpt4-turbo.json` - [Reference](https://learn.microsoft.com/en-us/azure/ai-services/openai/)quickstart?tabs=command-line%2Cpython&pivots=rest-api + +```json +# Required +"origin": "https://docs-test-001.openai.azure.com/" +# This is `api.openai.com` if it's OpenAI platform # Optional - by default use `default`` import_format: azure_openai @@ -73,7 +115,6 @@ import_format: azure_openai # janhq # Custom importers # azure_openai # Custom importers # openai # Custom importers -"default_download": zephyr-7b-beta.Q2_K.gguf # optional, by default download model with recommended hardware # Optional: OpenAI format "id": "/openai.azure.com/docs-test-001/gpt4-turbo", # Autofilled by Jan with required URL above @@ -83,19 +124,20 @@ import_format: azure_openai # Optional: params # This is the one model gets configured and cannot be changed by assistant -"init_parameters": { - "API-KEY": "", - "DEPLOYMENT-NAME": "", - "api-version": "2023-05-15" -}, -# This is the one that assistant can override -"runtime_parameters": { - "temperature": "0.7", - "max_tokens": "2048", - "presence_penalty": "0", - "top_p": "1", - "stream": "true" +parameters: { + "init": { + "API-KEY": "", + "DEPLOYMENT-NAME": "", + "api-version": "2023-05-15" + }, + "runtime": { + "temperature": "0.7", + "max_tokens": "2048", + "presence_penalty": "0", + "top_p": "1", + "stream": "true" + } } // Jan specific configs @@ 
-105,7 +147,6 @@ import_format: azure_openai ``` ## Model API - See [/model](/api/model) - Equivalent to: https://platform.openai.com/docs/api-reference/models @@ -130,7 +171,7 @@ PUT https://localhost:1337/v1/models/{model_id}/start # json file name as {model { "id": [string] # The model name to be used in `chat_completion` = model_id "model_parameters": [jsonPayload], - "engine": [enum](llamacpp) + "engine": [enum](llamacpp,openai) } ``` @@ -141,28 +182,29 @@ How `models` map onto your local filesystem ```shell= /janroot /models - llama2-70b.json - llama2-7b-gguf.json + azure-openai/ + azure-openai-gpt3-5.json + + llama2-70b/ + model.json + .gguf - huggingface.co/ # Model registries (de-factor open source) - meta-llama/ - llama2-70b-chat-hf/ - llama2-7b-chat/ - thebloke/ - llama2-70b-chat-hf-gguf/ - llama2-7b-chat/ - llama7b_q2_K_L.gguf - llama7b_q3_K_L.gguf - model.louis.ai/ # Private model registries - meta-llama/ - llama2-70b-chat-hf-tensorrt-llm/ - llama2-70b-chat-hf-awq/ - model.json - thebloke/ - llava-1-5-gguf/ # Use case with multiple model - mmproj.bin - model-q5.ggml - - llama-70b-finetune.bin - llama-70b-finetune.json -``` \ No newline at end of file + llama2-7b-gguf/ + llama2-7b-gguf-Q2.json + llama2-7b-gguf-Q3_K_L.json + .bin + + llava-ggml/ + llava-ggml-Q5.json + .proj + ggml + + llama-70b-finetune + llama-70b-finetune-q5.json + .bin +``` + +- Test cases + 1. If user airdrop model, drag and drop to Jan (bin + json file), Jan can pick up and use + 2. If user have fine tuned model, same as step 1 + 3. 
If user have 1 model that needs multiple binaries \ No newline at end of file From f70a16523d32cb7f0119385fdd2f58428c819fa7 Mon Sep 17 00:00:00 2001 From: hiro Date: Thu, 16 Nov 2023 17:36:39 +0700 Subject: [PATCH 3/7] chore: Refactor spec outline --- docs/docs/docs/specs/models.md | 141 +++++++++++++++++++++------------ 1 file changed, 90 insertions(+), 51 deletions(-) diff --git a/docs/docs/docs/specs/models.md b/docs/docs/docs/specs/models.md index 8d95043c2..4b53b279f 100644 --- a/docs/docs/docs/specs/models.md +++ b/docs/docs/docs/specs/models.md @@ -1,15 +1,44 @@ ---- -title: "Models" ---- - -Models are AI models like Llama and Mistral +# Model Specs > OpenAI Equivalent: https://platform.openai.com/docs/api-reference/models -## Model Object +## User Stories + +*Users can download from model registries or reuse downloaded model binaries with an model* + +*Users can use some default assistants* +- User can use existing models (openai, llama2-7b-Q3) right away +- User can browse model in model catalog +- If user airdrop model, drag and drop to Jan (bin + json file), Jan can pick up and use + +*Users can create an model from scratch* +- User can choose model from remote model registry or even their fine-tuned model locally, even multiple model binaries +- User can import and use the model easily on Jan + +*Users can create an custom model from an existing model* + + +## Jan Model Object > Equivalent to: https://platform.openai.com/docs/api-reference/models/object -- LOCAL MODEL - 1 binary `model-zephyr-7B.json` - [Reference](https://huggingface.co/TheBloke/zephyr-7B-beta-GGUF/) + +| Property | Type | Description | Validation | +| -------- | -------- | -------- | -------- | +| `origin` | string | Unique identifier for the source of the model object. | Required | +| `import_format` | enum: `default`, `thebloke`, `janhq`, `openai` | Specifies the format for importing the object. | Defaults to `default` | +| `download_url` | string | URL for downloading the model. 
| Optional; defaults to model with recommended hardware | +| `id` | string | Identifier of the model file. Used mainly for API responses. | Optional; auto-generated if not specified | +| `object` | enum: `model`, `assistant`, `thread`, `message` | Type of the Jan Object. | Defaults to `model` | +| `created` | integer | Unix timestamp of the model's creation time. | Optional | +| `owned_by` | string | Identifier of the owner of the model. | Optional | +| `parameters` | object | Defines initialization and runtime parameters for the assistant. | Optional; specific sub-properties for `init` and `runtime` | +| -- `init` | object | Defines initialization parameters for the model. | Required | +| --`runtime` | object | Defines runtime parameters for the model. | Optional; Can be overridden by `Asissitant` | + +| `metadata` | map | Stores additional structured information about the model. | Optional; defaults to `{}` | + +### LOCAL MODEL - 1 binary `model-zephyr-7B.json` +> [Reference](https://huggingface.co/TheBloke/zephyr-7B-beta-GGUF/) ```json # Required @@ -32,7 +61,6 @@ Models are AI models like Llama and Mistral "created": 1686935002, # Unix timestamp "owned_by": "TheBloke" -# Optional: params parameters: { "init": { "ctx_len": 2048, @@ -57,11 +85,11 @@ parameters: { } ``` -- LOCAL MODEL - multiple binaries `model-llava-v1.5-ggml.json` [Reference](https://huggingface.co/mys/ggml_llava-v1.5-13b) +### LOCAL MODEL - multiple binaries `model-llava-v1.5-ggml.json` +> [Reference](https://huggingface.co/mys/ggml_llava-v1.5-13b) ```json # Required - "origin": "mys/ggml_llava-v1.5-13b" # Optional - by default use `default`` @@ -76,7 +104,6 @@ parameters: { "created": 1686935002, "owned_by": "TheBloke" -# Optional: params parameters: { "init": { "ctx_len": 2048, @@ -101,7 +128,8 @@ parameters: { } ``` -- REMOTE MODEL `model-azure-openai-gpt4-turbo.json` - 
[Reference](https://learn.microsoft.com/en-us/azure/ai-services/openai/)quickstart?tabs=command-line%2Cpython&pivots=rest-api +### REMOTE MODEL `model-azure-openai-gpt4-turbo.json` +> [Reference](https://learn.microsoft.com/en-us/azure/ai-services/openai/quickstart?tabs=command-line%2Cpython&pivots=rest-api) ```json # Required @@ -109,7 +137,7 @@ parameters: { # This is `api.openai.com` if it's OpenAI platform # Optional - by default use `default`` -import_format: azure_openai +"import_format": "azure_openai" # default # downloads the whole thing # thebloke # custom importer (detects from URL) # janhq # Custom importers @@ -122,9 +150,6 @@ import_format: azure_openai "created": 1686935002, "owned_by": "OpenAI Azure" -# Optional: params -# This is the one model gets configured and cannot be changed by assistant - parameters: { "init": { "API-KEY": "", @@ -146,37 +171,7 @@ parameters: { } ``` -## Model API -See [/model](/api/model) - -- Equivalent to: https://platform.openai.com/docs/api-reference/models - -```sh -# List models -GET https://localhost:1337/v1/models?filter=[enum](all,running,downloaded,downloading) -List[model_object] - -# Get model object -GET https://localhost:1337/v1/models/{model_id} # json file name as {model_id} model-azure-openai-gpt4-turbo, model-zephyr-7B -model_object - -# Delete model -DELETE https://localhost:1337/v1/models/{model_id} # json file name as {model_id} model-azure-openai-gpt4-turbo, model-zephyr-7B - -# Stop model -PUT https://localhost:1337/v1/models/{model_id}/stop # json file name as {model_id} model-azure-openai-gpt4-turbo, model-zephyr-7B - -# Start model -PUT https://localhost:1337/v1/models/{model_id}/start # json file name as {model_id} model-azure-openai-gpt4-turbo, model-zephyr-7B -{ - "id": [string] # The model name to be used in `chat_completion` = model_id - "model_parameters": [jsonPayload], - "engine": [enum](llamacpp,openai) -} -``` - -## Model Filesystem - +## Filesystem How `models` map onto your local 
filesystem ```shell= @@ -204,7 +199,51 @@ How `models` map onto your local filesystem .bin ``` -- Test cases - 1. If user airdrop model, drag and drop to Jan (bin + json file), Jan can pick up and use - 2. If user have fine tuned model, same as step 1 - 3. If user have 1 model that needs multiple binaries \ No newline at end of file +## Jan API +### Jan Model API +> Equivalent to: https://platform.openai.com/docs/api-reference/models + +```sh +# List models +GET https://localhost:1337/v1/models?state=[enum](all,running,downloaded,downloading) +[ + { + "id": "model-azure-openai-gpt4-turbo", # Autofilled by Jan with required URL above + "object": "model", + "created": 1686935002, + "owned_by": "OpenAI Azure", + "state": enum[all,running,downloaded,downloading] + }, + { + "id": "model-llava-v1.5-ggml", # Autofilled by Jan with required URL above + "object": "model", + "created": 1686935002, + "owned_by": "mys", + "state": enum[all,running,downloaded,downloading] + } +] + +# Get model object +GET https://localhost:1337/v1/models/{model_id} # json file name as {model_id} model-azure-openai-gpt4-turbo, model-zephyr-7B +{ + "id": "model-azure-openai-gpt4-turbo", # Autofilled by Jan with required URL above + "object": "model", + "created": 1686935002, + "owned_by": "OpenAI Azure", + "state": enum[all,running,downloaded,downloading] +}, + +# Delete model +DELETE https://localhost:1337/v1/models/{model_id} # json file name as {model_id} model-azure-openai-gpt4-turbo, model-zephyr-7B + +# Stop model +PUT https://localhost:1337/v1/models/{model_id}/stop # json file name as {model_id} model-azure-openai-gpt4-turbo, model-zephyr-7B + +# Start model +PUT https://localhost:1337/v1/models/{model_id}/start # json file name as {model_id} model-azure-openai-gpt4-turbo, model-zephyr-7B +{ + "id": [string] # The model name to be used in `chat_completion` = model_id + "model_parameters": [jsonPayload], + "engine": [enum](llamacpp,openai) +} +``` \ No newline at end of file From 
75365b22fac30e0a116330fa340b94dba1fbd46b Mon Sep 17 00:00:00 2001 From: 0xSage Date: Fri, 17 Nov 2023 13:29:58 +0800 Subject: [PATCH 4/7] docs: improve model object --- docs/docs/docs/specs/models.md | 257 ++++++++++++++------------------- 1 file changed, 110 insertions(+), 147 deletions(-) diff --git a/docs/docs/docs/specs/models.md b/docs/docs/docs/specs/models.md index 4b53b279f..a1d2c4ae4 100644 --- a/docs/docs/docs/specs/models.md +++ b/docs/docs/docs/specs/models.md @@ -4,174 +4,135 @@ ## User Stories -*Users can download from model registries or reuse downloaded model binaries with an model* +_Users can download a model via a web URL_ -*Users can use some default assistants* -- User can use existing models (openai, llama2-7b-Q3) right away -- User can browse model in model catalog -- If user airdrop model, drag and drop to Jan (bin + json file), Jan can pick up and use +- Wireframes here -*Users can create an model from scratch* -- User can choose model from remote model registry or even their fine-tuned model locally, even multiple model binaries -- User can import and use the model easily on Jan +_Users can import a model from local directory_ -*Users can create an custom model from an existing model* +- Wireframes here +_Users can configure model settings, like run parameters_ + +- Wireframes here ## Jan Model Object -> Equivalent to: https://platform.openai.com/docs/api-reference/models/object +- A `Jan Model Object` is a “representation of an model +- Objects are defined by `model-uuid.json` files in json format +- Objects are identified by `folder-name/model-uuid`, where its `id` is indicative of its file location. +- Objects are designed to be compatible with `OpenAI Model Objects`, with additional properties needed to run on our infrastructure. +- ALL object properties are optional, i.e. users should be able to run a model declared by an empty `json` file. 
-| Property | Type | Description | Validation | -| -------- | -------- | -------- | -------- | -| `origin` | string | Unique identifier for the source of the model object. | Required | -| `import_format` | enum: `default`, `thebloke`, `janhq`, `openai` | Specifies the format for importing the object. | Defaults to `default` | -| `download_url` | string | URL for downloading the model. | Optional; defaults to model with recommended hardware | -| `id` | string | Identifier of the model file. Used mainly for API responses. | Optional; auto-generated if not specified | -| `object` | enum: `model`, `assistant`, `thread`, `message` | Type of the Jan Object. | Defaults to `model` | -| `created` | integer | Unix timestamp of the model's creation time. | Optional | -| `owned_by` | string | Identifier of the owner of the model. | Optional | -| `parameters` | object | Defines initialization and runtime parameters for the assistant. | Optional; specific sub-properties for `init` and `runtime` | -| -- `init` | object | Defines initialization parameters for the model. | Required | -| --`runtime` | object | Defines runtime parameters for the model. | Optional; Can be overridden by `Asissitant` | +| Property | Type | Description | Validation | +| ----------------- | ----------------------------------------------- | ------------------------------------------------------------------------- | ------------------------------------------------ | +| `source_url` | string | The model download source. It can be an external url or a local filepath. | Defaults to `pwd`. See [Source_url](#Source_url) | +| `object` | enum: `model`, `assistant`, `thread`, `message` | Type of the Jan Object. Always `model` | Defaults to "model" | +| `name` | string | A vanity name | Defaults to filename | +| `description` | string | A vanity description of the model | Defaults to "" | +| `parameters` | map | Defines default model run parameters used by any assistant. 
| Defaults to `{}` | +| `metadata` | map | Stores additional structured information about the model. | Defaults to `{}` | +| `metadata.engine` | enum: `llamacpp`, `api`, `tensorrt` | The model backend used to run model. | Defaults to "llamacpp" | -| `metadata` | map | Stores additional structured information about the model. | Optional; defaults to `{}` | +### Source_url -### LOCAL MODEL - 1 binary `model-zephyr-7B.json` -> [Reference](https://huggingface.co/TheBloke/zephyr-7B-beta-GGUF/) +- Users can download models from a `remote` source or reference an existing `local` model. +- If this property is not specified in the Model Object file, then the default behavior is to look in the current directory. + +#### Local source_url + +- Users can import a local model by providing the filepath to the model ```json -# Required -"origin": "TheBloke/zephyr-7B-beta-GGUF" - -# Optional - by default use `default`` -"import_format": "thebloke" -# default # downloads the whole thing -# thebloke # custom importer (detects from URL) -# janhq # Custom importers -# openai +// ./models/llama2/llama2-7bn-gguf.json +"source_url": "~/Downloads/llama-2-7bn-q5-k-l.gguf", -# optional, by default download model with recommended hardware -"download_url": "zephyr-7b-beta.Q2_K.gguf" - -# https://huggingface.co/TheBloke/zephyr-7B-beta-GGUF/resolve/main/zephyr-7b-beta.Q2_K.gguf?download=true +// Default, if property is omitted +"source_url": "./", +``` -# Optional: OpenAI format -"id": {model_file_name}, # No need to specify, only need to return in API -"object": "model", -"created": 1686935002, # Unix timestamp -"owned_by": "TheBloke" +#### Remote source_url -parameters: { - "init": { - "ctx_len": 2048, - "ngl": 100, - "embedding": true, - "n_parallel": 4, - "pre_prompt": "A chat between a curious user and an artificial intelligence", - "user_prompt": "USER: ", - "ai_prompt": "ASSISTANT: " - }, - "runtime": { - "temperature": "0.7", - "token_limit": "2048", - "top_k": "", - "top_p": "..", - } 
-} +- Users can download a model by remote URL. +- Supported url formats: + - `https://huggingface.co/TheBloke/Llama-2-7B-Chat-GGUF/blob/main/llama-2-7b-chat.Q3_K_L.gguf` + - `https://any-source.com/.../model-binary.bin` -// Jan specific configs -"metadata": { // @Q: should we put all under "jan" - "engine": "llamacpp", // enum[llamacpp,api] +#### Custom importers + +Additionally, Jan supports importing popular formats. For example, if you provide a HuggingFace URL for a `TheBloke` model, Jan automatically downloads and catalogs all quantizations. Custom importers autofills properties like `metadata.quantization` and `metadata.size`. + +Supported URL formats with custom importers: + +- `huggingface/thebloke`: `TODO: URL here` +- `janhq`: `TODO: put URL here` +- `azure_openai`: `TODO: put URL here` +- `openai`: `TODO: put URL here` + +### Generic Example + +- Model has 1 binary `model-zephyr-7B.json` +- See [source](https://huggingface.co/TheBloke/zephyr-7B-beta-GGUF/) + +```json +// ./models/zephr/zephyr-7b-beta-Q4_K_M.json +// Note: Default fields are omitted for brevity +"source_url": "https://huggingface.co/TheBloke/zephyr-7B-beta-GGUF/blob/main/zephyr-7b-beta.Q4_K_M.gguf", +"parameters": { + "ctx_len": 2048, + "ngl": 100, + "embedding": true, + "n_parallel": 4, + "pre_prompt": "A chat between a curious user and an artificial intelligence", + "user_prompt": "USER: ", + "ai_prompt": "ASSISTANT: " + "temperature": "0.7", + "token_limit": "2048", + "top_k": "..", + "top_p": "..", +}, +"metadata": { + "quantization": "..", + "size": "..", } ``` -### LOCAL MODEL - multiple binaries `model-llava-v1.5-ggml.json` -> [Reference](https://huggingface.co/mys/ggml_llava-v1.5-13b) +### Example: multiple binaries `model-llava-v1.5-ggml.json` + +- Model has multiple binaries +- See [source](https://huggingface.co/mys/ggml_llava-v1.5-13b) ```json -# Required -"origin": "mys/ggml_llava-v1.5-13b" - -# Optional - by default use `default`` -"import_format": "default" -# default # 
downloads the whole thing -# thebloke # custom importer (detects from URL) -# janhq # Custom importers -# openai - -# Optional: OpenAI format -"id": {model_file_name}, # No need to specify, only need to return in API"object": "model", -"created": 1686935002, -"owned_by": "TheBloke" - -parameters: { - "init": { - "ctx_len": 2048, - "ngl": 100, - "embedding": true, - "n_parallel": 4, - "pre_prompt": "A chat between a curious user and an artificial intelligence", - "user_prompt": "USER: ", - "ai_prompt": "ASSISTANT: " - }, - "runtime": { - "temperature": "0.7", - "token_limit": "2048", - "top_k": "", - "top_p": "..", - } -} - -// Jan specific configs -"metadata": { // @Q: should we put all under "jan" - "engine": "llamacpp", // enum[llamacpp,api] +"source_url": "https://huggingface.co/mys/ggml_llava-v1.5-13b" +"metadata": { + "binaries": "..", } ``` -### REMOTE MODEL `model-azure-openai-gpt4-turbo.json` -> [Reference](https://learn.microsoft.com/en-us/azure/ai-services/openai/quickstart?tabs=command-line%2Cpython&pivots=rest-api) +### Example: Azure API + +- Using a remote API to access model +- See [source](https://learn.microsoft.com/en-us/azure/ai-services/openai/quickstart?tabs=command-line%2Cpython&pivots=rest-api) ```json -# Required -"origin": "https://docs-test-001.openai.azure.com/" -# This is `api.openai.com` if it's OpenAI platform - -# Optional - by default use `default`` -"import_format": "azure_openai" -# default # downloads the whole thing -# thebloke # custom importer (detects from URL) -# janhq # Custom importers -# azure_openai # Custom importers -# openai # Custom importers - -# Optional: OpenAI format -"id": "/openai.azure.com/docs-test-001/gpt4-turbo", # Autofilled by Jan with required URL above -"object": "model", -"created": 1686935002, -"owned_by": "OpenAI Azure" - -parameters: { - "init": { - "API-KEY": "", - "DEPLOYMENT-NAME": "", - "api-version": "2023-05-15" - }, - "runtime": { - "temperature": "0.7", - "max_tokens": "2048", - 
"presence_penalty": "0", - "top_p": "1", - "stream": "true" - } +"source_url": "https://docs-test-001.openai.azure.com/openai.azure.com/docs-test-001/gpt4-turbo", +"parameters": { + "API-KEY": "", + "DEPLOYMENT-NAME": "", + "api-version": "2023-05-15", + "temperature": "0.7", + "max_tokens": "2048", + "presence_penalty": "0", + "top_p": "1", + "stream": "true" } - -// Jan specific configs -"metadata": { // @Q: should we put all under "jan" - "engine": "api", // enum[llamacpp,api] +"metadata": { + "engine": "api", } ``` ## Filesystem + How `models` map onto your local filesystem ```shell= @@ -183,24 +144,26 @@ How `models` map onto your local filesystem llama2-70b/ model.json .gguf - + llama2-7b-gguf/ llama2-7b-gguf-Q2.json llama2-7b-gguf-Q3_K_L.json .bin - + llava-ggml/ llava-ggml-Q5.json .proj ggml - + llama-70b-finetune llama-70b-finetune-q5.json .bin ``` ## Jan API + ### Jan Model API + > Equivalent to: https://platform.openai.com/docs/api-reference/models ```sh @@ -208,16 +171,16 @@ How `models` map onto your local filesystem GET https://localhost:1337/v1/models?state=[enum](all,running,downloaded,downloading) [ { - "id": "model-azure-openai-gpt4-turbo", # Autofilled by Jan with required URL above + "id": "model-azure-openai-gpt4-turbo", # Autofilled by Jan with required URL above "object": "model", - "created": 1686935002, + "created": 1686935002, "owned_by": "OpenAI Azure", "state": enum[all,running,downloaded,downloading] }, { - "id": "model-llava-v1.5-ggml", # Autofilled by Jan with required URL above + "id": "model-llava-v1.5-ggml", # Autofilled by Jan with required URL above "object": "model", - "created": 1686935002, + "created": 1686935002, "owned_by": "mys", "state": enum[all,running,downloaded,downloading] } @@ -226,9 +189,9 @@ GET https://localhost:1337/v1/models?state=[enum](all,running,downloaded,downloa # Get model object GET https://localhost:1337/v1/models/{model_id} # json file name as {model_id} model-azure-openai-gpt4-turbo, model-zephyr-7B { 
- "id": "model-azure-openai-gpt4-turbo", # Autofilled by Jan with required URL above + "id": "model-azure-openai-gpt4-turbo", # Autofilled by Jan with required URL above "object": "model", - "created": 1686935002, + "created": 1686935002, "owned_by": "OpenAI Azure", "state": enum[all,running,downloaded,downloading] }, @@ -246,4 +209,4 @@ PUT https://localhost:1337/v1/models/{model_id}/start # json file name as {model "model_parameters": [jsonPayload], "engine": [enum](llamacpp,openai) } -``` \ No newline at end of file +``` From fb24f775d1e70947199d3b696ea37459938a1a3e Mon Sep 17 00:00:00 2001 From: 0xSage Date: Fri, 17 Nov 2023 13:36:08 +0800 Subject: [PATCH 5/7] docs: fixes --- docs/docs/docs/specs/models.md | 67 ++++++++++++++++++++++++---------- 1 file changed, 48 insertions(+), 19 deletions(-) diff --git a/docs/docs/docs/specs/models.md b/docs/docs/docs/specs/models.md index a1d2c4ae4..c38f75071 100644 --- a/docs/docs/docs/specs/models.md +++ b/docs/docs/docs/specs/models.md @@ -133,34 +133,63 @@ Supported URL formats with custom importers: ## Filesystem -How `models` map onto your local filesystem +- Everything needed to represent a `model` is packaged into an `Model folder`. +- The folder is standalone and can be easily zipped, imported, and exported, e.g. to Github. +- The folder always contains at least one `Model Object`, declared in a `json` format. + - The folder and file do not have to share the same name +- The model `id` is made up of `folder_name` + `filename` and is thus always unique. 
-```shell= +```sh /janroot /models - azure-openai/ - azure-openai-gpt3-5.json + azure-openai/ # Folder name + azure-openai-gpt3-5.json # File name llama2-70b/ model.json .gguf - - llama2-7b-gguf/ - llama2-7b-gguf-Q2.json - llama2-7b-gguf-Q3_K_L.json - .bin - - llava-ggml/ - llava-ggml-Q5.json - .proj - ggml - - llama-70b-finetune - llama-70b-finetune-q5.json - .bin ``` -## Jan API +### Default model package + +- Jan ships iwht a dfault model package containing recommended models +- ONly the Model Object `json` files are included +- Users must explicitly download the model binaries + +```sh +mistral-7b/ + mistral-7b.json +hermes-7b/ + hermes-7b.json +``` + +### Multiple quantizations + +```sh +llama2-7b-gguf/ + llama2-7b-gguf-Q2.json + llama2-7b-gguf-Q3_K_L.json + .bin +``` + +### Multiple model partitions + +```sh +llava-ggml/ + llava-ggml-Q5.json + .proj + ggml +``` + +### ?? whats this example for? + +```sh +llama-70b-finetune/ + llama-70b-finetune-q5.json + .bin +``` + +## Jan API - TODO ### Jan Model API From f303364d2f0e0790ee680896c6d774ce1d9c4357 Mon Sep 17 00:00:00 2001 From: 0xSage Date: Fri, 17 Nov 2023 13:37:46 +0800 Subject: [PATCH 6/7] docs: nits --- docs/docs/docs/specs/models.md | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/docs/docs/docs/specs/models.md b/docs/docs/docs/specs/models.md index c38f75071..10ba84640 100644 --- a/docs/docs/docs/specs/models.md +++ b/docs/docs/docs/specs/models.md @@ -19,8 +19,8 @@ _Users can configure model settings, like run parameters_ ## Jan Model Object - A `Jan Model Object` is a “representation of an model -- Objects are defined by `model-uuid.json` files in json format -- Objects are identified by `folder-name/model-uuid`, where its `id` is indicative of its file location. +- Objects are defined by `model-name.json` files in json format +- Objects are identified by `folder-name/model-name`, where its `id` is indicative of its file location. 
- Objects are designed to be compatible with `OpenAI Model Objects`, with additional properties needed to run on our infrastructure. - ALL object properties are optional, i.e. users should be able to run a model declared by an empty `json` file. From 867c2d9b9927e197ff77f3f063925c8fda2124d0 Mon Sep 17 00:00:00 2001 From: 0xSage Date: Fri, 17 Nov 2023 14:01:41 +0800 Subject: [PATCH 7/7] docs: add api sections that need to be done --- docs/docs/docs/specs/models.md | 165 +++++++++++++++++++-------------- 1 file changed, 96 insertions(+), 69 deletions(-) diff --git a/docs/docs/docs/specs/models.md b/docs/docs/docs/specs/models.md index 10ba84640..482a7a037 100644 --- a/docs/docs/docs/specs/models.md +++ b/docs/docs/docs/specs/models.md @@ -16,23 +16,30 @@ _Users can configure model settings, like run parameters_ - Wireframes here +_Users can override run settings at runtime_ + +- See [assistant]() and [thread]() + ## Jan Model Object -- A `Jan Model Object` is a “representation of an model -- Objects are defined by `model-name.json` files in json format +- A `Jan Model Object` is a “representation" of a model +- Objects are defined by `model-name.json` files in `json` format - Objects are identified by `folder-name/model-name`, where its `id` is indicative of its file location. - Objects are designed to be compatible with `OpenAI Model Objects`, with additional properties needed to run on our infrastructure. - ALL object properties are optional, i.e. users should be able to run a model declared by an empty `json` file. -| Property | Type | Description | Validation | -| ----------------- | ----------------------------------------------- | ------------------------------------------------------------------------- | ------------------------------------------------ | -| `source_url` | string | The model download source. It can be an external url or a local filepath. | Defaults to `pwd`. 
See [Source_url](#Source_url) | -| `object` | enum: `model`, `assistant`, `thread`, `message` | Type of the Jan Object. Always `model` | Defaults to "model" | -| `name` | string | A vanity name | Defaults to filename | -| `description` | string | A vanity description of the model | Defaults to "" | -| `parameters` | map | Defines default model run parameters used by any assistant. | Defaults to `{}` | -| `metadata` | map | Stores additional structured information about the model. | Defaults to `{}` | -| `metadata.engine` | enum: `llamacpp`, `api`, `tensorrt` | The model backend used to run model. | Defaults to "llamacpp" | +| Property | Type | Description | Validation | +| ----------------------- | ------------------------------------------------------------- | ------------------------------------------------------------------------- | ------------------------------------------------ | +| `source_url` | string | The model download source. It can be an external url or a local filepath. | Defaults to `pwd`. See [Source_url](#Source_url) | +| `object` | enum: `model`, `assistant`, `thread`, `message` | Type of the Jan Object. Always `model` | Defaults to "model" | +| `name` | string | A vanity name | Defaults to filename | +| `description` | string | A vanity description of the model | Defaults to "" | +| `state` | enum[`running` , `stopped`, `not-downloaded` , `downloading`] | Needs more thought | Defaults to `not-downloaded` | +| `parameters` | map | Defines default model run parameters used by any assistant. | Defaults to `{}` | +| `metadata` | map | Stores additional structured information about the model. | Defaults to `{}` | +| `metadata.engine` | enum: `llamacpp`, `api`, `tensorrt` | The model backend used to run model. | Defaults to "llamacpp" | +| `metadata.quantization` | string | Supported formats only | See [Custom importers](#Custom-importers) | +| `metadata.binaries` | array | Supported formats only. 
| See [Custom importers](#Custom-importers) | ### Source_url @@ -76,7 +83,7 @@ Supported URL formats with custom importers: ```json // ./models/zephr/zephyr-7b-beta-Q4_K_M.json -// Note: Default fields are omitted for brevity +// Note: Default fields omitted for brevity "source_url": "https://huggingface.co/TheBloke/zephyr-7B-beta-GGUF/blob/main/zephyr-7b-beta.Q4_K_M.gguf", "parameters": { "ctx_len": 2048, @@ -97,7 +104,7 @@ Supported URL formats with custom importers: } ``` -### Example: multiple binaries `model-llava-v1.5-ggml.json` +### Example: multiple binaries - Model has multiple binaries - See [source](https://huggingface.co/mys/ggml_llava-v1.5-13b) @@ -105,7 +112,7 @@ Supported URL formats with custom importers: ```json "source_url": "https://huggingface.co/mys/ggml_llava-v1.5-13b" "metadata": { - "binaries": "..", + "binaries": "..", // TODO: what should this property be } ``` @@ -134,10 +141,10 @@ Supported URL formats with custom importers: ## Filesystem - Everything needed to represent a `model` is packaged into an `Model folder`. -- The folder is standalone and can be easily zipped, imported, and exported, e.g. to Github. -- The folder always contains at least one `Model Object`, declared in a `json` format. - - The folder and file do not have to share the same name -- The model `id` is made up of `folder_name` + `filename` and is thus always unique. +- The `folder` is standalone and can be easily zipped, imported, and exported, e.g. to Github. +- The `folder` always contains at least one `Model Object`, declared in a `json` format. + - The `folder` and `file` do not have to share the same name +- The model `id` is made up of `folder_name/filename` and is thus always unique. 

```sh
/janroot
  /models
    azure-openai/ # Folder name
      azure-openai-gpt3-5.json # File name
    llama2-70b/
      model.json
      .gguf
```

-### Default model package
+### Default ./model folder

-- Jan ships iwht a dfault model package containing recommended models
-- ONly the Model Object `json` files are included
-- Users must explicitly download the model binaries
+- Jan ships with a default model folder containing recommended models
+- Only the Model Object `json` files are included
+- Users must later explicitly download the model binaries

```sh
-mistral-7b/
-  mistral-7b.json
-hermes-7b/
-  hermes-7b.json
+models/
+  mistral-7b/
+    mistral-7b.json
+  hermes-7b/
+    hermes-7b.json
```

### Multiple quantizations

+- Each quantization has its own `Jan Model Object` file
+
```sh
llama2-7b-gguf/
  llama2-7b-gguf-Q2.json
@@ -174,6 +184,8 @@ llama2-7b-gguf/

### Multiple model partitions

+- A Model that is partitioned into several binaries uses just 1 file
+
```sh
llava-ggml/
  llava-ggml-Q5.json
@@ -183,59 +195,74 @@ llava-ggml/

### ?? whats this example for?

+- ??
+
```sh
llama-70b-finetune/
  llama-70b-finetune-q5.json
  .bin
```

-## Jan API - TODO
+## Jan API

-### Jan Model API
+### Model API Object

-> Equivalent to: https://platform.openai.com/docs/api-reference/models
+- The `Jan Model Object` maps into the `OpenAI Model Object`.
+- Properties marked with `*` are compatible with the [OpenAI `model` object](https://platform.openai.com/docs/api-reference/models)
+- Note: The `Jan Model Object` has additional properties when retrieved via its API endpoint. 
+- https://platform.openai.com/docs/api-reference/models/object -```sh -# List models -GET https://localhost:1337/v1/models?state=[enum](all,running,downloaded,downloading) -[ - { - "id": "model-azure-openai-gpt4-turbo", # Autofilled by Jan with required URL above - "object": "model", - "created": 1686935002, - "owned_by": "OpenAI Azure", - "state": enum[all,running,downloaded,downloading] - }, - { - "id": "model-llava-v1.5-ggml", # Autofilled by Jan with required URL above - "object": "model", - "created": 1686935002, - "owned_by": "mys", - "state": enum[all,running,downloaded,downloading] - } -] +| Property | Type | Public Description | Jan Model Object (`m`) Property | +| ------------- | -------------- | ----------------------------------------------------------- | -------------------------------------------- | +| `id`\* | string | Model uuid; also the file location under `/models` | `folder/filename` | +| `object`\* | string | Always "model" | `m.object` | +| `created`\* | integer | Timestamp when model was created. | `m.json` creation time | +| `owned_by`\* | string | The organization that owns the model. | grep author from `m.source_url` OR $(whoami) | +| `name` | string or null | A display name | `m.name` or filename | +| `description` | string | A vanity description of the model | `m.description` | +| `state` | enum | | | +| `parameters` | map | Defines default model run parameters used by any assistant. | | +| `metadata` | map | Stores additional structured information about the model. 
| | -# Get model object -GET https://localhost:1337/v1/models/{model_id} # json file name as {model_id} model-azure-openai-gpt4-turbo, model-zephyr-7B -{ - "id": "model-azure-openai-gpt4-turbo", # Autofilled by Jan with required URL above - "object": "model", - "created": 1686935002, - "owned_by": "OpenAI Azure", - "state": enum[all,running,downloaded,downloading] -}, +### List models -# Delete model -DELETE https://localhost:1337/v1/models/{model_id} # json file name as {model_id} model-azure-openai-gpt4-turbo, model-zephyr-7B +- https://platform.openai.com/docs/api-reference/models/list -# Stop model -PUT https://localhost:1337/v1/models/{model_id}/stop # json file name as {model_id} model-azure-openai-gpt4-turbo, model-zephyr-7B +TODO: @hiro -# Start model -PUT https://localhost:1337/v1/models/{model_id}/start # json file name as {model_id} model-azure-openai-gpt4-turbo, model-zephyr-7B -{ - "id": [string] # The model name to be used in `chat_completion` = model_id - "model_parameters": [jsonPayload], - "engine": [enum](llamacpp,openai) -} -``` +### Get Model + +- https://platform.openai.com/docs/api-reference/models/retrieve + +TODO: @hiro + +### Delete Model + +- https://platform.openai.com/docs/api-reference/models/delete + +TODO: @hiro + +### Get Model State + +> Jan-only endpoint +> TODO: @hiro + +### Get Model Metadata + +> Jan-only endpoint +> TODO: @hiro + +### Download Model + +> Jan-only endpoint +> TODO: @hiro + +### Start Model + +> Jan-only endpoint +> TODO: @hiro + +### Stop Model + +> Jan-only endpoint +> TODO: @hiro