From df883a7cb852ff0babc29523750a5c8907fd16f5 Mon Sep 17 00:00:00 2001
From: hiro <vuonghoainam.work@gmail.com>
Date: Sat, 18 Nov 2023 17:04:51 +0700
Subject: [PATCH] fix(spec): model spec update

---
 docs/docs/docs/specs/models.md | 241 +++++++++++++++++++++++----------
 1 file changed, 166 insertions(+), 75 deletions(-)

diff --git a/docs/docs/docs/specs/models.md b/docs/docs/docs/specs/models.md
index d04d2b2c3..053c99d3d 100644
--- a/docs/docs/docs/specs/models.md
+++ b/docs/docs/docs/specs/models.md
@@ -46,7 +46,7 @@ _Users can override run settings at runtime_
 | `object`                | enum: `model`, `assistant`, `thread`, `message`               | Type of the Jan Object. Always `model`                                    | Defaults to "model"                              |
 | `name`                  | string                                                        | A vanity name                                                             | Defaults to filename                             |
 | `description`           | string                                                        | A vanity description of the model                                         | Defaults to ""                                   |
-| `state`                 | enum[`running` , `stopped`, `not-downloaded` , `downloading`] | Needs more thought                                                        | Defaults to `not-downloaded`                     |
+| `state`                 | enum[`to_download` , `downloading`, `ready` , `running`] | Needs more thought                                                        | Defaults to `to_download`                     |
 | `parameters`            | map                                                           | Defines default model run parameters used by any assistant.               | Defaults to `{}`                                 |
 | `metadata`              | map                                                           | Stores additional structured information about the model.                 | Defaults to `{}`                                 |
 | `metadata.engine`       | enum: `llamacpp`, `api`, `tensorrt`                           | The model backend used to run model.                                      | Defaults to "llamacpp"                           |
@@ -83,10 +83,10 @@ Additionally, Jan supports importing popular formats. For example, if you provid
 
 Supported URL formats with custom importers:
 
-- `huggingface/thebloke`: `TODO: URL here`
+- `huggingface/thebloke`: [Link](https://huggingface.co/TheBloke/Llama-2-7B-GGUF)
 - `janhq`: `TODO: put URL here`
-- `azure_openai`: `TODO: put URL here`
-- `openai`: `TODO: put URL here`
+- `azure_openai`: `https://docs-test-001.openai.azure.com/openai.azure.com/docs-test-001/gpt4-turbo`
+- `openai`: `api.openai.com`
 
 ### Generic Example
 
@@ -98,52 +98,66 @@ Supported URL formats with custom importers:
 // Note: Default fields omitted for brevity
 "source_url": "https://huggingface.co/TheBloke/zephyr-7B-beta-GGUF/blob/main/zephyr-7b-beta.Q4_K_M.gguf",
 "parameters": {
-    "ctx_len": 2048,
-    "ngl": 100,
-    "embedding": true,
-    "n_parallel": 4,
+  "init": {
+    "ctx_len": "2048",
+    "ngl": "100",
+    "embedding": "true",
+    "n_parallel": "4",
     "pre_prompt": "A chat between a curious user and an artificial intelligence",
     "user_prompt": "USER: ",
     "ai_prompt": "ASSISTANT: "
+  },
+  "runtime": {
     "temperature": "0.7",
     "token_limit": "2048",
-    "top_k": "..",
-    "top_p": "..",
+    "top_k": "0",
+    "top_p": "1",
+    "stream": "true"
+  }
 },
 "metadata": {
-    "quantization": "..",
-    "size": "..",
+    "engine": "llamacpp",
+    "quantization": "Q3_K_L",
+    "size": "7B"
 }
 ```
 
 ### Example: multiple binaries
 
-- Model has multiple binaries
+- Model has multiple binaries `model-llava-1.5-ggml.json`
 - See [source](https://huggingface.co/mys/ggml_llava-v1.5-13b)
 
 ```json
-"source_url": "https://huggingface.co/mys/ggml_llava-v1.5-13b"
+"source_url": "https://huggingface.co/mys/ggml_llava-v1.5-13b",
+"parameters": {"init": {}, "runtime": {}},
 "metadata": {
-    "binaries": "..", // TODO: what should this property be
+    "mmproj_binary": "https://huggingface.co/mys/ggml_llava-v1.5-13b/blob/main/mmproj-model-f16.gguf",
+    "ggml_binary": "https://huggingface.co/mys/ggml_llava-v1.5-13b/blob/main/ggml-model-q5_k.gguf",
+    "engine": "llamacpp",
+    "quantization": "Q5_K"
 }
 ```
 
 ### Example: Azure API
 
-- Using a remote API to access model
+- Using a remote API to access model `model-azure-openai-gpt4-turbo.json`
 - See [source](https://learn.microsoft.com/en-us/azure/ai-services/openai/quickstart?tabs=command-line%2Cpython&pivots=rest-api)
 
 ```json
 "source_url": "https://docs-test-001.openai.azure.com/openai.azure.com/docs-test-001/gpt4-turbo",
 "parameters": {
+  "init": {
     "API-KEY": "",
     "DEPLOYMENT-NAME": "",
-    "api-version": "2023-05-15",
+    "api-version": "2023-05-15"
+  },
+  "runtime": {
     "temperature": "0.7",
     "max_tokens": "2048",
     "presence_penalty": "0",
     "top_p": "1",
     "stream": "true"
+  }
 }
 "metadata": {
     "engine": "api",
@@ -155,7 +169,7 @@ Supported URL formats with custom importers:
 - Everything needed to represent a `model` is packaged into an `Model folder`.
 - The `folder` is standalone and can be easily zipped, imported, and exported, e.g. to Github.
 - The `folder` always contains at least one `Model Object`, declared in a `json` format.
-  - The `folder` and `file` do not have to share the same name
+- The `folder` and `file` do not have to share the same name
 - The model `id` is made up of `folder_name/filename` and is thus always unique.
 
 ```sh
@@ -170,11 +184,9 @@ Supported URL formats with custom importers:
 ```
 
 ### Default ./model folder
-
 - Jan ships with a default model folders containing recommended models
 - Only the Model Object `json` files are included
 - Users must later explicitly download the model binaries
-
 ```sh
 models/
     mistral-7b/
@@ -182,7 +194,6 @@ models/
     hermes-7b/
         hermes-7b.json
 ```
-
 ### Multiple quantizations
 
 - Each quantization has its own `Jan Model Object` file
@@ -193,7 +204,6 @@ llama2-7b-gguf/
     llama2-7b-gguf-Q3_K_L.json
     .bin
 ```
-
 ### Multiple model partitions
 
 - A Model that is partitioned into several binaries use just 1 file
@@ -204,8 +214,7 @@ llava-ggml/
     .proj
     ggml
 ```
-
-### ?? whats this example for?
+### Your locally fine-tuned model
 
 - ??
 
@@ -214,67 +223,149 @@ llama-70b-finetune/
     llama-70b-finetune-q5.json
     .bin
 ```
-
 ## Jan API
-
 ### Model API Object
-
 - The `Jan Model Object` maps into the `OpenAI Model Object`.
 - Properties marked with `*` are compatible with the [OpenAI `model` object](https://platform.openai.com/docs/api-reference/models)
 - Note: The `Jan Model Object` has additional properties when retrieved via its API endpoint.
-- https://platform.openai.com/docs/api-reference/models/object
+> OpenAI Equivalent: https://platform.openai.com/docs/api-reference/models/object
 
-| Property      | Type           | Public Description                                          | Jan Model Object (`m`) Property              |
-| ------------- | -------------- | ----------------------------------------------------------- | -------------------------------------------- |
-| `id`\*        | string         | Model uuid; also the file location under `/models`          | `folder/filename`                            |
-| `object`\*    | string         | Always "model"                                              | `m.object`                                   |
-| `created`\*   | integer        | Timestamp when model was created.                           | `m.json` creation time                       |
-| `owned_by`\*  | string         | The organization that owns the model.                       | grep author from `m.source_url` OR $(whoami) |
-| `name`        | string or null | A display name                                              | `m.name` or filename                         |
-| `description` | string         | A vanity description of the model                           | `m.description`                              |
-| `state`       | enum           |                                                             |                                              |
-| `parameters`  | map            | Defines default model run parameters used by any assistant. |                                              |
-| `metadata`    | map            | Stores additional structured information about the model.   |                                              |
-
-### List models
-
-- https://platform.openai.com/docs/api-reference/models/list
-
-TODO: @hiro
+### Model lifecycle
+A model has 4 states (enum):
+- `to_download`
+- `downloading`
+- `ready`
+- `running`
 
 ### Get Model
-
-- https://platform.openai.com/docs/api-reference/models/retrieve
-
-TODO: @hiro
-
+> OpenAI Equivalent: https://platform.openai.com/docs/api-reference/models/retrieve
+- Example request
+```shell
+curl {JAN_URL}/v1/models/{model_id}
+```
+- Example response
+```json
+{
+  "id": "model-zephyr-7B",
+  "object": "model",
+  "created_at": 1686935002,
+  "owned_by": "thebloke",
+  "state": "running",
+  "source_url": "https://huggingface.co/TheBloke/zephyr-7B-beta-GGUF/blob/main/zephyr-7b-beta.Q4_K_M.gguf",
+  "parameters": {
+     "ctx_len": 2048,
+     "ngl": 100,
+     "embedding": true,
+     "n_parallel": 4,
+     "pre_prompt": "A chat between a curious user and an artificial intelligence",
+     "user_prompt": "USER: ",
+     "ai_prompt": "ASSISTANT: ",
+     "temperature": "0.7",
+     "token_limit": "2048",
+     "top_k": "0",
+     "top_p": "1"
+  },
+  "metadata": {
+     "engine": "llamacpp",
+     "quantization": "Q3_K_L",
+     "size": "7B"
+  }
+}
+```
+### List models
+Lists the currently available models, and provides basic information about each one such as the owner and availability.
+> OpenAI Equivalent: https://platform.openai.com/docs/api-reference/models/list
+- Example request
+```shell
+curl {JAN_URL}/v1/models
+```
+- Example response
+```json
+{
+  "object": "list",
+  "data": [
+    {
+      "id": "model-zephyr-7B",
+      "object": "model",
+      "created_at": 1686935002,
+      "owned_by": "thebloke",
+      "state": "running"
+    },
+    {
+      "id": "ft-llama-70b-gguf",
+      "object": "model",
+      "created_at": 1686935002,
+      "owned_by": "you",
+      "state": "ready"
+    },
+    {
+      "id": "model-azure-openai-gpt4-turbo",
+      "object": "model",
+      "created_at": 1686935002,
+      "owned_by": "azure_openai",
+      "state": "running"
+    }
+  ],
+  "object": "list"
+}
+```
 ### Delete Model
-
-- https://platform.openai.com/docs/api-reference/models/delete
-
-TODO: @hiro
-
-### Get Model State
-
-> Jan-only endpoint
-> TODO: @hiro
-
-### Get Model Metadata
-
-> Jan-only endpoint
-> TODO: @hiro
-
-### Download Model
-
-> Jan-only endpoint
-> TODO: @hiro
-
+> OpenAI Equivalent: https://platform.openai.com/docs/api-reference/models/delete
+- Example request
+```shell
+curl -X DELETE {JAN_URL}/v1/models/{model_id}
+```
+- Example response
+```json
+{
+  "id": "model-zephyr-7B",
+  "object": "model",
+  "deleted": true,
+  "state": "to_download"
+}
+```
 ### Start Model
-
 > Jan-only endpoint
-> TODO: @hiro
-
+The request to start `model` by changing model state from `ready` to `running`
+- Example request
+```shell
+curl -X PUT {JAN_URL}/v1/models/{model_id}/start
+```
+- Example response
+```json
+{
+  "id": "model-zephyr-7B",
+  "object": "model",
+  "state": "running"
+}
+```
 ### Stop Model
-
 > Jan-only endpoint
-> TODO: @hiro
+The request to stop `model` by changing model state from `running` to `ready`
+- Example request
+```shell
+curl -X PUT {JAN_URL}/v1/models/{model_id}/stop
+```
+- Example response
+```json
+{
+  "id": "model-zephyr-7B",
+  "object": "model",
+  "state": "ready"
+}
+```
+### Download Model
+> Jan-only endpoint
+The request to download `model` by changing model state from `to_download` to `downloading` then `ready` once it's done.
+- Example request
+```shell
+curl -X POST {JAN_URL}/v1/models/
+```
+- Example response
+```json
+{
+  "id": "model-zephyr-7B",
+  "object": "model",
+  "state": "downloading"
+}
+```
\ No newline at end of file