From 6ca8ce24db50f077e54d3b5760ddfbfd930d0834 Mon Sep 17 00:00:00 2001
From: hiro
Date: Thu, 16 Nov 2023 17:10:11 +0700
Subject: [PATCH] fix: Update content based on engs sync

---
 docs/docs/docs/specs/models.md | 186 ++++++++++++++++++++-------------
 1 file changed, 114 insertions(+), 72 deletions(-)

diff --git a/docs/docs/docs/specs/models.md b/docs/docs/docs/specs/models.md
index b2de17bf4..8d95043c2 100644
--- a/docs/docs/docs/specs/models.md
+++ b/docs/docs/docs/specs/models.md
@@ -7,64 +7,106 @@ Models are AI models like Llama and Mistral
 
 > OpenAI Equivalent: https://platform.openai.com/docs/api-reference/models
 
 ## Model Object
-
-- LOCAL MODEL `model-zephyr-7B.json`
-    - Reference: https://huggingface.co/TheBloke/zephyr-7B-beta-GGUF/
-> Equivalent to: https://platform.openai.com/docs/api-reference/models/object
-```sh=
-# Required
+- LOCAL MODEL - 1 binary `model-zephyr-7B.json` - [Reference](https://huggingface.co/TheBloke/zephyr-7B-beta-GGUF/)
-"url": TheBloke/zephyr-7B-beta-GGUF
+```json
+# Required
+"origin": "TheBloke/zephyr-7B-beta-GGUF"
 
 # Optional - by default use `default`
-import_format: thebloke
+"import_format": "thebloke"
 # default # downloads the whole thing
 # thebloke # custom importer (detects from URL)
 # janhq # Custom importers
 # openai
-"default_download": zephyr-7b-beta.Q2_K.gguf # optional, by default download model with recommended hardware
+
+# optional; by default, download the variant recommended for the user's hardware
+"download_url": "zephyr-7b-beta.Q2_K.gguf"
-
+# https://huggingface.co/TheBloke/zephyr-7B-beta-GGUF/resolve/main/zephyr-7b-beta.Q2_K.gguf?download=true
 
 # Optional: OpenAI format
-"id": "/huggingface.co/TheBloke/zephyr-7B-beta-GGUF", # Autofilled by Jan with required URL above
+"id": {model_file_name}, # No need to specify; only returned by the API
 "object": "model",
-"created": 1686935002,
+"created": 1686935002, # Unix timestamp
 "owned_by": "TheBloke"
 
 # Optional: params
-"init_parameters": {
-    "ctx_len": 2048,
-    "ngl": 100,
-    "embedding": true,
-    "n_parallel": 4,
-    "pre_prompt": "A chat between a curious user and an artificial intelligence",
-    "user_prompt": "USER: ",
-    "ai_prompt": "ASSISTANT: "
-},
-
-"runtime_parameters": {
-    "temperature": "0.7",
-    "token_limit": "2048",
-    "top_k": "",
-    "top_p": "..",
+"parameters": {
+  "init": {
+    "ctx_len": 2048,
+    "ngl": 100,
+    "embedding": true,
+    "n_parallel": 4,
+    "pre_prompt": "A chat between a curious user and an artificial intelligence",
+    "user_prompt": "USER: ",
+    "ai_prompt": "ASSISTANT: "
+  },
+  "runtime": {
+    "temperature": "0.7",
+    "token_limit": "2048",
+    "top_k": "",
+    "top_p": ".."
+  }
 }
 
 // Jan specific configs
 "metadata": { // @Q: should we put all under "jan"
   "engine": "llamacpp", // enum[llamacpp,api]
 }
 ```
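+
+For illustration, a minimal sketch of how an importer could turn the `origin`
+and `download_url` fields above into a direct download link, following the
+Hugging Face `resolve/main` convention shown in the comment. This is not Jan's
+actual importer: `resolve_download_url` is a hypothetical helper, and it
+assumes the model file is strict JSON (the annotated example above uses `#`
+comments for exposition only).
+
+```python
+import json
+
+def resolve_download_url(model_json_path: str) -> str:
+    """Build a direct download URL from a local model json (hypothetical helper)."""
+    with open(model_json_path) as f:
+        model = json.load(f)  # assumes strict JSON, without the `#` comments above
+    origin = model["origin"]          # e.g. "TheBloke/zephyr-7B-beta-GGUF"
+    filename = model["download_url"]  # e.g. "zephyr-7b-beta.Q2_K.gguf"
+    # Hugging Face convention: <origin>/resolve/main/<file>?download=true
+    return f"https://huggingface.co/{origin}/resolve/main/{filename}?download=true"
+
+print(resolve_download_url("model-zephyr-7B.json"))
+```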
 
-- REMOTE MODEL `model-azure-openai-gpt4-turbo.json`
-    - Reference: https://learn.microsoft.com/en-us/azure/ai-services/openai/quickstart?tabs=command-line%2Cpython&pivots=rest-api
-> Equivalent to: https://platform.openai.com/docs/api-reference/models/object
-
-```sh=
+- LOCAL MODEL - multiple binaries `model-llava-v1.5-ggml.json` - [Reference](https://huggingface.co/mys/ggml_llava-v1.5-13b)
+```json
 # Required
-"url": https://docs-test-001.openai.azure.com/ # This is `api.openai.com` if it's OpenAI platform
+"origin": "mys/ggml_llava-v1.5-13b"
 
 # Optional - by default use `default`
+"import_format": "default"
+# default # downloads the whole thing
+# thebloke # custom importer (detects from URL)
+# janhq # Custom importers
+# openai
+
+# Optional: OpenAI format
+"id": {model_file_name}, # No need to specify; only returned by the API
+"object": "model",
+"created": 1686935002,
+"owned_by": "mys"
+
+# Optional: params
+"parameters": {
+  "init": {
+    "ctx_len": 2048,
+    "ngl": 100,
+    "embedding": true,
+    "n_parallel": 4,
+    "pre_prompt": "A chat between a curious user and an artificial intelligence",
+    "user_prompt": "USER: ",
+    "ai_prompt": "ASSISTANT: "
+  },
+  "runtime": {
+    "temperature": "0.7",
+    "token_limit": "2048",
+    "top_k": "",
+    "top_p": ".."
+  }
+}
+
+// Jan specific configs
+"metadata": { // @Q: should we put all under "jan"
+  "engine": "llamacpp", // enum[llamacpp,api]
+}
+```
+
+- REMOTE MODEL `model-azure-openai-gpt4-turbo.json` - [Reference](https://learn.microsoft.com/en-us/azure/ai-services/openai/quickstart?tabs=command-line%2Cpython&pivots=rest-api)
+```json
+# Required
+"origin": "https://docs-test-001.openai.azure.com/"
+# This is `api.openai.com` if it's OpenAI platform
 
 # Optional - by default use `default`
 import_format: azure_openai
@@ -73,7 +115,6 @@ import_format: azure_openai
 # default # downloads the whole thing
 # thebloke # custom importer (detects from URL)
 # janhq # Custom importers
 # azure_openai # Custom importers
 # openai # Custom importers
-"default_download": zephyr-7b-beta.Q2_K.gguf # optional, by default download model with recommended hardware
 
 # Optional: OpenAI format
 "id": "/openai.azure.com/docs-test-001/gpt4-turbo", # Autofilled by Jan with required URL above
@@ -83,19 +124,20 @@ import_format: azure_openai
 
 # Optional: params
 # This is the one model gets configured and cannot be changed by assistant
-"init_parameters": {
-    "API-KEY": "",
-    "DEPLOYMENT-NAME": "",
-    "api-version": "2023-05-15"
-},
-# This is the one that assistant can override
-"runtime_parameters": {
-    "temperature": "0.7",
-    "max_tokens": "2048",
-    "presence_penalty": "0",
-    "top_p": "1",
-    "stream": "true"
+"parameters": {
+  "init": {
+    "API-KEY": "",
+    "DEPLOYMENT-NAME": "",
+    "api-version": "2023-05-15"
+  },
+  "runtime": {
+    "temperature": "0.7",
+    "max_tokens": "2048",
+    "presence_penalty": "0",
+    "top_p": "1",
+    "stream": "true"
+  }
 }
 
 // Jan specific configs
@@ -105,7 +147,6 @@ import_format: azure_openai
 ```
 
 ## Model API
-
 See [/model](/api/model)
 
 - Equivalent to: https://platform.openai.com/docs/api-reference/models
@@ -130,7 +171,7 @@ PUT https://localhost:1337/v1/models/{model_id}/start # json file name as {model
 {
     "id": [string] # The model name to be used in `chat_completion` = model_id
     "model_parameters": [jsonPayload],
-    "engine": [enum](llamacpp)
+    "engine": [enum](llamacpp,openai)
 }
 ```
@@ -141,28 +182,29 @@ How `models` map onto your local filesystem
 ```shell=
 /janroot
   /models
-    llama2-70b.json
-    llama2-7b-gguf.json
+    azure-openai/
+      azure-openai-gpt3-5.json
+
+    llama2-70b/
+      model.json
+      .gguf
 
-    huggingface.co/ # Model registries (de-factor open source)
-      meta-llama/
-        llama2-70b-chat-hf/
-        llama2-7b-chat/
-      thebloke/
-        llama2-70b-chat-hf-gguf/
-        llama2-7b-chat/
-          llama7b_q2_K_L.gguf
-          llama7b_q3_K_L.gguf
-    model.louis.ai/ # Private model registries
-      meta-llama/
-        llama2-70b-chat-hf-tensorrt-llm/
-        llama2-70b-chat-hf-awq/
-          model.json
-      thebloke/
-        llava-1-5-gguf/ # Use case with multiple model
-          mmproj.bin
-          model-q5.ggml
-
-    llama-70b-finetune.bin
-    llama-70b-finetune.json
-```
\ No newline at end of file
+    llama2-7b-gguf/
+      llama2-7b-gguf-Q2.json
+      llama2-7b-gguf-Q3_K_L.json
+      .bin
+
+    llava-ggml/
+      llava-ggml-Q5.json
+      .proj
+      ggml
+
+    llama-70b-finetune
+      llama-70b-finetune-q5.json
+      .bin
+```
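+
+For illustration, a minimal sketch of how a loader could discover models in the
+layout above, assuming one folder per model under `/janroot/models` with one or
+more `*.json` model files sitting next to their binaries. `discover_models` is
+a hypothetical helper, not Jan's actual implementation.
+
+```python
+from pathlib import Path
+
+def discover_models(root: str = "/janroot/models") -> dict[str, list[Path]]:
+    """Map each model folder to the model json files inside it (hypothetical helper)."""
+    catalog: dict[str, list[Path]] = {}
+    for folder in sorted(Path(root).iterdir()):
+        if folder.is_dir():
+            # One folder may hold several json files, e.g. one per quantization
+            catalog[folder.name] = sorted(folder.glob("*.json"))
+    return catalog
+
+for name, files in discover_models().items():
+    print(name, "->", [f.name for f in files])
+```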
+
+- Test cases
+  1. If a user airdrops a model (bin + json file) and drags and drops it into Jan, Jan can pick it up and use it
+  2. If a user has a fine-tuned model, same as case 1
+  3. If a user has one model that needs multiple binaries
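+
+For case 1, a hedged sketch of what "pick up and use" could look like against
+the Model API above. The model id `llama2-7b-gguf-Q2` is a hypothetical example
+(a json file name from the layout above, without extension), the URL is copied
+from the spec, and `requests` is used only for brevity; this is not a confirmed
+client.
+
+```python
+import requests
+
+# Start a model Jan picked up from /janroot/models (endpoint per the Model API
+# section above; the model id here is a hypothetical json file name sans extension)
+resp = requests.put("https://localhost:1337/v1/models/llama2-7b-gguf-Q2/start")
+resp.raise_for_status()
+info = resp.json()
+print(info["id"], info["engine"])  # per the spec: the model id and e.g. "llamacpp"
+```
\ No newline at end of file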