diff --git a/extensions/inference-cortex-extension/resources/models/aya-23-35b/model.json b/extensions/inference-cortex-extension/resources/models/aya-23-35b/model.json deleted file mode 100644 index f6e3d08e9..000000000 --- a/extensions/inference-cortex-extension/resources/models/aya-23-35b/model.json +++ /dev/null @@ -1,35 +0,0 @@ -{ - "sources": [ - { - "filename": "aya-23-35B-Q4_K_M.gguf", - "url": "https://huggingface.co/bartowski/aya-23-35B-GGUF/resolve/main/aya-23-35B-Q4_K_M.gguf" - } - ], - "id": "aya-23-35b", - "object": "model", - "name": "Aya 23 35B Q4", - "version": "1.1", - "description": "Aya 23 can speak up to 23 languages fluently.", - "format": "gguf", - "settings": { - "ctx_len": 8192, - "prompt_template": "<|START_OF_TURN_TOKEN|><|SYSTEM_TOKEN|>{system_prompt}<|END_OF_TURN_TOKEN|><|START_OF_TURN_TOKEN|><|USER_TOKEN|>{prompt}<|END_OF_TURN_TOKEN|><|START_OF_TURN_TOKEN|><|CHATBOT_TOKEN|>", - "llama_model_path": "aya-23-35B-Q4_K_M.gguf", - "ngl": 41 - }, - "parameters": { - "temperature": 0.7, - "top_p": 0.95, - "stream": true, - "max_tokens": 8192, - "frequency_penalty": 0, - "presence_penalty": 0, - "stop": ["<|END_OF_TURN_TOKEN|>"] - }, - "metadata": { - "author": "CohereForAI", - "tags": ["34B", "Finetuned"], - "size": 21556982144 - }, - "engine": "llama-cpp" -} diff --git a/extensions/inference-cortex-extension/resources/models/aya-23-8b/model.json b/extensions/inference-cortex-extension/resources/models/aya-23-8b/model.json deleted file mode 100644 index 463f7eec7..000000000 --- a/extensions/inference-cortex-extension/resources/models/aya-23-8b/model.json +++ /dev/null @@ -1,35 +0,0 @@ -{ - "sources": [ - { - "filename": "aya-23-8B-Q4_K_M.gguf", - "url": "https://huggingface.co/bartowski/aya-23-8B-GGUF/resolve/main/aya-23-8B-Q4_K_M.gguf" - } - ], - "id": "aya-23-8b", - "object": "model", - "name": "Aya 23 8B Q4", - "version": "1.2", - "description": "Aya 23 can speak up to 23 languages fluently.", - "format": "gguf", - "settings": { - "ctx_len": 8192, - "prompt_template": "<|START_OF_TURN_TOKEN|><|SYSTEM_TOKEN|>{system_prompt}<|END_OF_TURN_TOKEN|><|START_OF_TURN_TOKEN|><|USER_TOKEN|>{prompt}<|END_OF_TURN_TOKEN|><|START_OF_TURN_TOKEN|><|CHATBOT_TOKEN|>", - "llama_model_path": "aya-23-8B-Q4_K_M.gguf", - "ngl": 33 - }, - "parameters": { - "temperature": 0.7, - "top_p": 0.95, - "stream": true, - "max_tokens": 8192, - "frequency_penalty": 0, - "presence_penalty": 0, - "stop": ["<|END_OF_TURN_TOKEN|>"] - }, - "metadata": { - "author": "CohereForAI", - "tags": ["7B", "Finetuned"], - "size": 5056982144 - }, - "engine": "llama-cpp" -} diff --git a/extensions/inference-cortex-extension/resources/models/bakllava-1/model.json b/extensions/inference-cortex-extension/resources/models/bakllava-1/model.json deleted file mode 100644 index ccc8f693f..000000000 --- a/extensions/inference-cortex-extension/resources/models/bakllava-1/model.json +++ /dev/null @@ -1,36 +0,0 @@ -{ - "sources": [ - { - "filename": "ggml-model-q5_k.gguf", - "url": "https://huggingface.co/mys/ggml_bakllava-1/resolve/main/ggml-model-q5_k.gguf" - }, - { - "filename": "mmproj-model-f16.gguf", - "url": "https://huggingface.co/mys/ggml_bakllava-1/resolve/main/mmproj-model-f16.gguf" - } - ], - "id": "bakllava-1", - "object": "model", - "name": "BakLlava 1", - "version": "1.0", - "description": "BakLlava 1 can bring vision understanding to Jan", - "format": "gguf", - "settings": { - "vision_model": true, - "text_model": false, - "ctx_len": 4096, - "prompt_template": "\n### Instruction:\n{prompt}\n### Response:\n", -
"llama_model_path": "ggml-model-q5_k.gguf", - "mmproj": "mmproj-model-f16.gguf", - "ngl": 33 - }, - "parameters": { - "max_tokens": 4096 - }, - "metadata": { - "author": "Mys", - "tags": ["Vision"], - "size": 5750000000 - }, - "engine": "llama-cpp" -} diff --git a/extensions/inference-cortex-extension/resources/models/codeninja-1.0-7b/model.json b/extensions/inference-cortex-extension/resources/models/codeninja-1.0-7b/model.json deleted file mode 100644 index 7bd5bf3a4..000000000 --- a/extensions/inference-cortex-extension/resources/models/codeninja-1.0-7b/model.json +++ /dev/null @@ -1,34 +0,0 @@ -{ - "sources": [ - { - "filename": "codeninja-1.0-openchat-7b.Q4_K_M.gguf", - "url": "https://huggingface.co/beowolx/CodeNinja-1.0-OpenChat-7B-GGUF/resolve/main/codeninja-1.0-openchat-7b.Q4_K_M.gguf" - } - ], - "id": "codeninja-1.0-7b", - "object": "model", - "name": "CodeNinja 7B Q4", - "version": "1.2", - "description": "CodeNinja is good for coding tasks and can handle various languages including Python, C, C++, Rust, Java, JavaScript, and more.", - "format": "gguf", - "settings": { - "ctx_len": 8192, - "prompt_template": "GPT4 Correct User: {prompt}<|end_of_turn|>GPT4 Correct Assistant:", - "llama_model_path": "codeninja-1.0-openchat-7b.Q4_K_M.gguf", - "ngl": 33 - }, - "parameters": { - "temperature": 0.7, - "top_p": 0.95, - "stream": true, - "max_tokens": 8192, - "frequency_penalty": 0, - "presence_penalty": 0 - }, - "metadata": { - "author": "Beowolx", - "tags": ["7B", "Finetuned"], - "size": 4370000000 - }, - "engine": "llama-cpp" -} diff --git a/extensions/inference-cortex-extension/resources/models/codestral-22b/model.json b/extensions/inference-cortex-extension/resources/models/codestral-22b/model.json deleted file mode 100644 index 2cce063e6..000000000 --- a/extensions/inference-cortex-extension/resources/models/codestral-22b/model.json +++ /dev/null @@ -1,36 +0,0 @@ -{ - "sources": [ - { - "filename": "Codestral-22B-v0.1-Q4_K_M.gguf", - "url": "https://huggingface.co/bartowski/Codestral-22B-v0.1-GGUF/resolve/main/Codestral-22B-v0.1-Q4_K_M.gguf" - } - ], - "id": "codestral-22b", - "object": "model", - "name": "Codestral 22B Q4", - "version": "1.1", - "description": "Latest model from MistralAI optimized for code generation tasks.", - "format": "gguf", - "settings": { - "ctx_len": 32000, - "prompt_template": "{system_message} [INST] {prompt} [/INST]", - "llama_model_path": "Codestral-22B-v0.1-Q4_K_M.gguf", - "ngl": 57 - }, - "parameters": { - "temperature": 0.7, - "top_p": 0.95, - "stream": true, - "max_tokens": 32000, - "stop": [", [/INST]"], - "frequency_penalty": 0, - "presence_penalty": 0 - }, - "metadata": { - "author": "MistralAI", - "tags": ["22B", "Finetuned", "Featured"], - "size": 13341237440 - }, - "engine": "llama-cpp" - } - diff --git a/extensions/inference-cortex-extension/resources/models/command-r-34b/model.json b/extensions/inference-cortex-extension/resources/models/command-r-34b/model.json deleted file mode 100644 index 13518604c..000000000 --- a/extensions/inference-cortex-extension/resources/models/command-r-34b/model.json +++ /dev/null @@ -1,36 +0,0 @@ -{ - "sources": [ - { - "filename": "c4ai-command-r-v01-Q4_K_M.gguf", - "url": "https://huggingface.co/andrewcanis/c4ai-command-r-v01-GGUF/resolve/main/c4ai-command-r-v01-Q4_K_M.gguf" - } - ], - "id": "command-r-34b", - "object": "model", - "name": "Command-R v01 34B Q4", - "version": "1.6", - "description": "C4AI Command-R developed by CohereAI is optimized for a variety of use cases including reasoning, 
summarization, and question answering.", - "format": "gguf", - "settings": { - "ctx_len": 131072, - "prompt_template": "<|START_OF_TURN_TOKEN|><|USER_TOKEN|>{prompt}<|END_OF_TURN_TOKEN|><|START_OF_TURN_TOKEN|><|CHATBOT_TOKEN|>", - "llama_model_path": "c4ai-command-r-v01-Q4_K_M.gguf", - "ngl": 41 - }, - "parameters": { - "temperature": 0.7, - "top_p": 0.95, - "stream": true, - "max_tokens": 131072, - "stop": [], - "frequency_penalty": 0, - "presence_penalty": 0 - }, - "metadata": { - "author": "CohereAI", - "tags": ["34B", "Finetuned"], - "size": 21500000000 - }, - "engine": "llama-cpp" - } - diff --git a/extensions/inference-cortex-extension/resources/models/deepseek-coder-1.3b/model.json b/extensions/inference-cortex-extension/resources/models/deepseek-coder-1.3b/model.json deleted file mode 100644 index 6722d253d..000000000 --- a/extensions/inference-cortex-extension/resources/models/deepseek-coder-1.3b/model.json +++ /dev/null @@ -1,35 +0,0 @@ -{ - "sources": [ - { - "filename": "deepseek-coder-1.3b-instruct.Q8_0.gguf", - "url": "https://huggingface.co/TheBloke/deepseek-coder-1.3b-instruct-GGUF/resolve/main/deepseek-coder-1.3b-instruct.Q8_0.gguf" - } - ], - "id": "deepseek-coder-1.3b", - "object": "model", - "name": "Deepseek Coder 1.3B Instruct Q8", - "version": "1.4", - "description": "Deepseek Coder excelled in project-level code completion with advanced capabilities across multiple programming languages.", - "format": "gguf", - "settings": { - "ctx_len": 16384, - "prompt_template": "### Instruction:\n{prompt}\n### Response:", - "llama_model_path": "deepseek-coder-1.3b-instruct.Q8_0.gguf", - "ngl": 25 - }, - "parameters": { - "temperature": 0.7, - "top_p": 0.95, - "stream": true, - "max_tokens": 16384, - "stop": ["<|EOT|>"], - "frequency_penalty": 0, - "presence_penalty": 0 - }, - "metadata": { - "author": "Deepseek, The Bloke", - "tags": ["Tiny"], - "size": 1430000000 - }, - "engine": "llama-cpp" -} diff --git a/extensions/inference-cortex-extension/resources/models/deepseek-coder-34b/model.json b/extensions/inference-cortex-extension/resources/models/deepseek-coder-34b/model.json deleted file mode 100644 index 8a2e271cd..000000000 --- a/extensions/inference-cortex-extension/resources/models/deepseek-coder-34b/model.json +++ /dev/null @@ -1,35 +0,0 @@ -{ - "sources": [ - { - "filename": "deepseek-coder-33b-instruct.Q4_K_M.gguf", - "url": "https://huggingface.co/mradermacher/deepseek-coder-33b-instruct-GGUF/resolve/main/deepseek-coder-33b-instruct.Q4_K_M.gguf" - } - ], - "id": "deepseek-coder-34b", - "object": "model", - "name": "Deepseek Coder 33B Instruct Q4", - "version": "1.4", - "description": "Deepseek Coder excelled in project-level code completion with advanced capabilities across multiple programming languages.", - "format": "gguf", - "settings": { - "ctx_len": 16384, - "prompt_template": "### Instruction:\n{prompt}\n### Response:", - "llama_model_path": "deepseek-coder-33b-instruct.Q4_K_M.gguf", - "ngl": 63 - }, - "parameters": { - "temperature": 0.7, - "top_p": 0.95, - "stream": true, - "max_tokens": 16384, - "stop": ["<|EOT|>"], - "frequency_penalty": 0, - "presence_penalty": 0 - }, - "metadata": { - "author": "Deepseek", - "tags": ["33B"], - "size": 19940000000 - }, - "engine": "llama-cpp" -} diff --git a/extensions/inference-cortex-extension/resources/models/deepseek-r1-distill-llama-70b/model.json b/extensions/inference-cortex-extension/resources/models/deepseek-r1-distill-llama-70b/model.json deleted file mode 100644 index 7f98b07a1..000000000 --- 
a/extensions/inference-cortex-extension/resources/models/deepseek-r1-distill-llama-70b/model.json +++ /dev/null @@ -1,35 +0,0 @@ -{ - "sources": [ - { - "filename": "DeepSeek-R1-Distill-Llama-70B-Q4_K_M.gguf", - "url": "https://huggingface.co/unsloth/DeepSeek-R1-Distill-Llama-70B-GGUF/resolve/main/DeepSeek-R1-Distill-Llama-70B-Q4_K_M.gguf" - } - ], - "id": "deepseek-r1-distill-llama-70b", - "object": "model", - "name": "DeepSeek R1 Distill Llama 70B Q4", - "version": "1.0", - "description": "DeepSeek-R1 is a cost-efficient, open-source model that excels at agentic reasoning, offers superior multilingual capabilities, handles large context windows, and generalizes across domains.", - "format": "gguf", - "settings": { - "ctx_len": 131072, - "prompt_template": "<|User|> {prompt} <|Assistant|>", - "llama_model_path": "DeepSeek-R1-Distill-Llama-70B-Q4_K_M.gguf", - "ngl": 81 - }, - "parameters": { - "temperature": 0.6, - "top_p": 0.95, - "stream": true, - "max_tokens": 131072, - "stop": [], - "frequency_penalty": 0, - "presence_penalty": 0 - }, - "metadata": { - "author": "DeepSeek", - "tags": ["70B", "Featured"], - "size": 42500000000 - }, - "engine": "llama-cpp" -} diff --git a/extensions/inference-cortex-extension/resources/models/deepseek-r1-distill-llama-8b/model.json b/extensions/inference-cortex-extension/resources/models/deepseek-r1-distill-llama-8b/model.json deleted file mode 100644 index a3a075888..000000000 --- a/extensions/inference-cortex-extension/resources/models/deepseek-r1-distill-llama-8b/model.json +++ /dev/null @@ -1,35 +0,0 @@ -{ - "sources": [ - { - "filename": "DeepSeek-R1-Distill-Llama-8B-Q5_K_M.gguf", - "url": "https://huggingface.co/unsloth/DeepSeek-R1-Distill-Llama-8B-GGUF/resolve/main/DeepSeek-R1-Distill-Llama-8B-Q5_K_M.gguf" - } - ], - "id": "deepseek-r1-distill-llama-8b", - "object": "model", - "name": "DeepSeek R1 Distill Llama 8B Q5", - "version": "1.0", - "description": "DeepSeek-R1 is a cost-efficient, open-source model that excels at agentic reasoning, offers superior multilingual capabilities, handles large context windows, and generalizes across domains.", - "format": "gguf", - "settings": { - "ctx_len": 131072, - "prompt_template": "<|User|> {prompt} <|Assistant|>", - "llama_model_path": "DeepSeek-R1-Distill-Llama-8B-Q5_K_M.gguf", - "ngl": 33 - }, - "parameters": { - "temperature": 0.6, - "top_p": 0.95, - "stream": true, - "max_tokens": 131072, - "stop": [], - "frequency_penalty": 0, - "presence_penalty": 0 - }, - "metadata": { - "author": "DeepSeek", - "tags": ["8B", "Featured"], - "size": 5730000000 - }, - "engine": "llama-cpp" -} diff --git a/extensions/inference-cortex-extension/resources/models/deepseek-r1-distill-qwen-1.5b/model.json b/extensions/inference-cortex-extension/resources/models/deepseek-r1-distill-qwen-1.5b/model.json deleted file mode 100644 index 74b3dfc54..000000000 --- a/extensions/inference-cortex-extension/resources/models/deepseek-r1-distill-qwen-1.5b/model.json +++ /dev/null @@ -1,35 +0,0 @@ -{ - "sources": [ - { - "filename": "DeepSeek-R1-Distill-Qwen-1.5B-Q5_K_M.gguf", - "url": "https://huggingface.co/unsloth/DeepSeek-R1-Distill-Qwen-1.5B-GGUF/resolve/main/DeepSeek-R1-Distill-Qwen-1.5B-Q5_K_M.gguf" - } - ], - "id": "deepseek-r1-distill-qwen-1.5b", - "object": "model", - "name": "DeepSeek R1 Distill Qwen 1.5B Q5", - "version": "1.0", - "description": "DeepSeek-R1 is a cost-efficient, open-source model that excels at agentic reasoning, offers superior multilingual capabilities, handles large context windows, and generalizes across domains.", - "format": "gguf", - "settings": { -
"ctx_len": 131072, - "prompt_template": "<|User|> {prompt} <|Assistant|>", - "llama_model_path": "DeepSeek-R1-Distill-Qwen-1.5B-Q5_K_M.gguf", - "ngl": 29 - }, - "parameters": { - "temperature": 0.6, - "top_p": 0.95, - "stream": true, - "max_tokens": 131072, - "stop": [], - "frequency_penalty": 0, - "presence_penalty": 0 - }, - "metadata": { - "author": "DeepSeek", - "tags": ["1.5B", "Featured"], - "size": 1290000000 - }, - "engine": "llama-cpp" -} diff --git a/extensions/inference-cortex-extension/resources/models/deepseek-r1-distill-qwen-14b/model.json b/extensions/inference-cortex-extension/resources/models/deepseek-r1-distill-qwen-14b/model.json deleted file mode 100644 index 594ba6e41..000000000 --- a/extensions/inference-cortex-extension/resources/models/deepseek-r1-distill-qwen-14b/model.json +++ /dev/null @@ -1,35 +0,0 @@ -{ - "sources": [ - { - "filename": "DeepSeek-R1-Distill-Qwen-14B-Q4_K_M.gguf", - "url": "https://huggingface.co/unsloth/DeepSeek-R1-Distill-Qwen-14B-GGUF/resolve/main/DeepSeek-R1-Distill-Qwen-14B-Q4_K_M.gguf" - } - ], - "id": "deepseek-r1-distill-qwen-14b", - "object": "model", - "name": "DeepSeek R1 Distill Qwen 14B Q4", - "version": "1.0", - "description": "DeepSeek-R1 is a cheaper and open-source model that excels at agentic reasoning, superior multilingual capabilities, large context windows, and generalization across domains.", - "format": "gguf", - "settings": { - "ctx_len": 131072, - "prompt_template": "<|User|> {prompt} <|Assistant|>", - "llama_model_path": "DeepSeek-R1-Distill-Qwen-14B-Q4_K_M.gguf", - "ngl": 49 - }, - "parameters": { - "temperature": 0.6, - "top_p": 0.95, - "stream": true, - "max_tokens": 131072, - "stop": [], - "frequency_penalty": 0, - "presence_penalty": 0 - }, - "metadata": { - "author": "DeepSeek", - "tags": ["14B", "Featured"], - "size": 8990000000 - }, - "engine": "llama-cpp" -} diff --git a/extensions/inference-cortex-extension/resources/models/deepseek-r1-distill-qwen-32b/model.json b/extensions/inference-cortex-extension/resources/models/deepseek-r1-distill-qwen-32b/model.json deleted file mode 100644 index 6d7f5accf..000000000 --- a/extensions/inference-cortex-extension/resources/models/deepseek-r1-distill-qwen-32b/model.json +++ /dev/null @@ -1,35 +0,0 @@ -{ - "sources": [ - { - "filename": "DeepSeek-R1-Distill-Qwen-32B-Q4_K_M.gguf", - "url": "https://huggingface.co/unsloth/DeepSeek-R1-Distill-Qwen-32B-GGUF/resolve/main/DeepSeek-R1-Distill-Qwen-32B-Q4_K_M.gguf" - } - ], - "id": "deepseek-r1-distill-qwen-32b", - "object": "model", - "name": "DeepSeek R1 Distill Qwen 32B Q4", - "version": "1.0", - "description": "DeepSeek-R1 is a cheaper and open-source model that excels at agentic reasoning, superior multilingual capabilities, large context windows, and generalization across domains.", - "format": "gguf", - "settings": { - "ctx_len": 131072, - "prompt_template": "<|User|> {prompt} <|Assistant|>", - "llama_model_path": "DeepSeek-R1-Distill-Qwen-32B-Q4_K_M.gguf", - "ngl": 65 - }, - "parameters": { - "temperature": 0.6, - "top_p": 0.95, - "stream": true, - "max_tokens": 131072, - "stop": [], - "frequency_penalty": 0, - "presence_penalty": 0 - }, - "metadata": { - "author": "DeepSeek", - "tags": ["32B", "Featured"], - "size": 19900000000 - }, - "engine": "llama-cpp" -} diff --git a/extensions/inference-cortex-extension/resources/models/deepseek-r1-distill-qwen-7b/model.json b/extensions/inference-cortex-extension/resources/models/deepseek-r1-distill-qwen-7b/model.json deleted file mode 100644 index eae53cf0e..000000000 --- 
a/extensions/inference-cortex-extension/resources/models/deepseek-r1-distill-qwen-7b/model.json +++ /dev/null @@ -1,35 +0,0 @@ -{ - "sources": [ - { - "filename": "DeepSeek-R1-Distill-Qwen-7B-Q5_K_M.gguf", - "url": "https://huggingface.co/unsloth/DeepSeek-R1-Distill-Qwen-7B-GGUF/resolve/main/DeepSeek-R1-Distill-Qwen-7B-Q5_K_M.gguf" - } - ], - "id": "deepseek-r1-distill-qwen-7b", - "object": "model", - "name": "DeepSeek R1 Distill Qwen 7B Q5", - "version": "1.0", - "description": "DeepSeek-R1 is a cost-efficient, open-source model that excels at agentic reasoning, offers superior multilingual capabilities, handles large context windows, and generalizes across domains.", - "format": "gguf", - "settings": { - "ctx_len": 131072, - "prompt_template": "<|User|> {prompt} <|Assistant|>", - "llama_model_path": "DeepSeek-R1-Distill-Qwen-7B-Q5_K_M.gguf", - "ngl": 29 - }, - "parameters": { - "temperature": 0.6, - "top_p": 0.95, - "stream": true, - "max_tokens": 131072, - "stop": [], - "frequency_penalty": 0, - "presence_penalty": 0 - }, - "metadata": { - "author": "DeepSeek", - "tags": ["7B", "Featured"], - "size": 5440000000 - }, - "engine": "llama-cpp" -} diff --git a/extensions/inference-cortex-extension/resources/models/gemma-1.1-2b/model.json b/extensions/inference-cortex-extension/resources/models/gemma-1.1-2b/model.json deleted file mode 100644 index 3278c9a81..000000000 --- a/extensions/inference-cortex-extension/resources/models/gemma-1.1-2b/model.json +++ /dev/null @@ -1,35 +0,0 @@ -{ - "sources": [ - { - "filename": "gemma-1.1-2b-it-Q4_K_M.gguf", - "url": "https://huggingface.co/bartowski/gemma-1.1-2b-it-GGUF/resolve/main/gemma-1.1-2b-it-Q4_K_M.gguf" - } - ], - "id": "gemma-1.1-2b-it", - "object": "model", - "name": "Gemma 1.1 2B Q4", - "version": "1.3", - "description": "Gemma is built from the same technology as Google's Gemini.", - "format": "gguf", - "settings": { - "ctx_len": 8192, - "prompt_template": "<start_of_turn>user\n{prompt}<end_of_turn>\n<start_of_turn>model", - "llama_model_path": "gemma-1.1-2b-it-Q4_K_M.gguf", - "ngl": 19 - }, - "parameters": { - "temperature": 0.7, - "top_p": 0.95, - "stream": true, - "max_tokens": 8192, - "stop": ["<end_of_turn>"], - "frequency_penalty": 0, - "presence_penalty": 0 - }, - "metadata": { - "author": "Google", - "tags": ["2B", "Finetuned", "Tiny"], - "size": 1630000000 - }, - "engine": "llama-cpp" -} diff --git a/extensions/inference-cortex-extension/resources/models/gemma-1.1-7b/model.json b/extensions/inference-cortex-extension/resources/models/gemma-1.1-7b/model.json deleted file mode 100644 index 9a57f9b37..000000000 --- a/extensions/inference-cortex-extension/resources/models/gemma-1.1-7b/model.json +++ /dev/null @@ -1,35 +0,0 @@ -{ - "sources": [ - { - "filename": "gemma-1.1-7b-it-Q4_K_M.gguf", - "url": "https://huggingface.co/bartowski/gemma-1.1-7b-it-GGUF/resolve/main/gemma-1.1-7b-it-Q4_K_M.gguf" - } - ], - "id": "gemma-1.1-7b-it", - "object": "model", - "name": "Gemma 1.1 7B Q4", - "version": "1.3", - "description": "Google's Gemma is built for multilingual purposes", - "format": "gguf", - "settings": { - "ctx_len": 8192, - "prompt_template": "<start_of_turn>user\n{prompt}<end_of_turn>\n<start_of_turn>model", - "llama_model_path": "gemma-1.1-7b-it-Q4_K_M.gguf", - "ngl": 29 - }, - "parameters": { - "temperature": 0.7, - "top_p": 0.95, - "stream": true, - "max_tokens": 8192, - "stop": [], - "frequency_penalty": 0, - "presence_penalty": 0 - }, - "metadata": { - "author": "Google", - "tags": ["7B", "Finetuned"], - "size": 5330000000 - }, - "engine": "llama-cpp" -} diff --git
a/extensions/inference-cortex-extension/resources/models/gemma-2-27b/model.json b/extensions/inference-cortex-extension/resources/models/gemma-2-27b/model.json deleted file mode 100644 index 66eaff7c2..000000000 --- a/extensions/inference-cortex-extension/resources/models/gemma-2-27b/model.json +++ /dev/null @@ -1,41 +0,0 @@ -{ - "sources": [ - { - "filename": "gemma-2-27b-it-Q4_K_M.gguf", - "url": "https://huggingface.co/bartowski/gemma-2-27b-it-GGUF/resolve/main/gemma-2-27b-it-Q4_K_M.gguf" - } - ], - "id": "gemma-2-27b-it", - "object": "model", - "name": "Gemma 2 27B Q4", - "version": "1.1", - "description": "Gemma is built from the same technology as Google's Gemini.", - "format": "gguf", - "settings": { - "ctx_len": 8192, - "prompt_template": "<start_of_turn>user\n{prompt}<end_of_turn>\n<start_of_turn>model\n\n<end_of_turn><start_of_turn>model\n", - "llama_model_path": "gemma-2-27b-it-Q4_K_M.gguf", - "ngl": 47 - }, - "parameters": { - "temperature": 0.7, - "top_p": 0.95, - "stream": true, - "max_tokens": 8192, - "stop": [ - "<end_of_turn>" - ], - "frequency_penalty": 0, - "presence_penalty": 0 - }, - "metadata": { - "author": "Google", - "tags": [ - "27B", - "Conversational", - "Text-generation" - ], - "size": 16600000000 - }, - "engine": "llama-cpp" -} diff --git a/extensions/inference-cortex-extension/resources/models/gemma-2-2b/model.json b/extensions/inference-cortex-extension/resources/models/gemma-2-2b/model.json deleted file mode 100644 index 60be558b8..000000000 --- a/extensions/inference-cortex-extension/resources/models/gemma-2-2b/model.json +++ /dev/null @@ -1,42 +0,0 @@ -{ - "sources": [ - { - "filename": "gemma-2-2b-it-Q4_K_M.gguf", - "url": "https://huggingface.co/bartowski/gemma-2-2b-it-GGUF/resolve/main/gemma-2-2b-it-Q4_K_M.gguf" - } - ], - "id": "gemma-2-2b-it", - "object": "model", - "name": "Gemma 2 2B Q4", - "version": "1.1", - "description": "Gemma is built from the same technology as Google's Gemini.", - "format": "gguf", - "settings": { - "ctx_len": 8192, - "prompt_template": "<start_of_turn>user\n{prompt}<end_of_turn>\n<start_of_turn>model\n\n<end_of_turn><start_of_turn>model\n", - "llama_model_path": "gemma-2-2b-it-Q4_K_M.gguf", - "ngl": 27 - }, - "parameters": { - "temperature": 0.7, - "top_p": 0.95, - "stream": true, - "max_tokens": 8192, - "stop": [ - "<end_of_turn>" - ], - "frequency_penalty": 0, - "presence_penalty": 0 - }, - "metadata": { - "author": "Google", - "tags": [ - "2B", - "Tiny", - "Conversational", - "Text-generation" - ], - "size": 1710000000 - }, - "engine": "llama-cpp" -} diff --git a/extensions/inference-cortex-extension/resources/models/gemma-2-9b/model.json b/extensions/inference-cortex-extension/resources/models/gemma-2-9b/model.json deleted file mode 100644 index 67acaad09..000000000 --- a/extensions/inference-cortex-extension/resources/models/gemma-2-9b/model.json +++ /dev/null @@ -1,41 +0,0 @@ -{ - "sources": [ - { - "filename": "gemma-2-9b-it-Q4_K_M.gguf", - "url": "https://huggingface.co/bartowski/gemma-2-9b-it-GGUF/resolve/main/gemma-2-9b-it-Q4_K_M.gguf" - } - ], - "id": "gemma-2-9b-it", - "object": "model", - "name": "Gemma 2 9B Q4", - "version": "1.1", - "description": "Gemma is built from the same technology as Google's Gemini.", - "format": "gguf", - "settings": { - "ctx_len": 8192, - "prompt_template": "<start_of_turn>user\n{prompt}<end_of_turn>\n<start_of_turn>model\n\n<end_of_turn><start_of_turn>model\n", - "llama_model_path": "gemma-2-9b-it-Q4_K_M.gguf", - "ngl": 43 - }, - "parameters": { - "temperature": 0.7, - "top_p": 0.95, - "stream": true, - "max_tokens": 8192, - "stop": [ - "<end_of_turn>" - ], - "frequency_penalty": 0, - "presence_penalty": 0 - }, - "metadata": { - "author": "Google", - "tags": [ - "9B", - "Conversational", - "Text-generation" - ], - "size":
5760000000 - }, - "engine": "llama-cpp" -} diff --git a/extensions/inference-cortex-extension/resources/models/llama2-chat-70b/model.json b/extensions/inference-cortex-extension/resources/models/llama2-chat-70b/model.json deleted file mode 100644 index c91a0a73b..000000000 --- a/extensions/inference-cortex-extension/resources/models/llama2-chat-70b/model.json +++ /dev/null @@ -1,35 +0,0 @@ -{ - "sources": [ - { - "filename": "llama-2-70b-chat.Q4_K_M.gguf", - "url": "https://huggingface.co/TheBloke/Llama-2-70B-Chat-GGUF/resolve/main/llama-2-70b-chat.Q4_K_M.gguf" - } - ], - "id": "llama2-chat-70b", - "object": "model", - "name": "Llama 2 Chat 70B Q4", - "version": "1.1", - "description": "Llama 2 is specifically designed for a comprehensive understanding of the world.", - "format": "gguf", - "settings": { - "ctx_len": 4096, - "prompt_template": "[INST] <<SYS>>\n{system_message}<</SYS>>\n{prompt}[/INST]", - "llama_model_path": "llama-2-70b-chat.Q4_K_M.gguf", - "ngl": 81 - }, - "parameters": { - "temperature": 0.7, - "top_p": 0.95, - "stream": true, - "max_tokens": 4096, - "stop": [], - "frequency_penalty": 0, - "presence_penalty": 0 - }, - "metadata": { - "author": "MetaAI", - "tags": ["70B", "Foundational Model"], - "size": 43920000000 - }, - "engine": "llama-cpp" -} diff --git a/extensions/inference-cortex-extension/resources/models/llama2-chat-7b/model.json b/extensions/inference-cortex-extension/resources/models/llama2-chat-7b/model.json deleted file mode 100644 index 4a28f6004..000000000 --- a/extensions/inference-cortex-extension/resources/models/llama2-chat-7b/model.json +++ /dev/null @@ -1,35 +0,0 @@ -{ - "sources": [ - { - "filename": "llama-2-7b-chat.Q4_K_M.gguf", - "url": "https://huggingface.co/TheBloke/Llama-2-7B-Chat-GGUF/resolve/main/llama-2-7b-chat.Q4_K_M.gguf" - } - ], - "id": "llama2-chat-7b", - "object": "model", - "name": "Llama 2 Chat 7B Q4", - "version": "1.1", - "description": "Llama 2 is specifically designed for a comprehensive understanding of the world.", - "format": "gguf", - "settings": { - "ctx_len": 4096, - "prompt_template": "[INST] <<SYS>>\n{system_message}<</SYS>>\n{prompt}[/INST]", - "llama_model_path": "llama-2-7b-chat.Q4_K_M.gguf", - "ngl": 33 - }, - "parameters": { - "temperature": 0.7, - "top_p": 0.95, - "stream": true, - "max_tokens": 4096, - "stop": [], - "frequency_penalty": 0, - "presence_penalty": 0 - }, - "metadata": { - "author": "MetaAI", - "tags": ["7B", "Foundational Model"], - "size": 4080000000 - }, - "engine": "llama-cpp" -} diff --git a/extensions/inference-cortex-extension/resources/models/llama3-8b-instruct/model.json b/extensions/inference-cortex-extension/resources/models/llama3-8b-instruct/model.json deleted file mode 100644 index 3456a185e..000000000 --- a/extensions/inference-cortex-extension/resources/models/llama3-8b-instruct/model.json +++ /dev/null @@ -1,35 +0,0 @@ -{ - "sources": [ - { - "filename": "Meta-Llama-3-8B-Instruct-Q4_K_M.gguf", - "url": "https://huggingface.co/bartowski/Meta-Llama-3-8B-Instruct-GGUF/resolve/main/Meta-Llama-3-8B-Instruct-Q4_K_M.gguf" - } - ], - "id": "llama3-8b-instruct", - "object": "model", - "name": "Llama 3 8B Instruct Q4", - "version": "1.4", - "description": "Meta's Llama 3 excels at general usage situations, including chat, general world knowledge, and coding.", - "format": "gguf", - "settings": { - "ctx_len": 8192, - "prompt_template":
"<|begin_of_text|><|start_header_id|>system<|end_header_id|>\n\n{system_message}<|eot_id|><|start_header_id|>user<|end_header_id|>\n\n{prompt}<|eot_id|><|start_header_id|>assistant<|end_header_id|>\n\n", - "llama_model_path": "Meta-Llama-3-8B-Instruct-Q4_K_M.gguf", - "ngl": 33 - }, - "parameters": { - "temperature": 0.7, - "top_p": 0.95, - "stream": true, - "max_tokens": 8192, - "stop": ["<|end_of_text|>","<|eot_id|>"], - "frequency_penalty": 0, - "presence_penalty": 0 - }, - "metadata": { - "author": "MetaAI", - "tags": ["8B"], - "size": 4920000000 - }, - "engine": "llama-cpp" -} diff --git a/extensions/inference-cortex-extension/resources/models/llama3-hermes-8b/model.json b/extensions/inference-cortex-extension/resources/models/llama3-hermes-8b/model.json deleted file mode 100644 index 718629fb0..000000000 --- a/extensions/inference-cortex-extension/resources/models/llama3-hermes-8b/model.json +++ /dev/null @@ -1,38 +0,0 @@ -{ - "sources": [ - { - "filename": "Hermes-2-Pro-Llama-3-8B-Q4_K_M.gguf", - "url": "https://huggingface.co/NousResearch/Hermes-2-Pro-Llama-3-8B-GGUF/resolve/main/Hermes-2-Pro-Llama-3-8B-Q4_K_M.gguf" - } - ], - "id": "llama3-hermes-8b", - "object": "model", - "name": "Hermes Pro Llama 3 8B Q4", - "version": "1.2", - "description": "Hermes Pro is well-designed for General chat and JSON output.", - "format": "gguf", - "settings": { - "ctx_len": 8192, - "prompt_template": "<|im_start|>system\n{system_message}<|im_end|>\n<|im_start|>user\n{prompt}<|im_end|>\n<|im_start|>assistant", - "llama_model_path": "Hermes-2-Pro-Llama-3-8B-Q4_K_M.gguf", - "ngl": 33 - }, - "parameters": { - "temperature": 0.7, - "top_p": 0.95, - "stream": true, - "max_tokens": 8192, - "stop": [], - "frequency_penalty": 0, - "presence_penalty": 0 - }, - "metadata": { - "author": "NousResearch", - "tags": [ - "7B", - "Finetuned" - ], - "size": 4920000000 - }, - "engine": "llama-cpp" - } diff --git a/extensions/inference-cortex-extension/resources/models/llama3.1-70b-instruct/model.json b/extensions/inference-cortex-extension/resources/models/llama3.1-70b-instruct/model.json deleted file mode 100644 index aec73719e..000000000 --- a/extensions/inference-cortex-extension/resources/models/llama3.1-70b-instruct/model.json +++ /dev/null @@ -1,41 +0,0 @@ -{ - "sources": [ - { - "filename": "Meta-Llama-3.1-70B-Instruct-Q4_K_M.gguf", - "url": "https://huggingface.co/bartowski/Meta-Llama-3.1-70B-Instruct-GGUF/resolve/main/Meta-Llama-3.1-70B-Instruct-Q4_K_M.gguf" - } - ], - "id": "llama3.1-70b-instruct", - "object": "model", - "name": "Llama 3.1 70B Instruct Q4", - "version": "1.2", - "description": "Meta's Llama 3.1 excels at general usage situations, including chat, general world knowledge, and coding.", - "format": "gguf", - "settings": { - "ctx_len": 131072, - "prompt_template": "<|begin_of_text|><|start_header_id|>system<|end_header_id|>\n\n{system_message}<|eot_id|><|start_header_id|>user<|end_header_id|>\n\n{prompt}<|eot_id|><|start_header_id|>assistant<|end_header_id|>\n\n", - "llama_model_path": "Meta-Llama-3.1-70B-Instruct-Q4_K_M.gguf", - "ngl": 33 - }, - "parameters": { - "temperature": 0.7, - "top_p": 0.95, - "stream": true, - "max_tokens": 8192, - "stop": [ - "<|end_of_text|>", - "<|eot_id|>", - "<|eom_id|>" - ], - "frequency_penalty": 0, - "presence_penalty": 0 - }, - "metadata": { - "author": "MetaAI", - "tags": [ - "70B" - ], - "size": 42500000000 - }, - "engine": "llama-cpp" -} diff --git a/extensions/inference-cortex-extension/resources/models/llama3.1-8b-instruct/model.json 
b/extensions/inference-cortex-extension/resources/models/llama3.1-8b-instruct/model.json deleted file mode 100644 index 1aeb80450..000000000 --- a/extensions/inference-cortex-extension/resources/models/llama3.1-8b-instruct/model.json +++ /dev/null @@ -1,35 +0,0 @@ -{ - "sources": [ - { - "filename": "Meta-Llama-3.1-8B-Instruct-Q4_K_M.gguf", - "url": "https://huggingface.co/bartowski/Meta-Llama-3.1-8B-Instruct-GGUF/resolve/main/Meta-Llama-3.1-8B-Instruct-Q4_K_M.gguf" - } - ], - "id": "llama3.1-8b-instruct", - "object": "model", - "name": "Llama 3.1 8B Instruct Q4", - "version": "1.2", - "description": "Meta's Llama 3.1 excels at general usage situations, including chat, general world knowledge, and coding.", - "format": "gguf", - "settings": { - "ctx_len": 131072, - "prompt_template": "<|begin_of_text|><|start_header_id|>system<|end_header_id|>\n\n{system_message}<|eot_id|><|start_header_id|>user<|end_header_id|>\n\n{prompt}<|eot_id|><|start_header_id|>assistant<|end_header_id|>\n\n", - "llama_model_path": "Meta-Llama-3.1-8B-Instruct-Q4_K_M.gguf", - "ngl": 33 - }, - "parameters": { - "temperature": 0.7, - "top_p": 0.95, - "stream": true, - "max_tokens": 8192, - "stop": ["<|end_of_text|>", "<|eot_id|>", "<|eom_id|>"], - "frequency_penalty": 0, - "presence_penalty": 0 - }, - "metadata": { - "author": "MetaAI", - "tags": ["8B", "Featured"], - "size": 4920000000 - }, - "engine": "llama-cpp" -} diff --git a/extensions/inference-cortex-extension/resources/models/llama3.2-1b-instruct/model.json b/extensions/inference-cortex-extension/resources/models/llama3.2-1b-instruct/model.json deleted file mode 100644 index 0fe7d3316..000000000 --- a/extensions/inference-cortex-extension/resources/models/llama3.2-1b-instruct/model.json +++ /dev/null @@ -1,35 +0,0 @@ -{ - "sources": [ - { - "filename": "Llama-3.2-1B-Instruct-Q8_0.gguf", - "url": "https://huggingface.co/bartowski/Llama-3.2-1B-Instruct-GGUF/resolve/main/Llama-3.2-1B-Instruct-Q8_0.gguf" - } - ], - "id": "llama3.2-1b-instruct", - "object": "model", - "name": "Llama 3.2 1B Instruct Q8", - "version": "1.0", - "description": "Meta's Llama 3.2 excels at general usage situations, including chat, general world knowledge, and coding.", - "format": "gguf", - "settings": { - "ctx_len": 131072, - "prompt_template": "<|begin_of_text|><|start_header_id|>system<|end_header_id|>\n\n{system_message}<|eot_id|><|start_header_id|>user<|end_header_id|>\n\n{prompt}<|eot_id|><|start_header_id|>assistant<|end_header_id|>\n\n", - "llama_model_path": "Llama-3.2-1B-Instruct-Q8_0.gguf", - "ngl": 33 - }, - "parameters": { - "temperature": 0.7, - "top_p": 0.95, - "stream": true, - "max_tokens": 8192, - "stop": ["<|end_of_text|>", "<|eot_id|>", "<|eom_id|>"], - "frequency_penalty": 0, - "presence_penalty": 0 - }, - "metadata": { - "author": "MetaAI", - "tags": ["1B", "Featured"], - "size": 1320000000 - }, - "engine": "llama-cpp" -} diff --git a/extensions/inference-cortex-extension/resources/models/llama3.2-3b-instruct/model.json b/extensions/inference-cortex-extension/resources/models/llama3.2-3b-instruct/model.json deleted file mode 100644 index 299362fbf..000000000 --- a/extensions/inference-cortex-extension/resources/models/llama3.2-3b-instruct/model.json +++ /dev/null @@ -1,35 +0,0 @@ -{ - "sources": [ - { - "filename": "Llama-3.2-3B-Instruct-Q8_0.gguf", - "url": "https://huggingface.co/bartowski/Llama-3.2-3B-Instruct-GGUF/resolve/main/Llama-3.2-3B-Instruct-Q8_0.gguf" - } - ], - "id": "llama3.2-3b-instruct", - "object": "model", - "name": "Llama 3.2 3B Instruct Q8", -
"version": "1.0", - "description": "Meta's Llama 3.2 excels at general usage situations, including chat, general world knowledge, and coding.", - "format": "gguf", - "settings": { - "ctx_len": 131072, - "prompt_template": "<|begin_of_text|><|start_header_id|>system<|end_header_id|>\n\n{system_message}<|eot_id|><|start_header_id|>user<|end_header_id|>\n\n{prompt}<|eot_id|><|start_header_id|>assistant<|end_header_id|>\n\n", - "llama_model_path": "Llama-3.2-3B-Instruct-Q8_0.gguf", - "ngl": 33 - }, - "parameters": { - "temperature": 0.7, - "top_p": 0.95, - "stream": true, - "max_tokens": 8192, - "stop": ["<|end_of_text|>", "<|eot_id|>", "<|eom_id|>"], - "frequency_penalty": 0, - "presence_penalty": 0 - }, - "metadata": { - "author": "MetaAI", - "tags": ["3B", "Featured"], - "size": 3420000000 - }, - "engine": "llama-cpp" -} diff --git a/extensions/inference-cortex-extension/resources/models/llamacorn-1.1b/model.json b/extensions/inference-cortex-extension/resources/models/llamacorn-1.1b/model.json deleted file mode 100644 index 3230df5b0..000000000 --- a/extensions/inference-cortex-extension/resources/models/llamacorn-1.1b/model.json +++ /dev/null @@ -1,38 +0,0 @@ -{ - "sources": [ - { - "url":"https://huggingface.co/janhq/llamacorn-1.1b-chat-GGUF/resolve/main/llamacorn-1.1b-chat.Q8_0.gguf", - "filename": "llamacorn-1.1b-chat.Q8_0.gguf" - } - ], - "id": "llamacorn-1.1b", - "object": "model", - "name": "LlamaCorn 1.1B Q8", - "version": "1.1", - "description": "LlamaCorn is designed to improve chat functionality from TinyLlama.", - "format": "gguf", - "settings": { - "ctx_len": 2048, - "prompt_template": "<|im_start|>system\n{system_message}<|im_end|>\n<|im_start|>user\n{prompt}<|im_end|>\n<|im_start|>assistant", - "llama_model_path": "llamacorn-1.1b-chat.Q8_0.gguf", - "ngl": 23 - }, - "parameters": { - "temperature": 0.7, - "top_p": 0.95, - "stream": true, - "max_tokens": 2048, - "stop": [], - "frequency_penalty": 0, - "presence_penalty": 0 - }, - "metadata": { - "author": "Jan", - "tags": [ - "Tiny", - "Finetuned" - ], - "size": 1170000000 - }, - "engine": "llama-cpp" - } \ No newline at end of file diff --git a/extensions/inference-cortex-extension/resources/models/llava-13b/model.json b/extensions/inference-cortex-extension/resources/models/llava-13b/model.json deleted file mode 100644 index fe058e259..000000000 --- a/extensions/inference-cortex-extension/resources/models/llava-13b/model.json +++ /dev/null @@ -1,37 +0,0 @@ -{ - "sources": [ - { - "filename": "llava-v1.6-vicuna-13b.Q4_K_M.gguf", - "url": "https://huggingface.co/cjpais/llava-v1.6-vicuna-13b-gguf/resolve/main/llava-v1.6-vicuna-13b.Q4_K_M.gguf" - }, - { - "filename": "mmproj-model-f16.gguf", - "url": "https://huggingface.co/cjpais/llava-v1.6-vicuna-13b-gguf/resolve/main/mmproj-model-f16.gguf" - } - ], - "id": "llava-13b", - "object": "model", - "name": "LlaVa 13B Q4", - "version": "1.2", - "description": "LlaVa can bring vision understanding to Jan", - "format": "gguf", - "settings": { - "vision_model": true, - "text_model": false, - "ctx_len": 4096, - "prompt_template": "\n### Instruction:\n{prompt}\n### Response:\n", - "llama_model_path": "llava-v1.6-vicuna-13b.Q4_K_M.gguf", - "mmproj": "mmproj-model-f16.gguf", - "ngl": 33 - }, - "parameters": { - "max_tokens": 4096, - "stop": [""] - }, - "metadata": { - "author": "liuhaotian", - "tags": ["Vision"], - "size": 7870000000 - }, - "engine": "llama-cpp" -} diff --git a/extensions/inference-cortex-extension/resources/models/llava-7b/model.json 
b/extensions/inference-cortex-extension/resources/models/llava-7b/model.json deleted file mode 100644 index 8e5cdf09f..000000000 --- a/extensions/inference-cortex-extension/resources/models/llava-7b/model.json +++ /dev/null @@ -1,37 +0,0 @@ -{ - "sources": [ - { - "filename": "llava-v1.6-mistral-7b.Q4_K_M.gguf", - "url": "https://huggingface.co/cjpais/llava-1.6-mistral-7b-gguf/resolve/main/llava-v1.6-mistral-7b.Q4_K_M.gguf" - }, - { - "filename": "mmproj-model-f16.gguf", - "url": "https://huggingface.co/cjpais/llava-1.6-mistral-7b-gguf/resolve/main/mmproj-model-f16.gguf" - } - ], - "id": "llava-7b", - "object": "model", - "name": "LlaVa 7B", - "version": "1.2", - "description": "LlaVa can bring vision understanding to Jan", - "format": "gguf", - "settings": { - "vision_model": true, - "text_model": false, - "ctx_len": 4096, - "prompt_template": "\n### Instruction:\n{prompt}\n### Response:\n", - "llama_model_path": "llava-v1.6-mistral-7b.Q4_K_M.gguf", - "mmproj": "mmproj-model-f16.gguf", - "ngl": 33 - }, - "parameters": { - "max_tokens": 4096, - "stop": ["</s>"] - }, - "metadata": { - "author": "liuhaotian", - "tags": ["Vision"], - "size": 4370000000 - }, - "engine": "llama-cpp" -} diff --git a/extensions/inference-cortex-extension/resources/models/mistral-ins-7b-q4/model.json b/extensions/inference-cortex-extension/resources/models/mistral-ins-7b-q4/model.json deleted file mode 100644 index 9b568e468..000000000 --- a/extensions/inference-cortex-extension/resources/models/mistral-ins-7b-q4/model.json +++ /dev/null @@ -1,36 +0,0 @@ -{ - "sources": [ - { - "filename": "Mistral-7B-Instruct-v0.3-Q4_K_M.gguf", - "url": "https://huggingface.co/bartowski/Mistral-7B-Instruct-v0.3-GGUF/resolve/main/Mistral-7B-Instruct-v0.3-Q4_K_M.gguf" - } - ], - "id": "mistral-ins-7b-q4", - "object": "model", - "name": "Mistral 7B Instruct Q4", - "version": "1.5", - "description": "Mistral 7B Instruct model, specifically designed for a comprehensive understanding of the world.", - "format": "gguf", - "settings": { - "ctx_len": 32768, - "prompt_template": "{system_message} [INST] {prompt} [/INST]", - "llama_model_path": "Mistral-7B-Instruct-v0.3-Q4_K_M.gguf", - "ngl": 33 - }, - "parameters": { - "temperature": 0.7, - "top_p": 0.95, - "stream": true, - "max_tokens": 32768, - "stop": ["[/INST]"], - "frequency_penalty": 0, - "presence_penalty": 0 - }, - "metadata": { - "author": "MistralAI", - "tags": ["7B", "Foundational Model"], - "size": 4370000000, - "cover": "https://raw.githubusercontent.com/janhq/jan/dev/models/mistral-ins-7b-q4/cover.png" - }, - "engine": "llama-cpp" -} diff --git a/extensions/inference-cortex-extension/resources/models/mixtral-8x7b-instruct/model.json b/extensions/inference-cortex-extension/resources/models/mixtral-8x7b-instruct/model.json deleted file mode 100644 index c711065ff..000000000 --- a/extensions/inference-cortex-extension/resources/models/mixtral-8x7b-instruct/model.json +++ /dev/null @@ -1,34 +0,0 @@ -{ - "sources": [ - { - "filename": "mixtral-8x7b-instruct-v0.1.Q4_K_M.gguf", - "url": "https://huggingface.co/TheBloke/Mixtral-8x7B-Instruct-v0.1-GGUF/resolve/main/mixtral-8x7b-instruct-v0.1.Q4_K_M.gguf" - } - ], - "id": "mixtral-8x7b-instruct", - "object": "model", - "name": "Mixtral 8x7B Instruct Q4", - "version": "1.1", - "description": "The Mixtral-8x7B is a pretrained generative Sparse Mixture of Experts.
The Mixtral-8x7B outperforms 70B models on most benchmarks.", - "format": "gguf", - "settings": { - "ctx_len": 32768, - "prompt_template": "[INST] {prompt} [/INST]", - "llama_model_path": "mixtral-8x7b-instruct-v0.1.Q4_K_M.gguf", - "ngl": 100 - }, - "parameters": { - "temperature": 0.7, - "top_p": 0.95, - "stream": true, - "max_tokens": 32768, - "frequency_penalty": 0, - "presence_penalty": 0 - }, - "metadata": { - "author": "MistralAI, TheBloke", - "tags": ["70B", "Foundational Model"], - "size": 26440000000 - }, - "engine": "llama-cpp" -} diff --git a/extensions/inference-cortex-extension/resources/models/noromaid-7b/model.json b/extensions/inference-cortex-extension/resources/models/noromaid-7b/model.json deleted file mode 100644 index 1999035aa..000000000 --- a/extensions/inference-cortex-extension/resources/models/noromaid-7b/model.json +++ /dev/null @@ -1,35 +0,0 @@ -{ - "sources": [ - { - "filename": "Noromaid-7B-0.4-DPO.q4_k_m.gguf", - "url": "https://huggingface.co/NeverSleep/Noromaid-7B-0.4-DPO-GGUF/resolve/main/Noromaid-7B-0.4-DPO.q4_k_m.gguf" - } - ], - "id": "noromaid-7b", - "object": "model", - "name": "Noromaid 7B Q4", - "version": "1.2", - "description": "The Noromaid 7b model is designed for role-playing with human-like behavior.", - "format": "gguf", - "settings": { - "ctx_len": 32768, - "prompt_template": "<|im_start|>system\n{system_message}<|im_end|>\n<|im_start|>user\n{prompt}<|im_end|>\n<|im_start|>assistant", - "llama_model_path": "Noromaid-7B-0.4-DPO.q4_k_m.gguf", - "ngl": 33 - }, - "parameters": { - "temperature": 0.7, - "top_p": 0.95, - "stream": true, - "max_tokens": 32768, - "stop": [], - "frequency_penalty": 0, - "presence_penalty": 0 - }, - "metadata": { - "author": "NeverSleep", - "tags": ["7B", "Finetuned"], - "size": 4370000000 - }, - "engine": "llama-cpp" -} diff --git a/extensions/inference-cortex-extension/resources/models/openchat-3.5-7b/model.json b/extensions/inference-cortex-extension/resources/models/openchat-3.5-7b/model.json deleted file mode 100644 index 05371b69e..000000000 --- a/extensions/inference-cortex-extension/resources/models/openchat-3.5-7b/model.json +++ /dev/null @@ -1,35 +0,0 @@ -{ - "sources": [ - { - "filename": "openchat-3.5-0106.Q4_K_M.gguf", - "url": "https://huggingface.co/TheBloke/openchat-3.5-0106-GGUF/resolve/main/openchat-3.5-0106.Q4_K_M.gguf" - } - ], - "id": "openchat-3.5-7b", - "object": "model", - "name": "Openchat-3.5 7B Q4", - "version": "1.2", - "description": "The performance of Openchat surpasses ChatGPT-3.5 and Grok-1 across various benchmarks.", - "format": "gguf", - "settings": { - "ctx_len": 8192, - "prompt_template": "GPT4 Correct User: {prompt}<|end_of_turn|>GPT4 Correct Assistant:", - "llama_model_path": "openchat-3.5-0106.Q4_K_M.gguf", - "ngl": 33 - }, - "parameters": { - "temperature": 0.7, - "top_p": 0.95, - "stream": true, - "max_tokens": 8192, - "stop": ["<|end_of_turn|>"], - "frequency_penalty": 0, - "presence_penalty": 0 - }, - "metadata": { - "author": "Openchat", - "tags": ["Recommended", "7B", "Finetuned"], - "size": 4370000000 - }, - "engine": "llama-cpp" -} diff --git a/extensions/inference-cortex-extension/resources/models/phi3-3.8b/model.json b/extensions/inference-cortex-extension/resources/models/phi3-3.8b/model.json deleted file mode 100644 index 90aa50117..000000000 --- a/extensions/inference-cortex-extension/resources/models/phi3-3.8b/model.json +++ /dev/null @@ -1,38 +0,0 @@ -{ - "sources": [ - { - "url": 
"https://huggingface.co/bartowski/Phi-3-mini-4k-instruct-GGUF/resolve/main/Phi-3-mini-4k-instruct-Q4_K_M.gguf", - "filename": "Phi-3-mini-4k-instruct-Q4_K_M.gguf" - } - ], - "id": "phi3-3.8b", - "object": "model", - "name": "Phi-3 Mini Instruct Q4", - "version": "1.3", - "description": "Phi-3 Mini is Microsoft's newest, compact model designed for mobile use.", - "format": "gguf", - "settings": { - "ctx_len": 4096, - "prompt_template": "<|user|>\n{prompt}<|end|>\n<|assistant|>\n", - "llama_model_path": "Phi-3-mini-4k-instruct-Q4_K_M.gguf", - "ngl": 33 - }, - "parameters": { - "max_tokens": 4096, - "stop": ["<|end|>"], - "temperature": 0.7, - "top_p": 0.95, - "stream": true, - "frequency_penalty": 0, - "presence_penalty": 0 - }, - "metadata": { - "author": "Microsoft", - "tags": [ - "3B", - "Finetuned" - ], - "size": 2320000000 - }, - "engine": "llama-cpp" -} \ No newline at end of file diff --git a/extensions/inference-cortex-extension/resources/models/phi3-medium/model.json b/extensions/inference-cortex-extension/resources/models/phi3-medium/model.json deleted file mode 100644 index afce04952..000000000 --- a/extensions/inference-cortex-extension/resources/models/phi3-medium/model.json +++ /dev/null @@ -1,38 +0,0 @@ -{ - "sources": [ - { - "url": "https://huggingface.co/bartowski/Phi-3-mini-4k-instruct-GGUF/resolve/main/Phi-3-mini-4k-instruct-Q4_K_M.gguf", - "filename": "Phi-3-mini-4k-instruct-Q4_K_M.gguf" - } - ], - "id": "phi3-medium", - "object": "model", - "name": "Phi-3 Medium Instruct Q4", - "version": "1.4", - "description": "Phi-3 Medium is Microsoft's latest SOTA model.", - "format": "gguf", - "settings": { - "ctx_len": 128000, - "prompt_template": "<|user|> {prompt}<|end|><|assistant|>", - "llama_model_path": "Phi-3-mini-4k-instruct-Q4_K_M.gguf", - "ngl": 33 - }, - "parameters": { - "max_tokens": 128000, - "stop": ["<|end|>"], - "temperature": 0.7, - "top_p": 0.95, - "stream": true, - "frequency_penalty": 0, - "presence_penalty": 0 - }, - "metadata": { - "author": "Microsoft", - "tags": [ - "14B", - "Finetuned" - ], - "size": 8366000000 - }, - "engine": "llama-cpp" - } \ No newline at end of file diff --git a/extensions/inference-cortex-extension/resources/models/phind-34b/model.json b/extensions/inference-cortex-extension/resources/models/phind-34b/model.json deleted file mode 100644 index f6e302173..000000000 --- a/extensions/inference-cortex-extension/resources/models/phind-34b/model.json +++ /dev/null @@ -1,35 +0,0 @@ -{ - "sources": [ - { - "filename": "phind-codellama-34b-v2.Q5_K_M.gguf", - "url": "https://huggingface.co/TheBloke/Phind-CodeLlama-34B-v2-GGUF/resolve/main/phind-codellama-34b-v2.Q5_K_M.gguf" - } - ], - "id": "phind-34b", - "object": "model", - "name": "Phind 34B Q4", - "version": "1.3", - "description": "Phind 34B is the best Open-source coding model.", - "format": "gguf", - "settings": { - "ctx_len": 16384, - "prompt_template": "### System Prompt\n{system_message}\n### User Message\n{prompt}\n### Assistant", - "llama_model_path": "phind-codellama-34b-v2.Q4_K_M.gguf", - "ngl": 49 - }, - "parameters": { - "temperature": 0.7, - "top_p": 0.95, - "stream": true, - "max_tokens": 16384, - "stop": [], - "frequency_penalty": 0, - "presence_penalty": 0 - }, - "metadata": { - "author": "Phind", - "tags": ["34B", "Finetuned"], - "size": 20220000000 - }, - "engine": "llama-cpp" -} diff --git a/extensions/inference-cortex-extension/resources/models/qwen-7b/model.json b/extensions/inference-cortex-extension/resources/models/qwen-7b/model.json deleted file mode 100644 index 
be37cac0d..000000000 --- a/extensions/inference-cortex-extension/resources/models/qwen-7b/model.json +++ /dev/null @@ -1,35 +0,0 @@ -{ - "sources": [ - { - "filename": "qwen1_5-7b-chat-q4_k_m.gguf", - "url": "https://huggingface.co/Qwen/Qwen1.5-7B-Chat-GGUF/resolve/main/qwen1_5-7b-chat-q4_k_m.gguf" - } - ], - "id": "qwen-7b", - "object": "model", - "name": "Qwen Chat 7B Q4", - "version": "1.2", - "description": "Qwen is optimized for Chinese and is ideal for everyday tasks.", - "format": "gguf", - "settings": { - "ctx_len": 32768, - "prompt_template": "<|im_start|>system\n{system_message}<|im_end|>\n<|im_start|>user\n{prompt}<|im_end|>\n<|im_start|>assistant", - "llama_model_path": "qwen1_5-7b-chat-q4_k_m.gguf", - "ngl": 33 - }, - "parameters": { - "temperature": 0.7, - "top_p": 0.95, - "stream": true, - "max_tokens": 32768, - "stop": [], - "frequency_penalty": 0, - "presence_penalty": 0 - }, - "metadata": { - "author": "Alibaba", - "tags": ["7B", "Finetuned"], - "size": 4770000000 - }, - "engine": "llama-cpp" -} diff --git a/extensions/inference-cortex-extension/resources/models/qwen2-7b/model.json b/extensions/inference-cortex-extension/resources/models/qwen2-7b/model.json deleted file mode 100644 index 210848a43..000000000 --- a/extensions/inference-cortex-extension/resources/models/qwen2-7b/model.json +++ /dev/null @@ -1,36 +0,0 @@ -{ - "sources": [ - { - "filename": "Qwen2-7B-Instruct-Q4_K_M.gguf", - "url": "https://huggingface.co/bartowski/Qwen2-7B-Instruct-GGUF/resolve/main/Qwen2-7B-Instruct-Q4_K_M.gguf" - } - ], - "id": "qwen2-7b", - "object": "model", - "name": "Qwen 2 7B Instruct Q4", - "version": "1.2", - "description": "Qwen is optimized for Chinese and is ideal for everyday tasks.", - "format": "gguf", - "settings": { - "ctx_len": 32768, - "prompt_template": "<|im_start|>system\n{system_message}<|im_end|>\n<|im_start|>user\n{prompt}<|im_end|>\n<|im_start|>assistant", - "llama_model_path": "Qwen2-7B-Instruct-Q4_K_M.gguf", - "ngl": 29 - }, - "parameters": { - "temperature": 0.7, - "top_p": 0.95, - "stream": true, - "max_tokens": 32768, - "stop": [], - "frequency_penalty": 0, - "presence_penalty": 0 - }, - "metadata": { - "author": "Alibaba", - "tags": ["7B", "Finetuned"], - "size": 4680000000 - }, - "engine": "llama-cpp" - } - \ No newline at end of file diff --git a/extensions/inference-cortex-extension/resources/models/qwen2.5-14b-instruct/model.json b/extensions/inference-cortex-extension/resources/models/qwen2.5-14b-instruct/model.json deleted file mode 100644 index 96e4d214c..000000000 --- a/extensions/inference-cortex-extension/resources/models/qwen2.5-14b-instruct/model.json +++ /dev/null @@ -1,36 +0,0 @@ -{ - "sources": [ - { - "filename": "Qwen2.5-14B-Instruct-Q4_K_M.gguf", - "url": "https://huggingface.co/bartowski/Qwen2.5-14B-Instruct-GGUF/resolve/main/Qwen2.5-14B-Instruct-Q4_K_M.gguf" - } - ], - "id": "qwen2.5-14b-instruct", - "object": "model", - "name": "Qwen2.5 14B Instruct Q4", - "version": "1.0", - "description": "The Qwen 2.5 family is specifically designed to excel in math, coding, and other computational tasks", - "format": "gguf", - "settings": { - "ctx_len": 32768, - "prompt_template": "<|im_start|>system\n{system_message}<|im_end|>\n<|im_start|>user\n{prompt}<|im_end|>\n<|im_start|>assistant", - "llama_model_path": "Qwen2.5-14B-Instruct-Q4_K_M.gguf", - "ngl": 49 - }, - "parameters": { - "temperature": 0.7, - "top_p": 0.95, - "stream": true, - "max_tokens": 32768, - "stop": ["<|endoftext|>", "<|im_end|>"], - "frequency_penalty": 0, - "presence_penalty": 0 - }, -
"metadata": { - "author": "QwenLM", - "tags": ["14B", "Featured"], - "size": 8990000000 - }, - "engine": "llama-cpp" - } - \ No newline at end of file diff --git a/extensions/inference-cortex-extension/resources/models/qwen2.5-32b-instruct/model.json b/extensions/inference-cortex-extension/resources/models/qwen2.5-32b-instruct/model.json deleted file mode 100644 index 20681dff4..000000000 --- a/extensions/inference-cortex-extension/resources/models/qwen2.5-32b-instruct/model.json +++ /dev/null @@ -1,36 +0,0 @@ -{ - "sources": [ - { - "filename": "Qwen2.5-32B-Instruct-Q4_K_M.gguf", - "url": "https://huggingface.co/bartowski/Qwen2.5-32B-Instruct-GGUF/resolve/main/Qwen2.5-32B-Instruct-Q4_K_M.gguf" - } - ], - "id": "qwen2.5-32b-instruct", - "object": "model", - "name": "Qwen2.5 32B Instruct Q4", - "version": "1.0", - "description": "The Qwen 2.5 family is specifically designed to excel in math, coding, and other computational tasks", - "format": "gguf", - "settings": { - "ctx_len": 32768, - "prompt_template": "<|im_start|>system\n{system_message}<|im_end|>\n<|im_start|>user\n{prompt}<|im_end|>\n<|im_start|>assistant", - "llama_model_path": "Qwen2.5-32B-Instruct-Q4_K_M.gguf", - "ngl": 65 - }, - "parameters": { - "temperature": 0.7, - "top_p": 0.95, - "stream": true, - "max_tokens": 32768, - "stop": ["<|endoftext|>", "<|im_end|>"], - "frequency_penalty": 0, - "presence_penalty": 0 - }, - "metadata": { - "author": "QwenLM", - "tags": ["32B"], - "size": 19900000000 - }, - "engine": "llama-cpp" - } - \ No newline at end of file diff --git a/extensions/inference-cortex-extension/resources/models/qwen2.5-72b-instruct/model.json b/extensions/inference-cortex-extension/resources/models/qwen2.5-72b-instruct/model.json deleted file mode 100644 index b741539eb..000000000 --- a/extensions/inference-cortex-extension/resources/models/qwen2.5-72b-instruct/model.json +++ /dev/null @@ -1,36 +0,0 @@ -{ - "sources": [ - { - "filename": "Qwen2.5-72B-Instruct-Q4_K_M.gguf", - "url": "https://huggingface.co/bartowski/Qwen2.5-72B-Instruct-GGUF/resolve/main/Qwen2.5-72B-Instruct-Q4_K_M.gguf" - } - ], - "id": "qwen2.5-72b-instruct", - "object": "model", - "name": "Qwen2.5 72B Instruct Q4", - "version": "1.0", - "description": "The Qwen 2.5 family is specifically designed to excel in math, coding, and other computational tasks", - "format": "gguf", - "settings": { - "ctx_len": 32768, - "prompt_template": "<|im_start|>system\n{system_message}<|im_end|>\n<|im_start|>user\n{prompt}<|im_end|>\n<|im_start|>assistant", - "llama_model_path": "Qwen2.5-72B-Instruct-Q4_K_M.gguf", - "ngl": 81 - }, - "parameters": { - "temperature": 0.7, - "top_p": 0.95, - "stream": true, - "max_tokens": 32768, - "stop": ["<|endoftext|>", "<|im_end|>"], - "frequency_penalty": 0, - "presence_penalty": 0 - }, - "metadata": { - "author": "QwenLM", - "tags": ["72B"], - "size": 47400000000 - }, - "engine": "llama-cpp" - } - \ No newline at end of file diff --git a/extensions/inference-cortex-extension/resources/models/qwen2.5-7b-instruct/model.json b/extensions/inference-cortex-extension/resources/models/qwen2.5-7b-instruct/model.json deleted file mode 100644 index 6741aef64..000000000 --- a/extensions/inference-cortex-extension/resources/models/qwen2.5-7b-instruct/model.json +++ /dev/null @@ -1,36 +0,0 @@ -{ - "sources": [ - { - "filename": "Qwen2.5-7B-Instruct-Q4_K_M.gguf", - "url": "https://huggingface.co/bartowski/Qwen2.5-7B-Instruct-GGUF/resolve/main/Qwen2.5-7B-Instruct-Q4_K_M.gguf" - } - ], - "id": "qwen2.5-7b-instruct", - "object": "model", - 
"name": "Qwen2.5 7B Instruct Q4", - "version": "1.0", - "description": "The Qwen 2.5 family is specifically designed to excel in math, coding, and other computational tasks", - "format": "gguf", - "settings": { - "ctx_len": 32768, - "prompt_template": "<|im_start|>system\n{system_message}<|im_end|>\n<|im_start|>user\n{prompt}<|im_end|>\n<|im_start|>assistant", - "llama_model_path": "Qwen2.5-7B-Instruct-Q4_K_M.gguf", - "ngl": 29 - }, - "parameters": { - "temperature": 0.7, - "top_p": 0.95, - "stream": true, - "max_tokens": 32768, - "stop": ["<|endoftext|>", "<|im_end|>"], - "frequency_penalty": 0, - "presence_penalty": 0 - }, - "metadata": { - "author": "QwenLM", - "tags": ["7B", "Featured"], - "size": 4680000000 - }, - "engine": "llama-cpp" - } - \ No newline at end of file diff --git a/extensions/inference-cortex-extension/resources/models/qwen2.5-coder-14b-instruct/model.json b/extensions/inference-cortex-extension/resources/models/qwen2.5-coder-14b-instruct/model.json deleted file mode 100644 index a445ee2db..000000000 --- a/extensions/inference-cortex-extension/resources/models/qwen2.5-coder-14b-instruct/model.json +++ /dev/null @@ -1,36 +0,0 @@ -{ - "sources": [ - { - "filename": "Qwen2.5-Coder-14B-Instruct-Q4_K_M.gguf", - "url": "https://huggingface.co/bartowski/Qwen2.5-Coder-14B-Instruct-GGUF/resolve/main/Qwen2.5-Coder-14B-Instruct-Q4_K_M.gguf" - } - ], - "id": "qwen2.5-coder-14b-instruct", - "object": "model", - "name": "Qwen2.5 Coder 14B Instruct Q4", - "version": "1.0", - "description": "Qwen2.5-Coder is the latest series of Code-Specific Qwen large language models. Significantly improvements in code generation, code reasoning and code fixing.", - "format": "gguf", - "settings": { - "ctx_len": 32768, - "prompt_template": "<|im_start|>system\n{system_message}<|im_end|>\n<|im_start|>user\n{prompt}<|im_end|>\n<|im_start|>assistant", - "llama_model_path": "Qwen2.5-Coder-14B-Instruct-Q4_K_M.gguf", - "ngl": 29 - }, - "parameters": { - "temperature": 0.7, - "top_p": 0.95, - "stream": true, - "max_tokens": 32768, - "stop": ["<|endoftext|>", "<|im_end|>"], - "frequency_penalty": 0, - "presence_penalty": 0 - }, - "metadata": { - "author": "QwenLM", - "tags": ["14B", "Featured"], - "size": 8990000000 - }, - "engine": "llama-cpp" - } - \ No newline at end of file diff --git a/extensions/inference-cortex-extension/resources/models/qwen2.5-coder-32b-instruct/model.json b/extensions/inference-cortex-extension/resources/models/qwen2.5-coder-32b-instruct/model.json deleted file mode 100644 index cffdf03df..000000000 --- a/extensions/inference-cortex-extension/resources/models/qwen2.5-coder-32b-instruct/model.json +++ /dev/null @@ -1,36 +0,0 @@ -{ - "sources": [ - { - "filename": "Qwen2.5-Coder-32B-Instruct-Q4_K_M.gguf", - "url": "https://huggingface.co/bartowski/Qwen2.5-Coder-32B-Instruct-GGUF/resolve/main/Qwen2.5-Coder-32B-Instruct-Q4_K_M.gguf" - } - ], - "id": "qwen2.5-coder-32b-instruct", - "object": "model", - "name": "Qwen2.5 Coder 32B Instruct Q4", - "version": "1.0", - "description": "Qwen2.5-Coder is the latest series of Code-Specific Qwen large language models. 
Significantly improvements in code generation, code reasoning and code fixing.", - "format": "gguf", - "settings": { - "ctx_len": 32768, - "prompt_template": "<|im_start|>system\n{system_message}<|im_end|>\n<|im_start|>user\n{prompt}<|im_end|>\n<|im_start|>assistant", - "llama_model_path": "Qwen2.5-Coder-32B-Instruct-Q4_K_M.gguf", - "ngl": 29 - }, - "parameters": { - "temperature": 0.7, - "top_p": 0.95, - "stream": true, - "max_tokens": 32768, - "stop": ["<|endoftext|>", "<|im_end|>"], - "frequency_penalty": 0, - "presence_penalty": 0 - }, - "metadata": { - "author": "QwenLM", - "tags": ["32B", "Featured"], - "size": 19900000000 - }, - "engine": "llama-cpp" - } - \ No newline at end of file diff --git a/extensions/inference-cortex-extension/resources/models/qwen2.5-coder-7b-instruct/model.json b/extensions/inference-cortex-extension/resources/models/qwen2.5-coder-7b-instruct/model.json deleted file mode 100644 index 9162c8a43..000000000 --- a/extensions/inference-cortex-extension/resources/models/qwen2.5-coder-7b-instruct/model.json +++ /dev/null @@ -1,36 +0,0 @@ -{ - "sources": [ - { - "filename": "Qwen2.5-Coder-7B-Instruct-Q4_K_M.gguf", - "url": "https://huggingface.co/bartowski/Qwen2.5-Coder-7B-Instruct-GGUF/resolve/main/Qwen2.5-Coder-7B-Instruct-Q4_K_M.gguf" - } - ], - "id": "qwen2.5-coder-7b-instruct", - "object": "model", - "name": "Qwen2.5 Coder 7B Instruct Q4", - "version": "1.0", - "description": "Qwen2.5-Coder is the latest series of Code-Specific Qwen large language models. Significantly improvements in code generation, code reasoning and code fixing.", - "format": "gguf", - "settings": { - "ctx_len": 32768, - "prompt_template": "<|im_start|>system\n{system_message}<|im_end|>\n<|im_start|>user\n{prompt}<|im_end|>\n<|im_start|>assistant", - "llama_model_path": "Qwen2.5-Coder-7B-Instruct-Q4_K_M.gguf", - "ngl": 29 - }, - "parameters": { - "temperature": 0.7, - "top_p": 0.95, - "stream": true, - "max_tokens": 32768, - "stop": ["<|endoftext|>", "<|im_end|>"], - "frequency_penalty": 0, - "presence_penalty": 0 - }, - "metadata": { - "author": "QwenLM", - "tags": ["7B", "Featured"], - "size": 4680000000 - }, - "engine": "llama-cpp" - } - \ No newline at end of file diff --git a/extensions/inference-cortex-extension/resources/models/stable-zephyr-3b/model.json b/extensions/inference-cortex-extension/resources/models/stable-zephyr-3b/model.json deleted file mode 100644 index a6c84bd17..000000000 --- a/extensions/inference-cortex-extension/resources/models/stable-zephyr-3b/model.json +++ /dev/null @@ -1,35 +0,0 @@ -{ - "sources": [ - { - "url": "https://huggingface.co/TheBloke/stablelm-zephyr-3b-GGUF/resolve/main/stablelm-zephyr-3b.Q8_0.gguf", - "filename": "stablelm-zephyr-3b.Q8_0.gguf" - } - ], - "id": "stable-zephyr-3b", - "object": "model", - "name": "Stable Zephyr 3B Q8", - "version": "1.1", - "description": "StableLM Zephyr 3B is a best model for low-end machine.", - "format": "gguf", - "settings": { - "ctx_len": 4096, - "prompt_template": "<|user|>\n{prompt}<|endoftext|>\n<|assistant|>", - "llama_model_path": "stablelm-zephyr-3b.Q8_0.gguf", - "ngl": 33 - }, - "parameters": { - "temperature": 0.7, - "top_p": 0.95, - "stream": true, - "max_tokens": 4096, - "stop": ["<|endoftext|>"], - "frequency_penalty": 0, - "presence_penalty": 0 - }, - "metadata": { - "author": "StabilityAI", - "tags": ["3B", "Finetuned", "Tiny"], - "size": 2970000000 - }, - "engine": "llama-cpp" - } \ No newline at end of file diff --git 
a/extensions/inference-cortex-extension/resources/models/stealth-v1.2-7b/model.json b/extensions/inference-cortex-extension/resources/models/stealth-v1.2-7b/model.json deleted file mode 100644 index ffb32922e..000000000 --- a/extensions/inference-cortex-extension/resources/models/stealth-v1.2-7b/model.json +++ /dev/null @@ -1,34 +0,0 @@ -{ - "sources": [ - { - "filename": "stealth-v1.3.Q4_K_M.gguf", - "url": "https://huggingface.co/janhq/stealth-v1.3-GGUF/resolve/main/stealth-v1.3.Q4_K_M.gguf" - } - ], - "id": "stealth-v1.2-7b", - "object": "model", - "name": "Stealth 7B Q4", - "version": "1.2", - "description": "This is a new experimental model family designed to enhance mathematical and logical abilities.", - "format": "gguf", - "settings": { - "ctx_len": 32768, - "prompt_template": "<|im_start|>system\n{system_message}<|im_end|>\n<|im_start|>user\n{prompt}<|im_end|>\n<|im_start|>assistant", - "llama_model_path": "stealth-v1.3.Q4_K_M.gguf", - "ngl": 33 - }, - "parameters": { - "temperature": 0.7, - "top_p": 0.95, - "stream": true, - "max_tokens": 32768, - "frequency_penalty": 0, - "presence_penalty": 0 - }, - "metadata": { - "author": "Jan", - "tags": ["7B", "Finetuned"], - "size": 4370000000 - }, - "engine": "llama-cpp" -} diff --git a/extensions/inference-cortex-extension/resources/models/tinyllama-1.1b/model.json b/extensions/inference-cortex-extension/resources/models/tinyllama-1.1b/model.json deleted file mode 100644 index b6aeea3e3..000000000 --- a/extensions/inference-cortex-extension/resources/models/tinyllama-1.1b/model.json +++ /dev/null @@ -1,35 +0,0 @@ -{ - "sources": [ - { - "filename": "tinyllama-1.1b-chat-v1.0.Q4_K_M.gguf", - "url": "https://huggingface.co/TheBloke/TinyLlama-1.1B-Chat-v1.0-GGUF/resolve/main/tinyllama-1.1b-chat-v1.0.Q4_K_M.gguf" - } - ], - "id": "tinyllama-1.1b", - "object": "model", - "name": "TinyLlama Chat 1.1B Q4", - "version": "1.1", - "description": "TinyLlama is a tiny model with only 1.1B parameters. It's a good model for less powerful computers.", - "format": "gguf", - "settings": { - "ctx_len": 4096, - "prompt_template": "<|system|>\n{system_message}<|user|>\n{prompt}<|assistant|>", - "llama_model_path": "tinyllama-1.1b-chat-v1.0.Q4_K_M.gguf", - "ngl": 23 - }, - "parameters": { - "temperature": 0.7, - "top_p": 0.95, - "stream": true, - "max_tokens": 2048, - "stop": [], - "frequency_penalty": 0, - "presence_penalty": 0 - }, - "metadata": { - "author": "TinyLlama", - "tags": ["Tiny", "Foundation Model"], - "size": 669000000 - }, - "engine": "llama-cpp" -} diff --git a/extensions/inference-cortex-extension/resources/models/trinity-v1.2-7b/model.json b/extensions/inference-cortex-extension/resources/models/trinity-v1.2-7b/model.json deleted file mode 100644 index fae5d0ca5..000000000 --- a/extensions/inference-cortex-extension/resources/models/trinity-v1.2-7b/model.json +++ /dev/null @@ -1,35 +0,0 @@ -{ - "sources": [ - { - "filename": "trinity-v1.2.Q4_K_M.gguf", - "url": "https://huggingface.co/janhq/trinity-v1.2-GGUF/resolve/main/trinity-v1.2.Q4_K_M.gguf" - } - ], - "id": "trinity-v1.2-7b", - "object": "model", - "name": "Trinity-v1.2 7B Q4", - "version": "1.2", - "description": "Trinity is an experimental model merge using the Slerp method.
Recommended for daily assistance purposes.", - "format": "gguf", - "settings": { - "ctx_len": 32768, - "prompt_template": "<|im_start|>system\n{system_message}<|im_end|>\n<|im_start|>user\n{prompt}<|im_end|>\n<|im_start|>assistant", - "llama_model_path": "trinity-v1.2.Q4_K_M.gguf", - "ngl": 33 - }, - "parameters": { - "temperature": 0.7, - "top_p": 0.95, - "stream": true, - "max_tokens": 32768, - "frequency_penalty": 0, - "presence_penalty": 0 - }, - "metadata": { - "author": "Jan", - "tags": ["7B", "Merged"], - "size": 4370000000, - "cover": "https://raw.githubusercontent.com/janhq/jan/dev/models/trinity-v1.2-7b/cover.png" - }, - "engine": "llama-cpp" -} diff --git a/extensions/inference-cortex-extension/resources/models/vistral-7b/model.json b/extensions/inference-cortex-extension/resources/models/vistral-7b/model.json deleted file mode 100644 index 46b6999a6..000000000 --- a/extensions/inference-cortex-extension/resources/models/vistral-7b/model.json +++ /dev/null @@ -1,36 +0,0 @@ -{ - "sources": [ - { - "filename": "vistral-7b-chat-dpo.Q4_K_M.gguf", - "url": "https://huggingface.co/janhq/vistral-7b-chat-dpo-GGUF/resolve/main/vistral-7b-chat-dpo.Q4_K_M.gguf" - } - ], - "id": "vistral-7b", - "object": "model", - "name": "Vistral 7B Q4", - "version": "1.2", - "description": "Vistral 7B has a deep understanding of Vietnamese.", - "format": "gguf", - "settings": { - "ctx_len": 32768, - "prompt_template": "[INST] <<SYS>>\n{system_message}\n<</SYS>>\n{prompt} [/INST]", - "llama_model_path": "vistral-7b-chat-dpo.Q4_K_M.gguf", - "ngl": 33 - }, - "parameters": { - "temperature": 0.7, - "top_p": 0.95, - "stream": true, - "max_tokens": 32768, - "stop": [], - "frequency_penalty": 0, - "presence_penalty": 0 - }, - "metadata": { - "author": "Viet Mistral, Jan", - "tags": ["7B", "Finetuned"], - "size": 4410000000 - }, - "engine": "llama-cpp" - } - \ No newline at end of file diff --git a/extensions/inference-cortex-extension/resources/models/wizardcoder-13b/model.json b/extensions/inference-cortex-extension/resources/models/wizardcoder-13b/model.json deleted file mode 100644 index cf39ad857..000000000 --- a/extensions/inference-cortex-extension/resources/models/wizardcoder-13b/model.json +++ /dev/null @@ -1,35 +0,0 @@ -{ - "sources": [ - { - "filename": "wizardcoder-python-13b-v1.0.Q4_K_M.gguf", - "url": "https://huggingface.co/TheBloke/WizardCoder-Python-13B-V1.0-GGUF/resolve/main/wizardcoder-python-13b-v1.0.Q4_K_M.gguf" - } - ], - "id": "wizardcoder-13b", - "object": "model", - "name": "Wizard Coder Python 13B Q4", - "version": "1.2", - "description": "WizardCoder 13B is a Python coding model.
This model demonstrates high proficiency in specific domains like coding and mathematics.", - "format": "gguf", - "settings": { - "ctx_len": 16384, - "prompt_template": "### Instruction:\n{prompt}\n### Response:", - "llama_model_path": "wizardcoder-python-13b-v1.0.Q4_K_M.gguf", - "ngl": 41 - }, - "parameters": { - "temperature": 0.7, - "top_p": 0.95, - "stream": true, - "max_tokens": 16384, - "stop": [], - "frequency_penalty": 0, - "presence_penalty": 0 - }, - "metadata": { - "author": "WizardLM, The Bloke", - "tags": ["Recommended", "13B", "Finetuned"], - "size": 7870000000 - }, - "engine": "llama-cpp" -} diff --git a/extensions/inference-cortex-extension/resources/models/yi-34b/model.json b/extensions/inference-cortex-extension/resources/models/yi-34b/model.json deleted file mode 100644 index 4f56650d7..000000000 --- a/extensions/inference-cortex-extension/resources/models/yi-34b/model.json +++ /dev/null @@ -1,35 +0,0 @@ -{ - "sources": [ - { - "filename": "yi-34b-chat.Q4_K_M.gguf", - "url": "https://huggingface.co/TheBloke/Yi-34B-Chat-GGUF/resolve/main/yi-34b-chat.Q4_K_M.gguf" - } - ], - "id": "yi-34b", - "object": "model", - "name": "Yi 34B Q4", - "version": "1.1", - "description": "Yi-34B, a specialized chat model, is known for its diverse and creative responses and excels across various NLP tasks and benchmarks.", - "format": "gguf", - "settings": { - "ctx_len": 4096, - "prompt_template": "<|im_start|>system\n{system_message}<|im_end|>\n<|im_start|>user\n{prompt}<|im_end|>\n<|im_start|>assistant", - "llama_model_path": "yi-34b-chat.Q4_K_M.gguf", - "ngl": 61 - }, - "parameters": { - "temperature": 0.7, - "top_p": 0.95, - "stream": true, - "max_tokens": 4096, - "stop": [], - "frequency_penalty": 0, - "presence_penalty": 0 - }, - "metadata": { - "author": "01-ai, The Bloke", - "tags": ["34B", "Foundational Model"], - "size": 20660000000 - }, - "engine": "llama-cpp" -} diff --git a/extensions/inference-cortex-extension/rolldown.config.mjs b/extensions/inference-cortex-extension/rolldown.config.mjs index 2ff631361..f8d0587b1 100644 --- a/extensions/inference-cortex-extension/rolldown.config.mjs +++ b/extensions/inference-cortex-extension/rolldown.config.mjs @@ -1,60 +1,6 @@ import { defineConfig } from 'rolldown' import packageJson from './package.json' with { type: 'json' } import defaultSettingJson from './resources/default_settings.json' with { type: 'json' } -import bakllavaJson from './resources/models/bakllava-1/model.json' with { type: 'json' } -import codeninja7bJson from './resources/models/codeninja-1.0-7b/model.json' with { type: 'json' } -import commandr34bJson from './resources/models/command-r-34b/model.json' with { type: 'json' } -import deepseekCoder13bJson from './resources/models/deepseek-coder-1.3b/model.json' with { type: 'json' } -import deepseekCoder34bJson from './resources/models/deepseek-coder-34b/model.json' with { type: 'json' } -import gemma112bJson from './resources/models/gemma-1.1-2b/model.json' with { type: 'json' } -import gemma117bJson from './resources/models/gemma-1.1-7b/model.json' with { type: 'json' } -import llama2Chat70bJson from './resources/models/llama2-chat-70b/model.json' with { type: 'json' } -import llama2Chat7bJson from './resources/models/llama2-chat-7b/model.json' with { type: 'json' } -import llamacorn1bJson from './resources/models/llamacorn-1.1b/model.json' with { type: 'json' } -import llava13bJson from './resources/models/llava-13b/model.json' with { type: 'json' } -import llava7bJson from './resources/models/llava-7b/model.json' with
{ type: 'json' } -import mistralIns7bq4Json from './resources/models/mistral-ins-7b-q4/model.json' with { type: 'json' } -import mixtral8x7bInstructJson from './resources/models/mixtral-8x7b-instruct/model.json' with { type: 'json' } -import noromaid7bJson from './resources/models/noromaid-7b/model.json' with { type: 'json' } -import openchat357bJson from './resources/models/openchat-3.5-7b/model.json' with { type: 'json' } -import phi3bJson from './resources/models/phi3-3.8b/model.json' with { type: 'json' } -import phind34bJson from './resources/models/phind-34b/model.json' with { type: 'json' } -import qwen7bJson from './resources/models/qwen-7b/model.json' with { type: 'json' } -import stableZephyr3bJson from './resources/models/stable-zephyr-3b/model.json' with { type: 'json' } -import stealthv127bJson from './resources/models/stealth-v1.2-7b/model.json' with { type: 'json' } -import tinyllama11bJson from './resources/models/tinyllama-1.1b/model.json' with { type: 'json' } -import trinityv127bJson from './resources/models/trinity-v1.2-7b/model.json' with { type: 'json' } -import vistral7bJson from './resources/models/vistral-7b/model.json' with { type: 'json' } -import wizardcoder13bJson from './resources/models/wizardcoder-13b/model.json' with { type: 'json' } -import yi34bJson from './resources/models/yi-34b/model.json' with { type: 'json' } -import llama3Json from './resources/models/llama3-8b-instruct/model.json' with { type: 'json' } -import llama3Hermes8bJson from './resources/models/llama3-hermes-8b/model.json' with { type: 'json' } -import aya8bJson from './resources/models/aya-23-8b/model.json' with { type: 'json' } -import aya35bJson from './resources/models/aya-23-35b/model.json' with { type: 'json' } -import phimediumJson from './resources/models/phi3-medium/model.json' with { type: 'json' } -import codestralJson from './resources/models/codestral-22b/model.json' with { type: 'json' } -import qwen2Json from './resources/models/qwen2-7b/model.json' with { type: 'json' } -import llama318bJson from './resources/models/llama3.1-8b-instruct/model.json' with { type: 'json' } -import llama3170bJson from './resources/models/llama3.1-70b-instruct/model.json' with { type: 'json' } -import gemma22bJson from './resources/models/gemma-2-2b/model.json' with { type: 'json' } -import gemma29bJson from './resources/models/gemma-2-9b/model.json' with { type: 'json' } -import gemma227bJson from './resources/models/gemma-2-27b/model.json' with { type: 'json' } -import llama321bJson from './resources/models/llama3.2-1b-instruct/model.json' with { type: 'json' } -import llama323bJson from './resources/models/llama3.2-3b-instruct/model.json' with { type: 'json' } -import qwen257bJson from './resources/models/qwen2.5-7b-instruct/model.json' with { type: 'json' } -import qwen25coder7bJson from './resources/models/qwen2.5-coder-7b-instruct/model.json' with { type: 'json' } -import qwen25coder14bJson from './resources/models/qwen2.5-coder-14b-instruct/model.json' with { type: 'json' } -import qwen25coder32bJson from './resources/models/qwen2.5-coder-32b-instruct/model.json' with { type: 'json' } -import qwen2514bJson from './resources/models/qwen2.5-14b-instruct/model.json' with { type: 'json' } -import qwen2532bJson from './resources/models/qwen2.5-32b-instruct/model.json' with { type: 'json' } -import qwen2572bJson from './resources/models/qwen2.5-72b-instruct/model.json' with { type: 'json' } - -import deepseekR1DistillQwen_1_5b from './resources/models/deepseek-r1-distill-qwen-1.5b/model.json' 
with { type: 'json' } -import deepseekR1DistillQwen_7b from './resources/models/deepseek-r1-distill-qwen-7b/model.json' with { type: 'json' } -import deepseekR1DistillQwen_14b from './resources/models/deepseek-r1-distill-qwen-14b/model.json' with { type: 'json' } -import deepseekR1DistillQwen_32b from './resources/models/deepseek-r1-distill-qwen-32b/model.json' with { type: 'json' } -import deepseekR1DistillLlama_8b from './resources/models/deepseek-r1-distill-llama-8b/model.json' with { type: 'json' } -import deepseekR1DistillLlama_70b from './resources/models/deepseek-r1-distill-llama-70b/model.json' with { type: 'json' } export default defineConfig([ { @@ -65,61 +11,6 @@ export default defineConfig([ }, platform: 'browser', define: { - MODELS: JSON.stringify([ - bakllavaJson, - codeninja7bJson, - commandr34bJson, - deepseekCoder13bJson, - deepseekCoder34bJson, - gemma112bJson, - gemma117bJson, - llama2Chat70bJson, - llama2Chat7bJson, - llamacorn1bJson, - llava13bJson, - llava7bJson, - mistralIns7bq4Json, - mixtral8x7bInstructJson, - noromaid7bJson, - openchat357bJson, - phi3bJson, - phind34bJson, - qwen7bJson, - stableZephyr3bJson, - stealthv127bJson, - tinyllama11bJson, - trinityv127bJson, - vistral7bJson, - wizardcoder13bJson, - yi34bJson, - llama3Json, - llama3Hermes8bJson, - phimediumJson, - aya8bJson, - aya35bJson, - codestralJson, - qwen2Json, - llama318bJson, - llama3170bJson, - gemma22bJson, - gemma29bJson, - gemma227bJson, - llama321bJson, - llama323bJson, - qwen257bJson, - qwen25coder7bJson, - qwen25coder14bJson, - qwen25coder32bJson, - qwen2514bJson, - qwen2532bJson, - qwen2572bJson, - deepseekR1DistillQwen_1_5b, - deepseekR1DistillQwen_7b, - deepseekR1DistillQwen_14b, - deepseekR1DistillQwen_32b, - deepseekR1DistillLlama_8b, - deepseekR1DistillLlama_70b, - ]), NODE: JSON.stringify(`${packageJson.name}/${packageJson.node}`), SETTINGS: JSON.stringify(defaultSettingJson), CORTEX_API_URL: JSON.stringify(`http://127.0.0.1:${process.env.CORTEX_API_PORT ?? "39291"}`), diff --git a/extensions/inference-cortex-extension/src/@types/global.d.ts b/extensions/inference-cortex-extension/src/@types/global.d.ts index 2de432c29..52f97b9ab 100644 --- a/extensions/inference-cortex-extension/src/@types/global.d.ts +++ b/extensions/inference-cortex-extension/src/@types/global.d.ts @@ -3,4 +3,3 @@ declare const CORTEX_API_URL: string declare const CORTEX_SOCKET_URL: string declare const CORTEX_ENGINE_VERSION: string declare const SETTINGS: any -declare const MODELS: any diff --git a/extensions/inference-cortex-extension/src/index.ts b/extensions/inference-cortex-extension/src/index.ts index 05efaf735..f864f0c44 100644 --- a/extensions/inference-cortex-extension/src/index.ts +++ b/extensions/inference-cortex-extension/src/index.ts @@ -10,7 +10,6 @@ import { Model, executeOnMain, EngineEvent, - systemInformation, joinPath, LocalOAIEngine, InferenceEngine, @@ -84,10 +83,6 @@ export default class JanInferenceCortexExtension extends LocalOAIEngine { * Subscribes to events emitted by the @janhq/core package. 
*/ async onLoad() { - const models = MODELS as Model[] - - this.registerModels(models) - super.onLoad() // Register Settings diff --git a/web/screens/Thread/ThreadCenterPanel/ChatBody/OnDeviceStarterScreen/index.tsx b/web/screens/Thread/ThreadCenterPanel/ChatBody/OnDeviceStarterScreen/index.tsx index 25875dc2a..58413a3b7 100644 --- a/web/screens/Thread/ThreadCenterPanel/ChatBody/OnDeviceStarterScreen/index.tsx +++ b/web/screens/Thread/ThreadCenterPanel/ChatBody/OnDeviceStarterScreen/index.tsx @@ -1,4 +1,4 @@ -import React, { Fragment, useState } from 'react' +import React, { Fragment, useMemo, useState } from 'react' import Image from 'next/image' @@ -70,6 +70,14 @@ const OnDeviceStarterScreen = ({ isShowStarterScreen }: Props) => { manualRecommendationModel.includes(x.id) ) + const filteredModels = useMemo( + () => + sources?.filter((x) => + x.id.toLowerCase().includes(searchValue.toLowerCase()) + ), + [sources, searchValue] + ) + const remoteModel = configuredModels.filter( (x) => !isLocalEngine(engines, x.engine) ) @@ -139,7 +147,7 @@ const OnDeviceStarterScreen = ({ isShowStarterScreen }: Props) => {

) : ( - sources?.map((model) => { + filteredModels?.map((model) => { const isDownloading = downloadingModels.some( (md) => md === (model.models[0]?.id ?? model.id) )
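All of the model.json files deleted in this change share one schema. As a reference, the sketch below renders that shape as TypeScript, with field names taken from the diffs above; the interface names are illustrative, and the actual Model type exported by @janhq/core may differ in optionality and extra fields.

// Illustrative sketch of the schema shared by the deleted model.json files.
// Field names come from the diffs above; this is not the real @janhq/core type.
interface ModelSource {
  filename: string
  url: string
}

interface BundledModelJson {
  sources: ModelSource[]
  id: string
  object: 'model'
  name: string
  version: string
  description: string
  format: 'gguf'
  settings: {
    ctx_len: number
    prompt_template: string
    llama_model_path: string
    ngl: number // number of layers to offload to the GPU
    vision_model?: boolean // vision models only, e.g. bakllava-1
    text_model?: boolean
    mmproj?: string // multimodal projector weights, vision models only
  }
  parameters: {
    max_tokens: number
    temperature?: number
    top_p?: number
    stream?: boolean
    stop?: string[]
    frequency_penalty?: number
    presence_penalty?: number
  }
  metadata: {
    author: string
    tags: string[]
    size: number // download size in bytes
    cover?: string // optional cover image URL
  }
  engine: 'llama-cpp'
}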
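Removing the MODELS define from rolldown.config.mjs and the registerModels(models) call from onLoad() means the extension no longer ships a model catalog baked in at build time. One plausible replacement, sketched below purely as an assumption, is to query the local Cortex server through the CORTEX_API_URL global the config still defines; the GET /v1/models endpoint and its response shape follow the OpenAI-compatible convention and are not shown anywhere in this diff.

// Hypothetical sketch only: fetch the model list from the local Cortex server
// at runtime instead of bundling JSON at build time. Assumes an
// OpenAI-compatible GET /v1/models endpoint returning { data: [...] }.
declare const CORTEX_API_URL: string

async function listCortexModels(): Promise<unknown[]> {
  const res = await fetch(`${CORTEX_API_URL}/v1/models`)
  if (!res.ok) {
    throw new Error(`GET /v1/models failed with status ${res.status}`)
  }
  const body = (await res.json()) as { data?: unknown[] }
  return body.data ?? []
}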
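On the web side, the starter screen previously filtered sources inline during render; the new filteredModels memo recomputes the case-insensitive id match only when sources or searchValue change. A self-contained sketch of the same pattern follows; the component, prop, and type names are illustrative rather than taken from the Jan codebase.

import React, { useMemo, useState } from 'react'

type ModelSource = { id: string }

// Illustrative component: memoize the case-insensitive id filter so the list
// is re-filtered only when `sources` or `searchValue` change, not every render.
function ModelSearchList({ sources }: { sources?: ModelSource[] }) {
  const [searchValue, setSearchValue] = useState('')

  const filteredModels = useMemo(
    () =>
      sources?.filter((x) =>
        x.id.toLowerCase().includes(searchValue.toLowerCase())
      ),
    [sources, searchValue]
  )

  return (
    <>
      <input
        value={searchValue}
        onChange={(e) => setSearchValue(e.target.value)}
        placeholder="Search models"
      />
      <ul>
        {filteredModels?.map((model) => (
          <li key={model.id}>{model.id}</li>
        ))}
      </ul>
    </>
  )
}

export default ModelSearchList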