chore: add llama3.2 and qwen models
This commit is contained in:
parent
b0b49f44f6
commit
358d8f83f9
@ -1,7 +1,7 @@
|
||||
{
|
||||
"name": "@janhq/inference-cortex-extension",
|
||||
"productName": "Cortex Inference Engine",
|
||||
"version": "1.0.18",
|
||||
"version": "1.0.19",
|
||||
"description": "This extension embeds cortex.cpp, a lightweight inference engine written in C++. See https://jan.ai.\nAdditional dependencies could be installed to run without Cuda Toolkit installation.",
|
||||
"main": "dist/index.js",
|
||||
"node": "dist/node/index.cjs.js",
|
||||
|
||||
@ -8,7 +8,7 @@
|
||||
"id": "command-r-34b",
|
||||
"object": "model",
|
||||
"name": "Command-R v01 34B Q4",
|
||||
"version": "1.5",
|
||||
"version": "1.6",
|
||||
"description": "C4AI Command-R developed by CohereAI is optimized for a variety of use cases including reasoning, summarization, and question answering.",
|
||||
"format": "gguf",
|
||||
"settings": {
|
||||
@ -28,7 +28,7 @@
|
||||
},
|
||||
"metadata": {
|
||||
"author": "CohereAI",
|
||||
"tags": ["34B", "Finetuned", "Featured"],
|
||||
"tags": ["34B", "Finetuned"],
|
||||
"size": 21500000000
|
||||
},
|
||||
"engine": "nitro"
|
||||
|
||||
@ -8,7 +8,7 @@
|
||||
"id": "gemma-2-27b-it",
|
||||
"object": "model",
|
||||
"name": "Gemma 2 27B Q4",
|
||||
"version": "1.0",
|
||||
"version": "1.1",
|
||||
"description": "Gemma is built from the same technology with Google's Gemini.",
|
||||
"format": "gguf",
|
||||
"settings": {
|
||||
@ -33,8 +33,7 @@
|
||||
"tags": [
|
||||
"27B",
|
||||
"Conversational",
|
||||
"Text-generation",
|
||||
"Featured"
|
||||
"Text-generation"
|
||||
],
|
||||
"size": 16600000000
|
||||
},
|
||||
|
||||
@ -8,7 +8,7 @@
|
||||
"id": "gemma-2-2b-it",
|
||||
"object": "model",
|
||||
"name": "Gemma 2 2B Q4",
|
||||
"version": "1.0",
|
||||
"version": "1.1",
|
||||
"description": "Gemma is built from the same technology with Google's Gemini.",
|
||||
"format": "gguf",
|
||||
"settings": {
|
||||
@ -34,8 +34,7 @@
|
||||
"2B",
|
||||
"Tiny",
|
||||
"Conversational",
|
||||
"Text-generation",
|
||||
"Featured"
|
||||
"Text-generation"
|
||||
],
|
||||
"size": 1710000000
|
||||
},
|
||||
|
||||
@ -8,7 +8,7 @@
|
||||
"id": "gemma-2-9b-it",
|
||||
"object": "model",
|
||||
"name": "Gemma 2 9B Q4",
|
||||
"version": "1.0",
|
||||
"version": "1.1",
|
||||
"description": "Gemma is built from the same technology with Google's Gemini.",
|
||||
"format": "gguf",
|
||||
"settings": {
|
||||
@ -33,8 +33,7 @@
|
||||
"tags": [
|
||||
"9B",
|
||||
"Conversational",
|
||||
"Text-generation",
|
||||
"Featured"
|
||||
"Text-generation"
|
||||
],
|
||||
"size": 5760000000
|
||||
},
|
||||
|
||||
@ -8,7 +8,7 @@
|
||||
"id": "llama3.1-70b-instruct",
|
||||
"object": "model",
|
||||
"name": "Llama 3.1 70B Instruct Q4",
|
||||
"version": "1.1",
|
||||
"version": "1.2",
|
||||
"description": "Meta's Llama 3.1 excels at general usage situations, including chat, general world knowledge, and coding.",
|
||||
"format": "gguf",
|
||||
"settings": {
|
||||
@ -33,8 +33,7 @@
|
||||
"metadata": {
|
||||
"author": "MetaAI",
|
||||
"tags": [
|
||||
"70B",
|
||||
"Featured"
|
||||
"70B"
|
||||
],
|
||||
"size": 42500000000
|
||||
},
|
||||
|
||||
@ -8,7 +8,7 @@
|
||||
"id": "llama3.1-8b-instruct",
|
||||
"object": "model",
|
||||
"name": "Llama 3.1 8B Instruct Q4",
|
||||
"version": "1.1",
|
||||
"version": "1.2",
|
||||
"description": "Meta's Llama 3.1 excels at general usage situations, including chat, general world knowledge, and coding.",
|
||||
"format": "gguf",
|
||||
"settings": {
|
||||
@ -33,8 +33,7 @@
|
||||
"metadata": {
|
||||
"author": "MetaAI",
|
||||
"tags": [
|
||||
"8B",
|
||||
"Featured"
|
||||
"8B", "Featured"
|
||||
],
|
||||
"size": 4920000000
|
||||
},
|
||||
|
||||
@ -0,0 +1,35 @@
|
||||
{
|
||||
"sources": [
|
||||
{
|
||||
"filename": "Llama-3.2-1B-Instruct-Q8_0.gguf",
|
||||
"url": "https://huggingface.co/bartowski/Llama-3.2-1B-Instruct-GGUF/resolve/main/Llama-3.2-1B-Instruct-Q8_0.gguf"
|
||||
}
|
||||
],
|
||||
"id": "llama3.2-1b-instruct",
|
||||
"object": "model",
|
||||
"name": "Llama 3.2 1B Instruct Q8",
|
||||
"version": "1.0",
|
||||
"description": "Meta's Llama 3.2 excels at general usage situations, including chat, general world knowledge, and coding.",
|
||||
"format": "gguf",
|
||||
"settings": {
|
||||
"ctx_len": 131072,
|
||||
"prompt_template": "<|begin_of_text|><|start_header_id|>system<|end_header_id|>\n\n{system_message}<|eot_id|><|start_header_id|>user<|end_header_id|>\n\n{prompt}<|eot_id|><|start_header_id|>assistant<|end_header_id|>\n\n",
|
||||
"llama_model_path": "Llama-3.2-1B-Instruct-Q8_0.gguf",
|
||||
"ngl": 33
|
||||
},
|
||||
"parameters": {
|
||||
"temperature": 0.7,
|
||||
"top_p": 0.95,
|
||||
"stream": true,
|
||||
"max_tokens": 8192,
|
||||
"stop": ["<|end_of_text|>", "<|eot_id|>", "<|eom_id|>"],
|
||||
"frequency_penalty": 0,
|
||||
"presence_penalty": 0
|
||||
},
|
||||
"metadata": {
|
||||
"author": "MetaAI",
|
||||
"tags": ["1B", "Featured"],
|
||||
"size": 1320000000
|
||||
},
|
||||
"engine": "nitro"
|
||||
}
|
||||
@ -0,0 +1,35 @@
|
||||
{
|
||||
"sources": [
|
||||
{
|
||||
"filename": "Llama-3.2-3B-Instruct-Q8_0.gguf",
|
||||
"url": "https://huggingface.co/bartowski/Llama-3.2-3B-Instruct-GGUF/resolve/main/Llama-3.2-3B-Instruct-Q8_0.gguf"
|
||||
}
|
||||
],
|
||||
"id": "llama3.2-3b-instruct",
|
||||
"object": "model",
|
||||
"name": "Llama 3.2 3B Instruct Q8",
|
||||
"version": "1.0",
|
||||
"description": "Meta's Llama 3.2 excels at general usage situations, including chat, general world knowledge, and coding.",
|
||||
"format": "gguf",
|
||||
"settings": {
|
||||
"ctx_len": 131072,
|
||||
"prompt_template": "<|begin_of_text|><|start_header_id|>system<|end_header_id|>\n\n{system_message}<|eot_id|><|start_header_id|>user<|end_header_id|>\n\n{prompt}<|eot_id|><|start_header_id|>assistant<|end_header_id|>\n\n",
|
||||
"llama_model_path": "Llama-3.2-3B-Instruct-Q8_0.gguf",
|
||||
"ngl": 33
|
||||
},
|
||||
"parameters": {
|
||||
"temperature": 0.7,
|
||||
"top_p": 0.95,
|
||||
"stream": true,
|
||||
"max_tokens": 8192,
|
||||
"stop": ["<|end_of_text|>", "<|eot_id|>", "<|eom_id|>"],
|
||||
"frequency_penalty": 0,
|
||||
"presence_penalty": 0
|
||||
},
|
||||
"metadata": {
|
||||
"author": "MetaAI",
|
||||
"tags": ["3B", "Featured"],
|
||||
"size": 3420000000
|
||||
},
|
||||
"engine": "nitro"
|
||||
}
|
||||
@ -0,0 +1,36 @@
|
||||
{
|
||||
"sources": [
|
||||
{
|
||||
"filename": "Qwen2.5-14B-Instruct-Q4_K_M.gguf",
|
||||
"url": "https://huggingface.co/bartowski/Qwen2.5-14B-Instruct-GGUF/resolve/main/Qwen2.5-14B-Instruct-Q4_K_M.gguf"
|
||||
}
|
||||
],
|
||||
"id": "qwen2.5-14b-instruct",
|
||||
"object": "model",
|
||||
"name": "Qwen2.5 14B Instruct Q4",
|
||||
"version": "1.0",
|
||||
"description": "The Qwen 2.5 family is specifically designed to excel in math, coding, and other computational tasks",
|
||||
"format": "gguf",
|
||||
"settings": {
|
||||
"ctx_len": 32768,
|
||||
"prompt_template": "<|im_start|>system\n{system_message}<|im_end|>\n<|im_start|>user\n{prompt}<|im_end|>\n<|im_start|>assistant",
|
||||
"llama_model_path": "Qwen2.5-14B-Instruct-Q4_K_M.gguf",
|
||||
"ngl": 49
|
||||
},
|
||||
"parameters": {
|
||||
"temperature": 0.7,
|
||||
"top_p": 0.95,
|
||||
"stream": true,
|
||||
"max_tokens": 32768,
|
||||
"stop": [],
|
||||
"frequency_penalty": 0,
|
||||
"presence_penalty": 0
|
||||
},
|
||||
"metadata": {
|
||||
"author": "QwenLM",
|
||||
"tags": ["14B", "Featured"],
|
||||
"size": 8990000000
|
||||
},
|
||||
"engine": "nitro"
|
||||
}
|
||||
|
||||
@ -0,0 +1,36 @@
|
||||
{
|
||||
"sources": [
|
||||
{
|
||||
"filename": "Qwen2.5-32B-Instruct-Q4_K_M.gguf",
|
||||
"url": "https://huggingface.co/bartowski/Qwen2.5-32B-Instruct-GGUF/resolve/main/Qwen2.5-32B-Instruct-Q4_K_M.gguf"
|
||||
}
|
||||
],
|
||||
"id": "qwen2.5-32b-instruct",
|
||||
"object": "model",
|
||||
"name": "Qwen2.5 32B Instruct Q4",
|
||||
"version": "1.0",
|
||||
"description": "The Qwen 2.5 family is specifically designed to excel in math, coding, and other computational tasks",
|
||||
"format": "gguf",
|
||||
"settings": {
|
||||
"ctx_len": 32768,
|
||||
"prompt_template": "<|im_start|>system\n{system_message}<|im_end|>\n<|im_start|>user\n{prompt}<|im_end|>\n<|im_start|>assistant",
|
||||
"llama_model_path": "Qwen2.5-32B-Instruct-Q4_K_M.gguf",
|
||||
"ngl": 65
|
||||
},
|
||||
"parameters": {
|
||||
"temperature": 0.7,
|
||||
"top_p": 0.95,
|
||||
"stream": true,
|
||||
"max_tokens": 32768,
|
||||
"stop": [],
|
||||
"frequency_penalty": 0,
|
||||
"presence_penalty": 0
|
||||
},
|
||||
"metadata": {
|
||||
"author": "QwenLM",
|
||||
"tags": ["32B"],
|
||||
"size": 19900000000
|
||||
},
|
||||
"engine": "nitro"
|
||||
}
|
||||
|
||||
@ -0,0 +1,36 @@
|
||||
{
|
||||
"sources": [
|
||||
{
|
||||
"filename": "Qwen2.5-72B-Instruct-Q4_K_M.gguf",
|
||||
"url": "https://huggingface.co/bartowski/Qwen2.5-72B-Instruct-GGUF/resolve/main/Qwen2.5-72B-Instruct-Q4_K_M.gguf"
|
||||
}
|
||||
],
|
||||
"id": "qwen2.5-72b-instruct",
|
||||
"object": "model",
|
||||
"name": "Qwen2.5 72B Instruct Q4",
|
||||
"version": "1.0",
|
||||
"description": "The Qwen 2.5 family is specifically designed to excel in math, coding, and other computational tasks",
|
||||
"format": "gguf",
|
||||
"settings": {
|
||||
"ctx_len": 32768,
|
||||
"prompt_template": "<|im_start|>system\n{system_message}<|im_end|>\n<|im_start|>user\n{prompt}<|im_end|>\n<|im_start|>assistant",
|
||||
"llama_model_path": "Qwen2.5-72B-Instruct-Q4_K_M.gguf",
|
||||
"ngl": 81
|
||||
},
|
||||
"parameters": {
|
||||
"temperature": 0.7,
|
||||
"top_p": 0.95,
|
||||
"stream": true,
|
||||
"max_tokens": 32768,
|
||||
"stop": [],
|
||||
"frequency_penalty": 0,
|
||||
"presence_penalty": 0
|
||||
},
|
||||
"metadata": {
|
||||
"author": "QwenLM",
|
||||
"tags": ["72B"],
|
||||
"size": 47400000000
|
||||
},
|
||||
"engine": "nitro"
|
||||
}
|
||||
|
||||
@ -0,0 +1,36 @@
|
||||
{
|
||||
"sources": [
|
||||
{
|
||||
"filename": "Qwen2.5-7B-Instruct-Q4_K_M.gguf",
|
||||
"url": "https://huggingface.co/bartowski/Qwen2.5-7B-Instruct-GGUF/resolve/main/Qwen2.5-7B-Instruct-Q4_K_M.gguf"
|
||||
}
|
||||
],
|
||||
"id": "qwen2.5-7b-instruct",
|
||||
"object": "model",
|
||||
"name": "Qwen2.5 7B Instruct Q4",
|
||||
"version": "1.0",
|
||||
"description": "The Qwen 2.5 family is specifically designed to excel in math, coding, and other computational tasks",
|
||||
"format": "gguf",
|
||||
"settings": {
|
||||
"ctx_len": 32768,
|
||||
"prompt_template": "<|im_start|>system\n{system_message}<|im_end|>\n<|im_start|>user\n{prompt}<|im_end|>\n<|im_start|>assistant",
|
||||
"llama_model_path": "Qwen2.5-7B-Instruct-Q4_K_M.gguf",
|
||||
"ngl": 29
|
||||
},
|
||||
"parameters": {
|
||||
"temperature": 0.7,
|
||||
"top_p": 0.95,
|
||||
"stream": true,
|
||||
"max_tokens": 32768,
|
||||
"stop": [],
|
||||
"frequency_penalty": 0,
|
||||
"presence_penalty": 0
|
||||
},
|
||||
"metadata": {
|
||||
"author": "QwenLM",
|
||||
"tags": ["7B", "Featured"],
|
||||
"size": 4680000000
|
||||
},
|
||||
"engine": "nitro"
|
||||
}
|
||||
|
||||
@ -0,0 +1,36 @@
|
||||
{
|
||||
"sources": [
|
||||
{
|
||||
"filename": "Qwen2.5-Coder-7B-Instruct-Q4_K_M.gguf",
|
||||
"url": "https://huggingface.co/bartowski/Qwen2.5-Coder-7B-Instruct-GGUF/resolve/main/Qwen2.5-Coder-7B-Instruct-Q4_K_M.gguf"
|
||||
}
|
||||
],
|
||||
"id": "qwen2.5-coder-7b-instruct",
|
||||
"object": "model",
|
||||
"name": "Qwen2.5 Coder 7B Instruct Q4",
|
||||
"version": "1.0",
|
||||
"description": "Qwen2.5-Coder is the latest series of Code-Specific Qwen large language models. Significantly improvements in code generation, code reasoning and code fixing.",
|
||||
"format": "gguf",
|
||||
"settings": {
|
||||
"ctx_len": 32768,
|
||||
"prompt_template": "<|im_start|>system\n{system_message}<|im_end|>\n<|im_start|>user\n{prompt}<|im_end|>\n<|im_start|>assistant",
|
||||
"llama_model_path": "Qwen2.5-Coder-7B-Instruct-Q4_K_M.gguf",
|
||||
"ngl": 29
|
||||
},
|
||||
"parameters": {
|
||||
"temperature": 0.7,
|
||||
"top_p": 0.95,
|
||||
"stream": true,
|
||||
"max_tokens": 32768,
|
||||
"stop": [],
|
||||
"frequency_penalty": 0,
|
||||
"presence_penalty": 0
|
||||
},
|
||||
"metadata": {
|
||||
"author": "QwenLM",
|
||||
"tags": ["7B", "Featured"],
|
||||
"size": 4680000000
|
||||
},
|
||||
"engine": "nitro"
|
||||
}
|
||||
|
||||
@ -45,6 +45,13 @@ const llama3170bJson = require('./resources/models/llama3.1-70b-instruct/model.j
|
||||
const gemma22bJson = require('./resources/models/gemma-2-2b/model.json')
|
||||
const gemma29bJson = require('./resources/models/gemma-2-9b/model.json')
|
||||
const gemma227bJson = require('./resources/models/gemma-2-27b/model.json')
|
||||
const llama321bJson = require('./resources/models/llama3.2-1b-instruct/model.json')
|
||||
const llama323bJson = require('./resources/models/llama3.2-3b-instruct/model.json')
|
||||
const qwen257bJson = require('./resources/models/qwen2.5-7b-instruct/model.json')
|
||||
const qwen25coder7bJson = require('./resources/models/qwen2.5-coder-7b-instruct/model.json')
|
||||
const qwen2514bJson = require('./resources/models/qwen2.5-14b-instruct/model.json')
|
||||
const qwen2532bJson = require('./resources/models/qwen2.5-32b-instruct/model.json')
|
||||
const qwen2572bJson = require('./resources/models/qwen2.5-72b-instruct/model.json')
|
||||
|
||||
export default [
|
||||
{
|
||||
@ -97,6 +104,13 @@ export default [
|
||||
gemma22bJson,
|
||||
gemma29bJson,
|
||||
gemma227bJson,
|
||||
llama321bJson,
|
||||
llama323bJson,
|
||||
qwen257bJson,
|
||||
qwen25coder7bJson,
|
||||
qwen2514bJson,
|
||||
qwen2532bJson,
|
||||
qwen2572bJson,
|
||||
]),
|
||||
NODE: JSON.stringify(`${packageJson.name}/${packageJson.node}`),
|
||||
DEFAULT_SETTINGS: JSON.stringify(defaultSettingJson),
|
||||
|
||||
@ -58,7 +58,7 @@ const OnDeviceStarterScreen = ({ extensionHasSettings }: Props) => {
|
||||
const configuredModels = useAtomValue(configuredModelsAtom)
|
||||
const setMainViewState = useSetAtom(mainViewStateAtom)
|
||||
|
||||
const recommendModel = ['gemma-2-2b-it', 'llama3.1-8b-instruct']
|
||||
const recommendModel = ['llama3.2-1b-instruct', 'llama3.2-3b-instruct']
|
||||
|
||||
const featuredModel = configuredModels.filter((x) => {
|
||||
const manualRecommendModel = configuredModels.filter((x) =>
|
||||
@ -219,9 +219,6 @@ const OnDeviceStarterScreen = ({ extensionHasSettings }: Props) => {
|
||||
>
|
||||
<div className="w-full text-left">
|
||||
<h6 className="font-medium">{featModel.name}</h6>
|
||||
<p className="mt-2 font-medium text-[hsla(var(--text-secondary))]">
|
||||
{featModel.metadata.author}
|
||||
</p>
|
||||
</div>
|
||||
|
||||
{isDownloading ? (
|
||||
|
||||
Loading…
x
Reference in New Issue
Block a user