feat: support multiple model binaries (#1659)

* feat: Support multiple model binaries

* fix: Update downloadModel with multiple binaries handler

* feat: Add 3 models with multiple binaries

* chore: fix model download

* fix: model file lookup & model path

* chore: add .prettierrc

* chore: refactor docs

* chore: bump model version

* fix(capybara): add filename

* fix(codeninja): add file name + llama model path

* fix(default): add llama model path

* fix(deepseek coder): add filename

* fix(deepseek 33B): add filename

* fix(dolphin mixtral): add filename

* fix(llama2-chat): add filename

* fix(llama2-70B): add filename

* fix(mistral 7b): add filename + model path

* fix(bakllava): correct size model

* fix(llava-7b): correct size model

* fix(llava-13b): correct size model

* fix(mixtral-8x7b): add file name + modelpath

* fix(noramaid-7b): add file name + modelpath

* fix(openchat-7b): add file name + modelpath

* fix(openhermes-7b): add file name + modelpath

* fix(phi2-3b): add file name + modelpath

* fix(phind): add file name + modelpath

* fix(solarslerp): add file name + modelpath

* fix(starling): add file name + modelpath

* fix(stealth): add file name + modelpath

* fix(tinyllama): add file name + modelpath

* fix(trinity): add file name + modelpath

* fix(tulu): add file name + modelpath

* fix(wizardcoder): add file name + modelpath

* fix(yi): add file name + modelpath

* update from source -> sources

Signed-off-by: James <james@jan.ai>

---------

Signed-off-by: James <james@jan.ai>
Co-authored-by: hiro <vuonghoainam.work@gmail.com>
Co-authored-by: hahuyhoang411 <hahuyhoanghhh41@gmail.com>
Co-authored-by: James <james@jan.ai>
Louis 2024-01-25 14:05:33 +07:00, committed by GitHub
parent 3b8e2c5585
commit 0e48be67e8
53 changed files with 1117 additions and 775 deletions

core/.prettierignore
View File

@ -0,0 +1,5 @@
.next/
node_modules/
dist/
*.hbs
*.mdx

View File

@ -265,19 +265,22 @@ export const downloadModel = async (
   const modelBinaryPath = join(directoryPath, modelId)
   const request = require('request')
-  const rq = request({ url: model.source_url, strictSSL, proxy })
   const progress = require('request-progress')
-  progress(rq, {})
-    .on('progress', function (state: any) {
-      console.log('progress', JSON.stringify(state, null, 2))
-    })
-    .on('error', function (err: Error) {
-      console.error('error', err)
-    })
-    .on('end', function () {
-      console.log('end')
-    })
-    .pipe(fs.createWriteStream(modelBinaryPath))
+  for (const source of model.sources) {
+    const rq = request({ url: source, strictSSL, proxy })
+    progress(rq, {})
+      .on('progress', function (state: any) {
+        console.debug('progress', JSON.stringify(state, null, 2))
+      })
+      .on('error', function (err: Error) {
+        console.error('error', err)
+      })
+      .on('end', function () {
+        console.debug('end')
+      })
+      .pipe(fs.createWriteStream(modelBinaryPath))
+  }
   return {
     message: `Starting download ${modelId}`,
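For reference, a minimal self-contained sketch of the per-source pipeline used above: one `request` stream per URL, wrapped in `request-progress` and piped to disk. The destination path and options here are illustrative, not the handler's real arguments.

```ts
import * as fs from 'fs'
const request = require('request')
const progress = require('request-progress')

// One download stream per source URL, mirroring the loop in the hunk above.
function fetchBinary(url: string, destination: string) {
  progress(request({ url, strictSSL: true }), {})
    .on('progress', (state: any) => console.debug('progress', state.percent))
    .on('error', (err: Error) => console.error('error', err))
    .on('end', () => console.debug('end'))
    .pipe(fs.createWriteStream(destination))
}

fetchBinary(
  'https://huggingface.co/mys/ggml_llava-v1.5-7b/resolve/main/mmproj-model-f16.gguf',
  '/tmp/mmproj-model-f16.gguf'
)
```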

View File

@ -21,6 +21,11 @@ export enum InferenceEngine {
   hf_endpoint = 'hf_endpoint',
 }

+export type ModelArtifact = {
+  filename: string
+  url: string
+}
+
 /**
  * Model type defines the shape of a model object.
  * @stored
@ -45,7 +50,7 @@ export type Model = {
   /**
    * The model download source. It can be an external url or a local filepath.
    */
-  source_url: string
+  sources: ModelArtifact[]

   /**
    * The model identifier, which can be referenced in the API endpoints.
@ -107,6 +112,8 @@ export type ModelSettingParams = {
   system_prompt?: string
   ai_prompt?: string
   user_prompt?: string
+  llama_model_path?: string
+  mmproj?: string
 }

 /**
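For reference, a sketch of how a multi-binary (vision) model can be described under the new shape. Only the fields shown in the hunk above are guaranteed by the type change; the `@janhq/core` import path is an assumption, and the values mirror the `bakllava-1` model.json added later in this commit.

```ts
import { Model } from '@janhq/core' // assumed package name for the core types

const bakllava: Partial<Model> = {
  id: 'bakllava-1',
  object: 'model',
  // Each binary becomes one ModelArtifact entry instead of a single source_url.
  sources: [
    {
      filename: 'ggml-model-q5_k.gguf',
      url: 'https://huggingface.co/mys/ggml_bakllava-1/resolve/main/ggml-model-q5_k.gguf',
    },
    {
      filename: 'mmproj-model-f16.gguf',
      url: 'https://huggingface.co/mys/ggml_bakllava-1/resolve/main/mmproj-model-f16.gguf',
    },
  ],
  settings: {
    // Tells the engine which of the downloaded binaries is the LLM and which
    // is the multimodal projector.
    llama_model_path: 'ggml-model-q5_k.gguf',
    mmproj: 'mmproj-model-f16.gguf',
  },
}
```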

View File

@ -56,7 +56,6 @@ jan/ # Jan root folder

- Each `model` folder contains a `model.json` file, which is a representation of a model.
- `model.json` contains metadata and default parameters used to run a model.

### Example

@ -64,36 +63,43 @@ Here's a standard example `model.json` for a GGUF model.

```js
{
  "id": "zephyr-7b", // Defaults to foldername
  "object": "model", // Defaults to "model"
  "sources": [
    {
      "filename": "zephyr-7b-beta.Q4_K_M.gguf",
      "url": "https://huggingface.co/TheBloke/zephyr-7B-beta-GGUF/blob/main/zephyr-7b-beta.Q4_K_M.gguf"
    }
  ],
  "name": "Zephyr 7B", // Defaults to foldername
  "owned_by": "you", // Defaults to "you"
  "version": "1", // Defaults to 1
  "created": 1231231, // Defaults to file creation time
  "description": null, // Defaults to null
  "format": "ggufv3", // Defaults to "ggufv3"
  "engine": "nitro", // engine_id specified in jan/engine folder
  "engine_parameters": {
    // Engine parameters inside model.json can override
    "ctx_len": 4096, // the value inside the base engine.json
    "ngl": 100,
    "embedding": true,
    "n_parallel": 4
  },
  "model_parameters": {
    // Models are called parameters
    "stream": true,
    "max_tokens": 4096,
    "stop": ["<endofstring>"], // This usually can be left blank, only used with specific need from model author
    "frequency_penalty": 0,
    "presence_penalty": 0,
    "temperature": 0.7,
    "top_p": 0.95
  },
  "metadata": {}, // Defaults to {}
  "assets": [
    // Defaults to current dir
    "file://.../zephyr-7b-q4_k_m.bin"
  ]
}
```

View File

@ -31,7 +31,6 @@ In this section, we will show you how to import a GGUF model from [HuggingFace](

## Manually Importing a Downloaded Model (nightly versions and v0.4.4+)

### 1. Create a Model Folder

Navigate to the `~/jan/models` folder. You can find this folder by going to `App Settings` > `Advanced` > `Open App Directory`.

@ -92,7 +91,7 @@ Drag and drop your model binary into this folder, ensuring the `modelname.gguf`

#### 3. Voila

If your model doesn't show up in the Model Selector in conversations, please restart the app.

If that doesn't work, please feel free to join our [Discord community](https://discord.gg/Dt7MxDyNNZ) for support, updates, and discussions.

@ -190,14 +189,18 @@ Edit `model.json` and include the following configurations:

- Ensure the filename must be `model.json`.
- Ensure the `id` property matches the folder name you created.
- Ensure the GGUF filename should match the `id` property exactly.
- Ensure the `source.url` property is the direct binary download link ending in `.gguf`. In HuggingFace, you can find the direct links in the `Files and versions` tab.
- Ensure you are using the correct `prompt_template`. This is usually provided in the HuggingFace model's description page.

```json title="model.json"
{
  // highlight-start
  "sources": [
    {
      "filename": "trinity-v1.Q4_K_M.gguf",
      "url": "https://huggingface.co/janhq/trinity-v1-GGUF/resolve/main/trinity-v1.Q4_K_M.gguf"
    }
  ],
  "id": "trinity-v1-7b",
  // highlight-end
  "object": "model",
@ -208,7 +211,8 @@ Edit `model.json` and include the following configurations:
  "settings": {
    "ctx_len": 4096,
    // highlight-next-line
    "prompt_template": "{system_message}\n### Instruction:\n{prompt}\n### Response:",
    "llama_model_path": "trinity-v1.Q4_K_M.gguf"
  },
  "parameters": {
    "max_tokens": 4096
@ -218,9 +222,7 @@ Edit `model.json` and include the following configurations:
    "tags": ["7B", "Merged"],
    "size": 4370000000
  },
  "engine": "nitro"
}
```

View File

@ -40,7 +40,12 @@ Navigate to the `~/jan/models` folder. Create a folder named `gpt-3.5-turbo-16k`

```json title="~/jan/models/gpt-3.5-turbo-16k/model.json"
{
  "sources": [
    {
      "filename": "openai",
      "url": "https://openai.com"
    }
  ],
  // highlight-next-line
  "id": "gpt-3.5-turbo-16k",
  "object": "model",
@ -55,8 +60,7 @@ Navigate to the `~/jan/models` folder. Create a folder named `gpt-3.5-turbo-16k`
    "author": "OpenAI",
    "tags": ["General", "Big Context Length"]
  },
  "engine": "openai"
  // highlight-end
}
```

@ -118,7 +122,12 @@ Navigate to the `~/jan/models` folder. Create a folder named `mistral-ins-7b-q4`

```json title="~/jan/models/mistral-ins-7b-q4/model.json"
{
  "sources": [
    {
      "filename": "janai",
      "url": "https://jan.ai"
    }
  ],
  // highlight-next-line
  "id": "mistral-ins-7b-q4",
  "object": "model",
@ -134,8 +143,7 @@ Navigate to the `~/jan/models` folder. Create a folder named `mistral-ins-7b-q4`
    "tags": ["remote", "awesome"]
  },
  // highlight-start
  "engine": "openai"
  // highlight-end
}
```

View File

@ -49,7 +49,12 @@ Navigate to the `~/jan/models` folder. Create a folder named `<openrouter-modeln

```json title="~/jan/models/openrouter-dolphin-mixtral-8x7b/model.json"
{
  "sources": [
    {
      "filename": "openrouter",
      "url": "https://openrouter.ai/"
    }
  ],
  "id": "cognitivecomputations/dolphin-mixtral-8x7b",
  "object": "model",
  "name": "Dolphin 2.6 Mixtral 8x7B",
@ -63,8 +68,7 @@ Navigate to the `~/jan/models` folder. Create a folder named `<openrouter-modeln
    "tags": ["General", "Big Context Length"]
  },
  // highlight-start
  "engine": "openai"
  // highlight-end
}
```

View File

@ -59,7 +59,12 @@ Navigate to the `~/jan/models` folder. Create a folder named `<your-deployment-n

```json title="~/jan/models/gpt-35-hieu-jan/model.json"
{
  "sources": [
    {
      "filename": "azure_openai",
      "url": "https://hieujan.openai.azure.com"
    }
  ],
  // highlight-next-line
  "id": "gpt-35-hieu-jan",
  "object": "model",
@ -75,8 +80,7 @@ Navigate to the `~/jan/models` folder. Create a folder named `<your-deployment-n
    "tags": ["General", "Big Context Length"]
  },
  // highlight-start
  "engine": "openai"
  // highlight-end
}
```

View File

@ -59,18 +59,26 @@ components:
        type: string
        description: State format of the model, distinct from the engine.
        example: ggufv3
      source:
        type: array
        items:
          type: object
          properties:
            url:
              format: uri
              description: URL to the source of the model.
              example: https://huggingface.co/TheBloke/zephyr-7B-beta-GGUF/blob/main/zephyr-7b-beta.Q4_K_M.gguf
            filename:
              type: string
              description: Filename of the model.
              example: zephyr-7b-beta.Q4_K_M.gguf
      settings:
        type: object
        properties:
          ctx_len:
            type: string
            description: Context length.
            example: "4096"
          ngl:
            type: string
            description: Number of layers.
@ -94,7 +102,7 @@ components:
          token_limit:
            type: string
            description: Token limit for the model.
            example: "4096"
          top_k:
            type: string
            description: Top-k setting for the model.
@ -117,7 +125,7 @@ components:
            type: string
            description: List of assets related to the model.
      required:
        - source
    ModelObject:
      type: object
      properties:
@ -169,11 +177,19 @@ components:
            - running
            - stopped
          description: The current state of the model.
        source:
          type: array
          items:
            type: object
            properties:
              url:
                format: uri
                description: URL to the source of the model.
                example: https://huggingface.co/TheBloke/zephyr-7B-beta-GGUF/blob/main/zephyr-7b-beta.Q4_K_M.gguf
              filename:
                type: string
                description: Filename of the model.
                example: zephyr-7b-beta.Q4_K_M.gguf
        engine_parameters:
          type: object
          properties:
@ -208,8 +224,8 @@ components:
              model.
            minimum: 128
            maximum: 4096
            default: 4096
            example: 4096
          n_parallel:
            type: integer
            description: Number of parallel operations, relevant when continuous batching is
@ -241,8 +257,8 @@ components:
            description: Maximum context length the model can handle.
            minimum: 0
            maximum: 4096
            default: 4096
            example: 4096
          ngl:
            type: integer
            description: Number of layers in the neural network.
@ -276,8 +292,8 @@ components:
              response.
            minimum: 1
            maximum: 4096
            default: 4096
            example: 4096
          top_k:
            type: integer
            description: Limits the model to consider only the top k most likely next tokens
@ -318,7 +334,7 @@ components:
        - created
        - owned_by
        - state
        - source
        - parameters
        - metadata
    DeleteModelResponse:

View File

@ -119,11 +119,19 @@ async function runModel(
     wrapper.model.settings.ai_prompt = prompt.ai_prompt;
   }

+  const modelFolderPath = path.join(janRoot, "models", wrapper.model.id);
+  const modelPath = wrapper.model.settings.llama_model_path
+    ? path.join(modelFolderPath, wrapper.model.settings.llama_model_path)
+    : currentModelFile;
+
   currentSettings = {
-    llama_model_path: currentModelFile,
     ...wrapper.model.settings,
+    llama_model_path: modelPath,
     // This is critical and requires real CPU physical core count (or performance core)
     cpu_threads: Math.max(1, nitroResourceProbe.numCpuPhysicalCore),
+    ...(wrapper.model.settings.mmproj && {
+      mmproj: path.join(modelFolderPath, wrapper.model.settings.mmproj),
+    }),
   };
   console.log(currentSettings);
   return runNitroAndLoadModel();
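A standalone sketch of how the new settings are expected to resolve before being handed to Nitro: `llama_model_path` and `mmproj` in model.json are relative file names that get joined onto the model folder. The Jan data folder path below is an assumption for illustration.

```ts
import * as path from "path";

const janRoot = "/home/user/jan"; // illustrative data folder
const model = {
  id: "llava-1.5-7b-q5",
  settings: {
    llama_model_path: "ggml-model-q5_k.gguf",
    mmproj: "mmproj-model-f16.gguf",
  },
};

// Mirrors the path.join calls in the hunk above.
const modelFolderPath = path.join(janRoot, "models", model.id);
console.log(path.join(modelFolderPath, model.settings.llama_model_path));
// /home/user/jan/models/llava-1.5-7b-q5/ggml-model-q5_k.gguf
console.log(path.join(modelFolderPath, model.settings.mmproj));
// /home/user/jan/models/llava-1.5-7b-q5/mmproj-model-f16.gguf
```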

View File

@ -1,6 +1,6 @@
 {
   "name": "@janhq/model-extension",
-  "version": "1.0.22",
+  "version": "1.0.23",
   "description": "Model Management Extension provides model exploration and seamless downloads",
   "main": "dist/index.js",
   "module": "dist/module.js",

View File

@ -80,16 +80,34 @@ export default class JanModelExtension extends ModelExtension {
     const modelDirPath = await joinPath([JanModelExtension._homeDir, model.id])
     if (!(await fs.existsSync(modelDirPath))) await fs.mkdirSync(modelDirPath)

-    // try to retrieve the download file name from the source url
-    // if it fails, use the model ID as the file name
-    const extractedFileName = await model.source_url.split('/').pop()
+    if (model.sources.length > 1) {
+      // path to model binaries
+      for (const source of model.sources) {
+        let path = this.extractFileName(source.url)
+        if (source.filename) {
+          path = await joinPath([modelDirPath, source.filename])
+        }
+        downloadFile(source.url, path, network)
+      }
+    } else {
+      const fileName = this.extractFileName(model.sources[0]?.url)
+      const path = await joinPath([modelDirPath, fileName])
+      downloadFile(model.sources[0]?.url, path, network)
+    }
+  }
+
+  /**
+   * try to retrieve the download file name from the source url
+   */
+  private extractFileName(url: string): string {
+    const extractedFileName = url.split('/').pop()
     const fileName = extractedFileName
       .toLowerCase()
       .endsWith(JanModelExtension._supportedModelFormat)
       ? extractedFileName
-      : model.id
-    const path = await joinPath([modelDirPath, fileName])
-    downloadFile(model.source_url, path, network)
+      : extractedFileName + JanModelExtension._supportedModelFormat
+    return fileName
   }

   /**
@ -98,6 +116,7 @@ export default class JanModelExtension extends ModelExtension {
    * @returns {Promise<void>} A promise that resolves when the download has been cancelled.
    */
   async cancelModelDownload(modelId: string): Promise<void> {
+    const model = await this.getConfiguredModels()
     return abortDownload(
       await joinPath([JanModelExtension._homeDir, modelId, modelId])
     ).then(async () => {
@ -163,15 +182,16 @@ export default class JanModelExtension extends ModelExtension {
       .then((files: string[]) => {
         // or model binary exists in the directory
         // model binary name can match model ID or be a .gguf file and not be an incompleted model file
+        // TODO: Check diff between urls, filenames
         return (
           files.includes(modelDir) ||
-          files.some(
+          files.filter(
             (file) =>
               file
                 .toLowerCase()
                 .includes(JanModelExtension._supportedModelFormat) &&
               !file.endsWith(JanModelExtension._incompletedModelFileName)
-          )
+          )?.length >= model.sources.length
         )
       })
   }
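A minimal sketch of the completeness check introduced above: a model now counts as downloaded only when at least as many model binaries exist in its folder as it declares sources. The literals below are illustrative, and the check assumes the supported format is `.gguf` and that partial downloads carry a `.download` suffix, which are assumptions about the extension's constants.

```ts
// Assumed constants; the real values live on JanModelExtension.
const supportedModelFormat = '.gguf'
const incompletedModelFileName = '.download'

const sources = [
  { filename: 'ggml-model-q5_k.gguf' },
  { filename: 'mmproj-model-f16.gguf' },
]
const files = ['ggml-model-q5_k.gguf', 'mmproj-model-f16.gguf', 'model.json']

const downloadedBinaries = files.filter(
  (file) =>
    file.toLowerCase().includes(supportedModelFormat) &&
    !file.endsWith(incompletedModelFileName)
)
console.log(downloadedBinaries.length >= sources.length) // true
```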
@ -198,7 +218,6 @@ export default class JanModelExtension extends ModelExtension {
     const readJsonPromises = allDirectories.map(async (dirName) => {
       // filter out directories that don't match the selector
       // read model.json
       const jsonPath = await joinPath([
         JanModelExtension._homeDir,
@ -226,7 +245,21 @@ export default class JanModelExtension extends ModelExtension {
     const modelData = results.map((result) => {
       if (result.status === 'fulfilled') {
         try {
-          return result.value as Model
+          // This to ensure backward compatibility with `model.json` with `source_url`
+          const tmpModel =
+            typeof result.value === 'object'
+              ? result.value
+              : JSON.parse(result.value)
+          if (tmpModel['source_url'] != null) {
+            tmpModel['source'] = [
+              {
+                filename: tmpModel.id,
+                url: tmpModel['source_url'],
+              },
+            ]
+          }
+          return tmpModel as Model
         } catch {
           console.debug(`Unable to parse model metadata: ${result.value}`)
           return undefined
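A standalone sketch of the backward-compatibility mapping shown above: a legacy model.json that still carries `source_url` is folded into the array shape at read time. `LegacyModel` and `migrateLegacySource` are made-up names for illustration.

```ts
type LegacyModel = {
  id: string
  source_url?: string
  [key: string]: unknown
}

function migrateLegacySource(tmpModel: LegacyModel): LegacyModel {
  const url = tmpModel.source_url
  if (url != null) {
    // Note: as in the diff above, the migrated key is `source`, not `sources`.
    tmpModel['source'] = [{ filename: tmpModel.id, url }]
  }
  return tmpModel
}

// Example: an older entry gains a one-element source array named after its id.
console.log(
  migrateLegacySource({
    id: 'zephyr-7b',
    source_url:
      'https://huggingface.co/TheBloke/zephyr-7B-beta-GGUF/blob/main/zephyr-7b-beta.Q4_K_M.gguf',
  })
)
```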

View File

@ -0,0 +1,33 @@
{
"sources": [
{
"filename": "ggml-model-q5_k.gguf",
"url": "https://huggingface.co/mys/ggml_bakllava-1/resolve/main/ggml-model-q5_k.gguf"
},
{
"filename": "mmproj-model-f16.gguf",
"url": "https://huggingface.co/mys/ggml_bakllava-1/resolve/main/mmproj-model-f16.gguf"
}
],
"id": "bakllava-1",
"object": "model",
"name": "BakLlava 1",
"version": "1.0",
"description": "BakLlava 1 can bring vision understanding to Jan",
"format": "gguf",
"settings": {
"ctx_len": 4096,
"prompt_template": "\n### Instruction:\n{prompt}\n### Response:\n",
"llama_model_path": "ggml-model-q5_k.gguf",
"mmproj": "mmproj-model-f16.gguf"
},
"parameters": {
"max_tokens": 4096
},
"metadata": {
"author": "Mys",
"tags": ["Vision"],
"size": 5750000000
},
"engine": "nitro"
}

View File

@ -1,29 +1,34 @@
{
  "sources": [
    {
      "filename": "nous-capybara-34b.Q5_K_M.gguf",
      "url": "https://huggingface.co/TheBloke/Nous-Capybara-34B-GGUF/resolve/main/nous-capybara-34b.Q5_K_M.gguf"
    }
  ],
  "id": "capybara-34b",
  "object": "model",
  "name": "Capybara 200k 34B Q5",
  "version": "1.0",
  "description": "Nous Capybara 34B is a long context length model that supports 200K tokens.",
  "format": "gguf",
  "settings": {
    "ctx_len": 4096,
    "prompt_template": "USER:\n{prompt}\nASSISTANT:",
    "llama_model_path": "nous-capybara-34b.Q5_K_M.gguf"
  },
  "parameters": {
    "temperature": 0.7,
    "top_p": 0.95,
    "stream": true,
    "max_tokens": 4096,
    "stop": [],
    "frequency_penalty": 0,
    "presence_penalty": 0
  },
  "metadata": {
    "author": "NousResearch, The Bloke",
    "tags": ["34B", "Finetuned"],
    "size": 24320000000
  },
  "engine": "nitro"
}

View File

@ -1,29 +1,33 @@
{
  "sources": [
    {
      "filename": "codeninja-1.0-openchat-7b.Q4_K_M.gguf",
      "url": "https://huggingface.co/beowolx/CodeNinja-1.0-OpenChat-7B-GGUF/resolve/main/codeninja-1.0-openchat-7b.Q4_K_M.gguf"
    }
  ],
  "id": "codeninja-1.0-7b",
  "object": "model",
  "name": "CodeNinja 7B Q4",
  "version": "1.0",
  "description": "CodeNinja is good for coding tasks and can handle various languages including Python, C, C++, Rust, Java, JavaScript, and more.",
  "format": "gguf",
  "settings": {
    "ctx_len": 4096,
    "prompt_template": "GPT4 Correct User: {prompt}<|end_of_turn|>GPT4 Correct Assistant:",
    "llama_model_path": "codeninja-1.0-openchat-7b.Q4_K_M.gguf"
  },
  "parameters": {
    "temperature": 0.7,
    "top_p": 0.95,
    "stream": true,
    "max_tokens": 4096,
    "frequency_penalty": 0,
    "presence_penalty": 0
  },
  "metadata": {
    "author": "Beowolx",
    "tags": ["7B", "Finetuned"],
    "size": 4370000000
  },
  "engine": "nitro"
}

View File

@ -2,7 +2,12 @@
  "object": "model",
  "version": 1,
  "format": "gguf",
  "sources": [
    {
      "url": "N/A",
      "filename": "N/A"
    }
  ],
  "id": "N/A",
  "name": "N/A",
  "created": 0,
@ -10,7 +15,8 @@
  "settings": {
    "ctx_len": 4096,
    "embedding": false,
    "prompt_template": "{system_message}\n### Instruction: {prompt}\n### Response:",
    "llama_model_path": "N/A"
  },
  "parameters": {
    "temperature": 0.7,

View File

@ -1,29 +1,34 @@
{
  "sources": [
    {
      "filename": "deepseek-coder-1.3b-instruct.Q8_0.gguf",
      "url": "https://huggingface.co/TheBloke/deepseek-coder-1.3b-instruct-GGUF/resolve/main/deepseek-coder-1.3b-instruct.Q8_0.gguf"
    }
  ],
  "id": "deepseek-coder-1.3b",
  "object": "model",
  "name": "Deepseek Coder 1.3B Q8",
  "version": "1.0",
  "description": "Deepseek Coder excelled in project-level code completion with advanced capabilities across multiple programming languages.",
  "format": "gguf",
  "settings": {
    "ctx_len": 4096,
    "prompt_template": "### Instruction:\n{prompt}\n### Response:",
    "llama_model_path": "deepseek-coder-1.3b-instruct.Q8_0.gguf"
  },
  "parameters": {
    "temperature": 0.7,
    "top_p": 0.95,
    "stream": true,
    "max_tokens": 4096,
    "stop": [],
    "frequency_penalty": 0,
    "presence_penalty": 0
  },
  "metadata": {
    "author": "Deepseek, The Bloke",
    "tags": ["Tiny", "Foundational Model"],
    "size": 1430000000
  },
  "engine": "nitro"
}

View File

@ -1,29 +1,34 @@
{
  "sources": [
    {
      "filename": "deepseek-coder-33b-instruct.Q5_K_M.gguf",
      "url": "https://huggingface.co/TheBloke/deepseek-coder-33B-instruct-GGUF/resolve/main/deepseek-coder-33b-instruct.Q5_K_M.gguf"
    }
  ],
  "id": "deepseek-coder-34b",
  "object": "model",
  "name": "Deepseek Coder 33B Q5",
  "version": "1.0",
  "description": "Deepseek Coder excelled in project-level code completion with advanced capabilities across multiple programming languages.",
  "format": "gguf",
  "settings": {
    "ctx_len": 4096,
    "prompt_template": "### Instruction:\n{prompt}\n### Response:",
    "llama_model_path": "deepseek-coder-33b-instruct.Q5_K_M.gguf"
  },
  "parameters": {
    "temperature": 0.7,
    "top_p": 0.95,
    "stream": true,
    "max_tokens": 4096,
    "stop": [],
    "frequency_penalty": 0,
    "presence_penalty": 0
  },
  "metadata": {
    "author": "Deepseek, The Bloke",
    "tags": ["34B", "Foundational Model"],
    "size": 19940000000
  },
  "engine": "nitro"
}

View File

@ -1,28 +1,34 @@
{
  "sources": [
    {
      "filename": "dolphin-2.7-mixtral-8x7b.Q4_K_M.gguf",
      "url": "https://huggingface.co/TheBloke/dolphin-2.7-mixtral-8x7b-GGUF/resolve/main/dolphin-2.7-mixtral-8x7b.Q4_K_M.gguf"
    }
  ],
  "id": "dolphin-2.7-mixtral-8x7b",
  "object": "model",
  "name": "Dolphin 8x7B Q4",
  "version": "1.0",
  "description": "Dolphin is an uncensored model built on Mixtral-8x7b. It is good at programming tasks.",
  "format": "gguf",
  "settings": {
    "ctx_len": 4096,
    "prompt_template": "<|im_start|>system\n{system_message}<|im_end|>\n<|im_start|>user\n{prompt}<|im_end|>\n<|im_start|>assistant",
    "llama_model_path": "dolphin-2.7-mixtral-8x7b.Q4_K_M.gguf"
  },
  "parameters": {
    "temperature": 0.7,
    "top_p": 0.95,
    "stream": true,
    "max_tokens": 4096,
    "stop": [],
    "frequency_penalty": 0,
    "presence_penalty": 0
  },
  "metadata": {
    "author": "Cognitive Computations, TheBloke",
    "tags": ["70B", "Finetuned"],
    "size": 26440000000
  },
  "engine": "nitro"
}

View File

@ -1,18 +1,20 @@
{
  "sources": [
    {
      "url": "https://openai.com"
    }
  ],
  "id": "gpt-3.5-turbo-16k-0613",
  "object": "model",
  "name": "OpenAI GPT 3.5 Turbo 16k 0613",
  "version": "1.0",
  "description": "OpenAI GPT 3.5 Turbo 16k 0613 model is extremely good",
  "format": "api",
  "settings": {},
  "parameters": {},
  "metadata": {
    "author": "OpenAI",
    "tags": ["General", "Big Context Length"]
  },
  "engine": "openai"
}

View File

@ -1,18 +1,20 @@
{
  "sources": [
    {
      "url": "https://openai.com"
    }
  ],
  "id": "gpt-3.5-turbo",
  "object": "model",
  "name": "OpenAI GPT 3.5 Turbo",
  "version": "1.0",
  "description": "OpenAI GPT 3.5 Turbo model is extremely good",
  "format": "api",
  "settings": {},
  "parameters": {},
  "metadata": {
    "author": "OpenAI",
    "tags": ["General", "Big Context Length"]
  },
  "engine": "openai"
}

View File

@ -1,18 +1,20 @@
{
  "sources": [
    {
      "url": "https://openai.com"
    }
  ],
  "id": "gpt-4",
  "object": "model",
  "name": "OpenAI GPT 4",
  "version": "1.0",
  "description": "OpenAI GPT 4 model is extremely good",
  "format": "api",
  "settings": {},
  "parameters": {},
  "metadata": {
    "author": "OpenAI",
    "tags": ["General", "Big Context Length"]
  },
  "engine": "openai"
}

View File

@ -1,29 +1,34 @@
{
  "sources": [
    {
      "filename": "llama-2-70b-chat.Q4_K_M.gguf",
      "url": "https://huggingface.co/TheBloke/Llama-2-70B-Chat-GGUF/resolve/main/llama-2-70b-chat.Q4_K_M.gguf"
    }
  ],
  "id": "llama2-chat-70b-q4",
  "object": "model",
  "name": "Llama 2 Chat 70B Q4",
  "version": "1.0",
  "description": "Llama 2 Chat 7b model, specifically designed for a comprehensive understanding through training on extensive internet data.",
  "format": "gguf",
  "settings": {
    "ctx_len": 4096,
    "prompt_template": "[INST] <<SYS>>\n{system_message}<</SYS>>\n{prompt}[/INST]",
    "llama_model_path": "llama-2-70b-chat.Q4_K_M.gguf"
  },
  "parameters": {
    "temperature": 0.7,
    "top_p": 0.95,
    "stream": true,
    "max_tokens": 4096,
    "stop": [],
    "frequency_penalty": 0,
    "presence_penalty": 0
  },
  "metadata": {
    "author": "MetaAI, The Bloke",
    "tags": ["70B", "Foundational Model"],
    "size": 43920000000
  },
  "engine": "nitro"
}

View File

@ -1,29 +1,34 @@
{
  "sources": [
    {
      "filename": "llama-2-7b-chat.Q4_K_M.gguf",
      "url": "https://huggingface.co/TheBloke/Llama-2-7B-Chat-GGUF/resolve/main/llama-2-7b-chat.Q4_K_M.gguf"
    }
  ],
  "id": "llama2-chat-7b-q4",
  "object": "model",
  "name": "Llama 2 Chat 7B Q4",
  "version": "1.0",
  "description": "Llama 2 Chat 7b model, specifically designed for a comprehensive understanding through training on extensive internet data.",
  "format": "gguf",
  "settings": {
    "ctx_len": 4096,
    "prompt_template": "[INST] <<SYS>>\n{system_message}<</SYS>>\n{prompt}[/INST]",
    "llama_model_path": "llama-2-7b-chat.Q4_K_M.gguf"
  },
  "parameters": {
    "temperature": 0.7,
    "top_p": 0.95,
    "stream": true,
    "max_tokens": 4096,
    "stop": [],
    "frequency_penalty": 0,
    "presence_penalty": 0
  },
  "metadata": {
    "author": "MetaAI, The Bloke",
    "tags": ["7B", "Foundational Model"],
    "size": 4080000000
  },
  "engine": "nitro"
}

View File

@ -0,0 +1,33 @@
{
"sources": [
{
"filename": "ggml-model-q5_k.gguf",
"url": "https://huggingface.co/mys/ggml_llava-v1.5-13b/resolve/main/ggml-model-q5_k.gguf"
},
{
"filename": "mmproj-model-f16.gguf",
"url": "https://huggingface.co/mys/ggml_llava-v1.5-13b/resolve/main/mmproj-model-f16.gguf"
}
],
"id": "llava-1.5-13b-q5",
"object": "model",
"name": "LlaVa 1.5 13B Q5 K",
"version": "1.0",
"description": "LlaVa 1.5 can bring vision understanding to Jan",
"format": "gguf",
"settings": {
"ctx_len": 4096,
"prompt_template": "\n### Instruction:\n{prompt}\n### Response:\n",
"llama_model_path": "ggml-model-q5_k.gguf",
"mmproj": "mmproj-model-f16.gguf"
},
"parameters": {
"max_tokens": 4096
},
"metadata": {
"author": "Mys",
"tags": ["Vision"],
"size": 9850000000
},
"engine": "nitro"
}

View File

@ -0,0 +1,33 @@
{
"sources": [
{
"filename": "ggml-model-q5_k.gguf",
"url": "https://huggingface.co/mys/ggml_llava-v1.5-7b/resolve/main/ggml-model-q5_k.gguf"
},
{
"filename": "mmproj-model-f16.gguf",
"url": "https://huggingface.co/mys/ggml_llava-v1.5-7b/resolve/main/mmproj-model-f16.gguf"
}
],
"id": "llava-1.5-7b-q5",
"object": "model",
"name": "LlaVa 1.5 7B Q5 K",
"version": "1.0",
"description": "LlaVa 1.5 can bring vision understanding to Jan",
"format": "gguf",
"settings": {
"ctx_len": 4096,
"prompt_template": "\n### Instruction:\n{prompt}\n### Response:\n",
"llama_model_path": "ggml-model-q5_k.gguf",
"mmproj": "mmproj-model-f16.gguf"
},
"parameters": {
"max_tokens": 4096
},
"metadata": {
"author": "Mys",
"tags": ["Vision"],
"size": 5400000000
},
"engine": "nitro"
}

View File

@ -1,30 +1,35 @@
{
  "sources": [
    {
      "filename": "mistral-7b-instruct-v0.2.Q4_K_M.gguf",
      "url": "https://huggingface.co/TheBloke/Mistral-7B-Instruct-v0.2-GGUF/resolve/main/mistral-7b-instruct-v0.2.Q4_K_M.gguf"
    }
  ],
  "id": "mistral-ins-7b-q4",
  "object": "model",
  "name": "Mistral Instruct 7B Q4",
  "version": "1.0",
  "description": "Mistral Instruct 7b model, specifically designed for a comprehensive understanding through training on extensive internet data.",
  "format": "gguf",
  "settings": {
    "ctx_len": 4096,
    "prompt_template": "[INST] {prompt} [/INST]",
    "llama_model_path": "mistral-7b-instruct-v0.2.Q4_K_M.gguf"
  },
  "parameters": {
    "temperature": 0.7,
    "top_p": 0.95,
    "stream": true,
    "max_tokens": 4096,
    "stop": [],
    "frequency_penalty": 0,
    "presence_penalty": 0
  },
  "metadata": {
    "author": "MistralAI, The Bloke",
    "tags": ["Featured", "7B", "Foundational Model"],
    "size": 4370000000,
    "cover": "https://raw.githubusercontent.com/janhq/jan/main/models/mistral-ins-7b-q4/cover.png"
  },
  "engine": "nitro"
}

View File

@ -1,28 +1,33 @@
{
  "sources": [
    {
      "filename": "mixtral-8x7b-instruct-v0.1.Q4_K_M.gguf",
      "url": "https://huggingface.co/TheBloke/Mixtral-8x7B-Instruct-v0.1-GGUF/resolve/main/mixtral-8x7b-instruct-v0.1.Q4_K_M.gguf"
    }
  ],
  "id": "mixtral-8x7b-instruct",
  "object": "model",
  "name": "Mixtral 8x7B Instruct Q4",
  "version": "1.0",
  "description": "The Mixtral-8x7B is a pretrained generative Sparse Mixture of Experts. The Mixtral-8x7B outperforms 70B models on most benchmarks.",
  "format": "gguf",
  "settings": {
    "ctx_len": 4096,
    "prompt_template": "[INST] {prompt} [/INST]",
    "llama_model_path": "mixtral-8x7b-instruct-v0.1.Q4_K_M.gguf"
  },
  "parameters": {
    "temperature": 0.7,
    "top_p": 0.95,
    "stream": true,
    "max_tokens": 4096,
    "frequency_penalty": 0,
    "presence_penalty": 0
  },
  "metadata": {
    "author": "MistralAI, TheBloke",
    "tags": ["70B", "Foundational Model"],
    "size": 26440000000
  },
  "engine": "nitro"
}

View File

@ -1,29 +1,34 @@
{
  "sources": [
    {
      "filename": "Noromaid-7b-v0.1.1.q5_k_m.gguf",
      "url": "https://huggingface.co/NeverSleep/Noromaid-7b-v0.1.1-GGUF/resolve/main/Noromaid-7b-v0.1.1.q5_k_m.gguf"
    }
  ],
  "id": "noromaid-7b",
  "object": "model",
  "name": "Noromaid 7B Q5",
  "version": "1.0",
  "description": "The Noromaid 7b model is designed for role-playing with human-like behavior.",
  "format": "gguf",
  "settings": {
    "ctx_len": 4096,
    "prompt_template": "### Instruction:{prompt}\n### Response:",
    "llama_model_path": "Noromaid-7b-v0.1.1.q5_k_m.gguf"
  },
  "parameters": {
    "temperature": 0.7,
    "top_p": 0.95,
    "stream": true,
    "max_tokens": 4096,
    "stop": [],
    "frequency_penalty": 0,
    "presence_penalty": 0
  },
  "metadata": {
    "author": "NeverSleep",
    "tags": ["7B", "Merged"],
    "size": 4370000000
  },
  "engine": "nitro"
}

View File

@ -1,28 +1,34 @@
{
  "sources": [
    {
      "filename": "openchat-3.5-1210.Q4_K_M.gguf",
      "url": "https://huggingface.co/TheBloke/openchat-3.5-1210-GGUF/resolve/main/openchat-3.5-1210.Q4_K_M.gguf"
    }
  ],
  "id": "openchat-3.5-7b",
  "object": "model",
  "name": "Openchat-3.5 7B Q4",
  "version": "1.0",
  "description": "The performance of this open-source model surpasses that of ChatGPT-3.5 and Grok-1 across various benchmarks.",
  "format": "gguf",
  "settings": {
    "ctx_len": 4096,
    "prompt_template": "GPT4 Correct User: {prompt}<|end_of_turn|>GPT4 Correct Assistant:",
    "llama_model_path": "openchat-3.5-1210.Q4_K_M.gguf"
  },
  "parameters": {
    "temperature": 0.7,
    "top_p": 0.95,
    "stream": true,
    "max_tokens": 4096,
    "stop": ["<|end_of_turn|>"],
    "frequency_penalty": 0,
    "presence_penalty": 0
  },
  "metadata": {
    "author": "Openchat",
    "tags": ["Recommended", "7B", "Finetuned"],
    "size": 4370000000
  },
  "engine": "nitro"
}

View File

@ -1,29 +1,34 @@
{
  "sources": [
    {
      "filename": "openhermes-2.5-neural-chat-v3-3-slerp.Q4_K_M.gguf",
      "url": "https://huggingface.co/janhq/openhermes-2.5-neural-chat-v3-3-slerp-GGUF/resolve/main/openhermes-2.5-neural-chat-v3-3-slerp.Q4_K_M.gguf"
    }
  ],
  "id": "openhermes-neural-7b",
  "object": "model",
  "name": "OpenHermes Neural 7B Q4",
  "version": "1.0",
  "description": "OpenHermes Neural is a merged model using the TIES method. It performs well in various benchmarks.",
  "format": "gguf",
  "settings": {
    "ctx_len": 4096,
    "prompt_template": "<|im_start|>system\n{system_message}<|im_end|>\n<|im_start|>user\n{prompt}<|im_end|>\n<|im_start|>assistant",
    "llama_model_path": "openhermes-2.5-neural-chat-v3-3-slerp.Q4_K_M.gguf"
  },
  "parameters": {
    "temperature": 0.7,
    "top_p": 0.95,
    "stream": true,
    "max_tokens": 4096,
    "frequency_penalty": 0,
    "presence_penalty": 0
  },
  "metadata": {
    "author": "Intel, Jan",
    "tags": ["7B", "Merged", "Featured"],
    "size": 4370000000,
    "cover": "https://raw.githubusercontent.com/janhq/jan/main/models/openhermes-neural-7b/cover.png"
  },
  "engine": "nitro"
}

View File

@ -1,29 +1,34 @@
{
  "sources": [
    {
      "filename": "phi-2.Q8_0.gguf",
      "url": "https://huggingface.co/TheBloke/phi-2-GGUF/resolve/main/phi-2.Q8_0.gguf"
    }
  ],
  "id": "phi-2-3b",
  "object": "model",
  "name": "Phi-2 3B Q8",
  "version": "1.0",
  "description": "Phi-2 is a 2.7B model, excelling in common sense and logical reasoning benchmarks, trained with synthetic texts and filtered websites.",
  "format": "gguf",
  "settings": {
    "ctx_len": 4096,
    "prompt_template": "Intruct:\n{prompt}\nOutput:",
    "llama_model_path": "phi-2.Q8_0.gguf"
  },
  "parameters": {
    "temperature": 0.7,
    "top_p": 0.95,
    "stream": true,
    "max_tokens": 4096,
    "stop": [],
    "frequency_penalty": 0,
    "presence_penalty": 0
  },
  "metadata": {
    "author": "Microsoft",
    "tags": ["3B", "Foundational Model"],
    "size": 2960000000
  },
  "engine": "nitro"
}

View File

@ -1,29 +1,34 @@
{
  "sources": [
    {
      "filename": "phind-codellama-34b-v2.Q5_K_M.gguf",
      "url": "https://huggingface.co/TheBloke/Phind-CodeLlama-34B-v2-GGUF/resolve/main/phind-codellama-34b-v2.Q5_K_M.gguf"
    }
  ],
  "id": "phind-34b",
  "object": "model",
  "name": "Phind 34B Q5",
  "version": "1.0",
  "description": "Phind 34B is fine-tuned on 1.5B tokens of high-quality programming data. This multi-lingual model excels in various programming languages and is designed to be steerable and user-friendly.",
  "format": "gguf",
  "settings": {
    "ctx_len": 4096,
    "prompt_template": "### System Prompt\n{system_message}\n### User Message\n{prompt}\n### Assistant",
    "llama_model_path": "phind-codellama-34b-v2.Q5_K_M.gguf"
  },
  "parameters": {
    "temperature": 0.7,
    "top_p": 0.95,
    "stream": true,
    "max_tokens": 4096,
    "stop": [],
    "frequency_penalty": 0,
    "presence_penalty": 0
  },
  "metadata": {
    "author": "Phind, The Bloke",
    "tags": ["34B", "Finetuned"],
    "size": 20220000000
  },
  "engine": "nitro"
}

View File

@ -1,29 +1,33 @@
{
  "sources": [
    {
      "filename": "solar-10.7b-slerp.Q4_K_M.gguf",
      "url": "https://huggingface.co/janhq/Solar-10.7B-SLERP-GGUF/resolve/main/solar-10.7b-slerp.Q4_K_M.gguf"
    }
  ],
  "id": "solar-10.7b-slerp",
  "object": "model",
  "name": "Solar Slerp 10.7B Q4",
  "version": "1.0",
  "description": "This model uses the Slerp merge method from SOLAR Instruct and Pandora-v1",
  "format": "gguf",
  "settings": {
    "ctx_len": 4096,
    "prompt_template": "### User: {prompt}\n### Assistant:",
    "llama_model_path": "solar-10.7b-slerp.Q4_K_M.gguf"
  },
  "parameters": {
    "temperature": 0.7,
    "top_p": 0.95,
    "stream": true,
    "max_tokens": 4096,
    "frequency_penalty": 0,
    "presence_penalty": 0
  },
  "metadata": {
    "author": "Jan",
    "tags": ["13B", "Finetuned"],
    "size": 6360000000
  },
  "engine": "nitro"
}

View File

@ -1,29 +1,34 @@
{
  "sources": [
    {
      "filename": "starling-lm-7b-alpha.Q4_K_M.gguf",
      "url": "https://huggingface.co/TheBloke/Starling-LM-7B-alpha-GGUF/resolve/main/starling-lm-7b-alpha.Q4_K_M.gguf"
    }
  ],
  "id": "starling-7b",
  "object": "model",
  "name": "Starling alpha 7B Q4",
  "version": "1.0",
  "description": "Starling 7B, an upgrade of Openchat 3.5 using RLAIF, is really good at various benchmarks, especially with GPT-4 judging its performance.",
  "format": "gguf",
  "settings": {
    "ctx_len": 4096,
    "prompt_template": "GPT4 User: {prompt}<|end_of_turn|>GPT4 Assistant:",
    "llama_model_path": "starling-lm-7b-alpha.Q4_K_M.gguf"
  },
  "parameters": {
    "temperature": 0.7,
    "top_p": 0.95,
    "stream": true,
    "max_tokens": 4096,
    "stop": ["<|end_of_turn|>"],
    "frequency_penalty": 0,
    "presence_penalty": 0
  },
  "metadata": {
    "author": "Berkeley-nest, The Bloke",
    "tags": ["7B", "Finetuned"],
    "size": 4370000000
  },
  "engine": "nitro"
}

View File

@ -1,32 +1,33 @@
{
  "sources": [
    {
      "filename": "stealth-v1.3.Q4_K_M.gguf",
      "url": "https://huggingface.co/janhq/stealth-v1.3-GGUF/resolve/main/stealth-v1.3.Q4_K_M.gguf"
    }
  ],
  "id": "stealth-v1.2-7b",
  "object": "model",
  "name": "Stealth 7B Q4",
  "version": "1.0",
  "description": "This is a new experimental family designed to enhance Mathematical and Logical abilities.",
  "format": "gguf",
  "settings": {
    "ctx_len": 4096,
    "prompt_template": "<|im_start|>system\n{system_message}<|im_end|>\n<|im_start|>user\n{prompt}<|im_end|>\n<|im_start|>assistant",
    "llama_model_path": "stealth-v1.3.Q4_K_M.gguf"
  },
  "parameters": {
    "temperature": 0.7,
    "top_p": 0.95,
    "stream": true,
    "max_tokens": 4096,
    "frequency_penalty": 0,
    "presence_penalty": 0
  },
  "metadata": {
    "author": "Jan",
    "tags": ["7B", "Finetuned", "Featured"],
    "size": 4370000000
  },
  "engine": "nitro"
}

View File

@@ -1,5 +1,10 @@
 {
-  "source_url": "https://huggingface.co/TheBloke/TinyLlama-1.1B-Chat-v1.0-GGUF/resolve/main/tinyllama-1.1b-chat-v1.0.Q4_K_M.gguf",
+  "sources": [
+    {
+      "filename": "tinyllama-1.1b-chat-v1.0.Q4_K_M.gguf",
+      "url": "https://huggingface.co/TheBloke/TinyLlama-1.1B-Chat-v1.0-GGUF/resolve/main/tinyllama-1.1b-chat-v1.0.Q4_K_M.gguf"
+    }
+  ],
   "id": "tinyllama-1.1b",
   "object": "model",
   "name": "TinyLlama Chat 1.1B Q4",
@@ -7,8 +12,9 @@
   "description": "TinyLlama is a tiny model with only 1.1B. It's a good model for less powerful computers.",
   "format": "gguf",
   "settings": {
-    "ctx_len": 2048,
-    "prompt_template": "<|system|>\n{system_message}<|user|>\n{prompt}<|assistant|>"
+    "ctx_len": 4096,
+    "prompt_template": "<|system|>\n{system_message}<|user|>\n{prompt}<|assistant|>",
+    "llama_model_path": "tinyllama-1.1b-chat-v1.0.Q4_K_M.gguf"
   },
   "parameters": {
     "temperature": 0.7,
@@ -20,9 +26,9 @@
     "presence_penalty": 0
   },
   "metadata": {
     "author": "TinyLlama",
     "tags": ["Tiny", "Foundation Model"],
     "size": 669000000
   },
   "engine": "nitro"
 }

View File

@@ -1,29 +1,34 @@
{
  "sources": [
    {
      "filename": "trinity-v1.2.Q4_K_M.gguf",
      "url": "https://huggingface.co/janhq/trinity-v1.2-GGUF/resolve/main/trinity-v1.2.Q4_K_M.gguf"
    }
  ],
  "id": "trinity-v1.2-7b",
  "object": "model",
  "name": "Trinity-v1.2 7B Q4",
  "version": "1.0",
  "description": "Trinity is an experimental model merge using the Slerp method. Recommended for daily assistance purposes.",
  "format": "gguf",
  "settings": {
    "ctx_len": 4096,
    "prompt_template": "<|im_start|>system\n{system_message}<|im_end|>\n<|im_start|>user\n{prompt}<|im_end|>\n<|im_start|>assistant",
    "llama_model_path": "trinity-v1.2.Q4_K_M.gguf"
  },
  "parameters": {
    "temperature": 0.7,
    "top_p": 0.95,
    "stream": true,
    "max_tokens": 4096,
    "frequency_penalty": 0,
    "presence_penalty": 0
  },
  "metadata": {
    "author": "Jan",
    "tags": ["7B", "Merged", "Featured"],
    "size": 4370000000,
    "cover": "https://raw.githubusercontent.com/janhq/jan/main/models/trinity-v1.2-7b/cover.png"
  },
  "engine": "nitro"
}

View File

@@ -1,28 +1,33 @@
{
  "sources": [
    {
      "filename": "tulu-2-dpo-70b.Q4_K_M.gguf",
      "url": "https://huggingface.co/TheBloke/tulu-2-dpo-70B-GGUF/resolve/main/tulu-2-dpo-70b.Q4_K_M.gguf"
    }
  ],
  "id": "tulu-2-70b",
  "object": "model",
  "name": "Tulu 2 70B Q4",
  "version": "1.0",
  "description": "Tulu 70B is a strong alternative to Llama 2 70b Chat to act as helpful assistants.",
  "format": "gguf",
  "settings": {
    "ctx_len": 4096,
    "prompt_template": "<|user|>\n{prompt}\n<|assistant|>",
    "llama_model_path": "tulu-2-dpo-70b.Q4_K_M.gguf"
  },
  "parameters": {
    "temperature": 0.7,
    "top_p": 0.95,
    "stream": true,
    "max_tokens": 4096,
    "frequency_penalty": 0,
    "presence_penalty": 0
  },
  "metadata": {
    "author": "Lizpreciatior, The Bloke",
    "tags": ["70B", "Finetuned"],
    "size": 41400000000
  },
  "engine": "nitro"
}

View File

@@ -1,29 +1,34 @@
{
  "sources": [
    {
      "filename": "wizardcoder-python-13b-v1.0.Q5_K_M.gguf",
      "url": "https://huggingface.co/TheBloke/WizardCoder-Python-13B-V1.0-GGUF/resolve/main/wizardcoder-python-13b-v1.0.Q5_K_M.gguf"
    }
  ],
  "id": "wizardcoder-13b",
  "object": "model",
  "name": "Wizard Coder Python 13B Q5",
  "version": "1.0",
  "description": "WizardCoder 13B is a Python coding model. This model demonstrate high proficiency in specific domains like coding and mathematics.",
  "format": "gguf",
  "settings": {
    "ctx_len": 4096,
    "prompt_template": "### Instruction:\n{prompt}\n### Response:",
    "llama_model_path": "wizardcoder-python-13b-v1.0.Q5_K_M.gguf"
  },
  "parameters": {
    "temperature": 0.7,
    "top_p": 0.95,
    "stream": true,
    "max_tokens": 4096,
    "stop": [],
    "frequency_penalty": 0,
    "presence_penalty": 0
  },
  "metadata": {
    "author": "WizardLM, The Bloke",
    "tags": ["Recommended", "13B", "Finetuned"],
    "size": 7870000000
  },
  "engine": "nitro"
}

View File

@@ -1,29 +1,31 @@
{
  "sources": [
    {
      "url": "https://huggingface.co/TheBloke/Yarn-Mistral-7B-128k-GGUF/resolve/main/yarn-mistral-7b-128k.Q4_K_M.gguf"
    }
  ],
  "id": "yarn-mistral-7b",
  "object": "model",
  "name": "Yarn Mistral 7B Q4",
  "version": "1.0",
  "description": "Yarn Mistral 7B is a language model for long context and supports a 128k token context window.",
  "format": "gguf",
  "settings": {
    "ctx_len": 4096,
    "prompt_template": "{prompt}"
  },
  "parameters": {
    "temperature": 0.7,
    "top_p": 0.95,
    "stream": true,
    "max_tokens": 4096,
    "frequency_penalty": 0,
    "presence_penalty": 0
  },
  "metadata": {
    "author": "NousResearch, The Bloke",
    "tags": ["7B", "Finetuned"],
    "size": 4370000000
  },
  "engine": "nitro"
}

View File

@@ -1,29 +1,34 @@
{
  "sources": [
    {
      "filename": "yi-34b-chat.Q5_K_M.gguf",
      "url": "https://huggingface.co/TheBloke/Yi-34B-Chat-GGUF/resolve/main/yi-34b-chat.Q5_K_M.gguf"
    }
  ],
  "id": "yi-34b",
  "object": "model",
  "name": "Yi 34B Q5",
  "version": "1.0",
  "description": "Yi-34B, a specialized chat model, is known for its diverse and creative responses and excels across various NLP tasks and benchmarks.",
  "format": "gguf",
  "settings": {
    "ctx_len": 4096,
    "prompt_template": "<|im_start|>system\n{system_message}<|im_end|>\n<|im_start|>user\n{prompt}<|im_end|>\n<|im_start|>assistant",
    "llama_model_path": "yi-34b-chat.Q5_K_M.gguf"
  },
  "parameters": {
    "temperature": 0.7,
    "top_p": 0.95,
    "stream": true,
    "max_tokens": 4096,
    "stop": [],
    "frequency_penalty": 0,
    "presence_penalty": 0
  },
  "metadata": {
    "author": "01-ai, The Bloke",
    "tags": ["34B", "Foundational Model"],
    "size": 20660000000
  },
  "engine": "nitro"
}
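Note: every config above still lists a single binary. The case the sources array exists for is a model that ships more than one file, for example a main GGUF plus a multimodal projector, which a single source_url could not express. The config below is a hypothetical illustration written as a TypeScript literal; the id, filenames, and URLs are invented, and the mmproj setting mirrors the field added to toSettingParams at the end of this commit.

// Hypothetical multi-binary model config (not one of the files in this commit).
export const exampleMultiBinaryModel = {
  sources: [
    {
      filename: 'example-vision-7b.Q4_K_M.gguf',
      url: 'https://example.com/example-vision-7b.Q4_K_M.gguf',
    },
    {
      filename: 'mmproj-example-vision-7b.gguf',
      url: 'https://example.com/mmproj-example-vision-7b.gguf',
    },
  ],
  id: 'example-vision-7b',
  object: 'model',
  format: 'gguf',
  settings: {
    ctx_len: 4096,
    prompt_template: '{prompt}',
    llama_model_path: 'example-vision-7b.Q4_K_M.gguf',
    mmproj: 'mmproj-example-vision-7b.gguf',
  },
  engine: 'nitro',
}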

View File

@@ -105,6 +105,7 @@ export default function EventListenerWrapper({ children }: PropsWithChildren) {
       })
     }
     return () => {}
+    // eslint-disable-next-line react-hooks/exhaustive-deps
   }, [])

   return (

View File

@@ -6,6 +6,7 @@ import {
   ModelExtension,
   abortDownload,
   joinPath,
+  ModelArtifact,
 } from '@janhq/core'

 import { useSetAtom } from 'jotai'
@@ -25,6 +26,23 @@ export default function useDownloadModel() {
   const addNewDownloadingModel = useSetAtom(addNewDownloadingModelAtom)

   const downloadModel = async (model: Model) => {
+    const childrenDownloadProgress: DownloadState[] = []
+    model.sources.forEach((source: ModelArtifact) => {
+      childrenDownloadProgress.push({
+        modelId: source.filename,
+        time: {
+          elapsed: 0,
+          remaining: 0,
+        },
+        speed: 0,
+        percent: 0,
+        size: {
+          total: 0,
+          transferred: 0,
+        },
+      })
+    })
+
     // set an initial download state
     setDownloadState({
       modelId: model.id,
@@ -38,6 +56,7 @@ export default function useDownloadModel() {
         total: 0,
         transferred: 0,
       },
+      children: childrenDownloadProgress,
     })

     addNewDownloadingModel(model)
@@ -46,6 +65,7 @@ export default function useDownloadModel() {
       .get<ModelExtension>(ExtensionTypeEnum.Model)
       ?.downloadModel(model, { ignoreSSL, proxy })
   }
+
   const abortModelDownload = async (model: Model) => {
     await abortDownload(
       await joinPath(['models', model.id, modelBinFileName(model)])
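Note: abortModelDownload still cancels only the primary binary derived by modelBinFileName. The sketch below shows one way to cancel every file of a multi-source model; it is an assumption, not code from this commit, and it assumes each source is written into the model folder under its own filename.

import { Model, abortDownload, joinPath } from '@janhq/core'

// Sketch: abort the download of every binary listed in a model's sources.
export const abortAllModelDownloads = async (model: Model) => {
  for (const source of model.sources) {
    await abortDownload(await joinPath(['models', model.id, source.filename]))
  }
}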

View File

@@ -1,7 +1,6 @@
 import { useEffect, useState } from 'react'

-import { ExtensionTypeEnum, ModelExtension } from '@janhq/core'
-import { Model } from '@janhq/core'
+import { ExtensionTypeEnum, ModelExtension, Model } from '@janhq/core'

 import { extensionManager } from '@/extension/ExtensionManager'
@@ -25,6 +24,7 @@ export function useGetConfiguredModels() {
   useEffect(() => {
     fetchModels()
+    // eslint-disable-next-line react-hooks/exhaustive-deps
   }, [])

   return { loading, models }

View File

@@ -6,7 +6,7 @@ import { atom, useAtom } from 'jotai'
 import { extensionManager } from '@/extension/ExtensionManager'

-const downloadedModelsAtom = atom<Model[]>([])
+export const downloadedModelsAtom = atom<Model[]>([])

 export function useGetDownloadedModels() {
   const [downloadedModels, setDownloadedModels] = useAtom(downloadedModelsAtom)
@@ -15,7 +15,8 @@ export function useGetDownloadedModels() {
     getDownloadedModels().then((downloadedModels) => {
       setDownloadedModels(downloadedModels)
     })
-  }, [setDownloadedModels])
+    // eslint-disable-next-line react-hooks/exhaustive-deps
+  }, [])

   return { downloadedModels, setDownloadedModels }
 }

View File

@@ -110,6 +110,7 @@ export default function useRecommendedModel() {
     console.debug(`Using last used model ${lastUsedModel.id}`)
     setRecommendedModel(lastUsedModel)
+    // eslint-disable-next-line react-hooks/exhaustive-deps
   }, [getAndSortDownloadedModels, activeThread])

   useEffect(() => {

View File

@@ -13,6 +13,7 @@ export const useSettings = () => {
   useEffect(() => {
     setTimeout(() => validateSettings, 3000)
+    // eslint-disable-next-line react-hooks/exhaustive-deps
   }, [])

   const validateSettings = async () => {

View File

@@ -21,7 +21,7 @@
     "class-variance-authority": "^0.7.0",
     "framer-motion": "^10.16.4",
     "highlight.js": "^11.9.0",
-    "jotai": "^2.4.0",
+    "jotai": "^2.6.0",
     "lodash": "^4.17.21",
     "lucide-react": "^0.291.0",
     "marked": "^9.1.2",

View File

@@ -26,7 +26,7 @@ import { useCreateNewThread } from '@/hooks/useCreateNewThread'
 import useDownloadModel from '@/hooks/useDownloadModel'
 import { useDownloadState } from '@/hooks/useDownloadState'
 import { getAssistants } from '@/hooks/useGetAssistants'
-import { useGetDownloadedModels } from '@/hooks/useGetDownloadedModels'
+import { downloadedModelsAtom } from '@/hooks/useGetDownloadedModels'
 import { useMainViewState } from '@/hooks/useMainViewState'

 import { toGibibytes } from '@/utils/converter'
@@ -43,8 +43,8 @@ type Props = {
 const ExploreModelItemHeader: React.FC<Props> = ({ model, onClick, open }) => {
   const { downloadModel } = useDownloadModel()
-  const { downloadedModels } = useGetDownloadedModels()
-  const { modelDownloadStateAtom, downloadStates } = useDownloadState()
+  const downloadedModels = useAtomValue(downloadedModelsAtom)
+  const { modelDownloadStateAtom } = useDownloadState()
   const { requestCreateNewThread } = useCreateNewThread()
   const totalRam = useAtomValue(totalRamAtom)
   const serverEnabled = useAtomValue(serverEnabledAtom)
@@ -100,9 +100,7 @@ const ExploreModelItemHeader: React.FC<Props> = ({ model, onClick, open }) => {
       )}
     </Tooltip>
   )
-  }
-
-  if (downloadState != null && downloadStates.length > 0) {
+  } else if (downloadState != null) {
     downloadButton = <ModalCancelDownload model={model} />
   }

View File

@@ -4,6 +4,8 @@ type DownloadState = {
   speed: number
   percent: number
   size: DownloadSize
+  isFinished?: boolean
+  children?: DownloadState[]
   error?: string
 }
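Note: with isFinished and children added to the type, a parent download can be summarised from its per-file child states. The roll-up below is a sketch of one way to do that, not code from this commit; it assumes DownloadState is available as an ambient type, that DownloadSize is { total, transferred }, and that percent is a 0-1 fraction.

// Sketch: derive the parent's totals from its children and mark it finished
// only when every child has finished.
const aggregateDownloadState = (parent: DownloadState): DownloadState => {
  const children = parent.children ?? []
  if (children.length === 0) return parent
  const total = children.reduce((sum, child) => sum + child.size.total, 0)
  const transferred = children.reduce((sum, child) => sum + child.size.transferred, 0)
  return {
    ...parent,
    size: { total, transferred },
    percent: total > 0 ? transferred / total : 0,
    isFinished: children.every((child) => child.isFinished === true),
  }
}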

View File

@@ -2,7 +2,7 @@ import { Model } from '@janhq/core'
 export const modelBinFileName = (model: Model) => {
   const modelFormatExt = '.gguf'
-  const extractedFileName = model.source_url?.split('/').pop() ?? model.id
+  const extractedFileName = model.sources[0]?.url.split('/').pop() ?? model.id
   const fileName = extractedFileName.toLowerCase().endsWith(modelFormatExt)
     ? extractedFileName
     : model.id
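Note: modelBinFileName still infers the local file name from the first source URL and falls back to model.id. A variant that prefers the explicit filename recorded in sources is sketched below; it is an assumption, not part of this commit.

import { Model } from '@janhq/core'

// Variant sketch: use sources[0].filename when present, then the URL basename,
// then the model id; keep the .gguf sanity check from the original helper.
export const modelPrimaryFileName = (model: Model): string => {
  const modelFormatExt = '.gguf'
  const primary = model.sources[0]
  const candidate = primary?.filename ?? primary?.url.split('/').pop() ?? model.id
  return candidate.toLowerCase().endsWith(modelFormatExt) ? candidate : model.id
}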

View File

@@ -40,6 +40,8 @@ export const toSettingParams = (
     n_parallel: undefined,
     cpu_threads: undefined,
     prompt_template: undefined,
+    llama_model_path: undefined,
+    mmproj: undefined,
   }

   const settingParams: ModelSettingParams = {}
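Note: blanking llama_model_path and mmproj here keeps file paths out of the user-tunable setting params; they are resolved when the model is loaded. The split below is a rough sketch under assumptions, with invented names, and is not code from this commit.

// Sketch: separate a model.json settings block into runtime params and
// load-time file paths resolved against the model folder.
type SettingsBlock = {
  ctx_len?: number
  prompt_template?: string
  llama_model_path?: string
  mmproj?: string
}

export const splitSettings = (settings: SettingsBlock, modelFolder: string) => {
  const { llama_model_path, mmproj, ...runtime } = settings
  return {
    runtime,
    load: {
      llama_model_path: llama_model_path ? `${modelFolder}/${llama_model_path}` : undefined,
      mmproj: mmproj ? `${modelFolder}/${mmproj}` : undefined,
    },
  }
}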