feat: support multiple model binaries (#1659)
* feat: Support multiple model binaries
* fix: Update downloadModel with multiple binaries handler
* feat: Add 3 models with multiple binaries
* chore: fix model download
* fix: model file lookup & model path
* chore: add .prettierrc
* chore: refactor docs
* chore: bump model version
* fix(capybara): add filename
* fix(codeninja): add file name + llama model path
* fix(default): add llama model path
* fix(deepseek coder): add filename
* fix(deepseek 33B): add filename
* fix(dolphin mixtral): add filename
* fix(llama2-chat): add filename
* fix(llama2-70B): add filename
* fix(mistral 7b): add filename + model path
* fix(bakllava): correct model size
* fix(llava-7b): correct model size
* fix(llava-13b): correct model size
* fix(mixtral-8x7b): add file name + model path
* fix(noromaid-7b): add file name + model path
* fix(openchat-7b): add file name + model path
* fix(openhermes-7b): add file name + model path
* fix(phi2-3b): add file name + model path
* fix(phind): add file name + model path
* fix(solarslerp): add file name + model path
* fix(starling): add file name + model path
* fix(stealth): add file name + model path
* fix(tinyllama): add file name + model path
* fix(trinity): add file name + model path
* fix(tulu): add file name + model path
* fix(wizardcoder): add file name + model path
* fix(yi): add file name + model path
* update from source -> sources

Signed-off-by: James <james@jan.ai>

---------

Signed-off-by: James <james@jan.ai>
Co-authored-by: hiro <vuonghoainam.work@gmail.com>
Co-authored-by: hahuyhoang411 <hahuyhoanghhh41@gmail.com>
Co-authored-by: James <james@jan.ai>
Parent: 3b8e2c5585
Commit: 0e48be67e8
core/.prettierignore (new file, 5 lines)
@@ -0,0 +1,5 @@
.next/
node_modules/
dist/
*.hbs
*.mdx
@@ -265,19 +265,22 @@ export const downloadModel = async (
  const modelBinaryPath = join(directoryPath, modelId)

  const request = require('request')
  const rq = request({ url: model.source_url, strictSSL, proxy })
  const progress = require('request-progress')
  progress(rq, {})
    .on('progress', function (state: any) {
      console.log('progress', JSON.stringify(state, null, 2))
    })
    .on('error', function (err: Error) {
      console.error('error', err)
    })
    .on('end', function () {
      console.log('end')
    })
    .pipe(fs.createWriteStream(modelBinaryPath))

  for (const source of model.sources) {
    const rq = request({ url: source, strictSSL, proxy })
    progress(rq, {})
      .on('progress', function (state: any) {
        console.debug('progress', JSON.stringify(state, null, 2))
      })
      .on('error', function (err: Error) {
        console.error('error', err)
      })
      .on('end', function () {
        console.debug('end')
      })
      .pipe(fs.createWriteStream(modelBinaryPath))
  }

  return {
    message: `Starting download ${modelId}`,
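Note that the new loop above appears to pipe every source into the same `modelBinaryPath`. A minimal per-source sketch (hypothetical, not the committed code; deriving the target name from the URL's last segment is an assumption) would give each binary its own file:

```ts
import { join } from 'path'
import * as fs from 'fs'

// Hypothetical variant: one target file per source URL instead of a
// single shared modelBinaryPath.
function downloadSources(
  sources: string[],
  directoryPath: string,
  strictSSL: boolean,
  proxy?: string
) {
  const request = require('request')
  const progress = require('request-progress')
  for (const source of sources) {
    // Assumption: the last URL segment is a usable file name.
    const fileName = source.split('/').pop() ?? 'model.bin'
    progress(request({ url: source, strictSSL, proxy }), {})
      .on('error', (err: Error) => console.error('error', err))
      .pipe(fs.createWriteStream(join(directoryPath, fileName)))
  }
}
```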
@@ -21,6 +21,11 @@ export enum InferenceEngine {
  hf_endpoint = 'hf_endpoint',
}

export type ModelArtifact = {
  filename: string
  url: string
}

/**
 * Model type defines the shape of a model object.
 * @stored

@@ -45,7 +50,7 @@ export type Model = {
  /**
   * The model download source. It can be an external url or a local filepath.
   */
  source_url: string
  sources: ModelArtifact[]

  /**
   * The model identifier, which can be referenced in the API endpoints.

@@ -107,6 +112,8 @@ export type ModelSettingParams = {
  system_prompt?: string
  ai_prompt?: string
  user_prompt?: string
  llama_model_path?: string
  mmproj?: string
}
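To illustrate the new shape, a hypothetical `sources` value for a model that ships two binaries (weights plus a vision projector; the URLs are placeholders, and the import assumes the type is exported from the core package):

```ts
import { ModelArtifact } from '@janhq/core'

// Placeholder URLs; a real model.json points at the hosting repository.
const sources: ModelArtifact[] = [
  { filename: 'ggml-model-q5_k.gguf', url: 'https://example.com/ggml-model-q5_k.gguf' },
  { filename: 'mmproj-model-f16.gguf', url: 'https://example.com/mmproj-model-f16.gguf' },
]
```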

/**
@@ -56,7 +56,6 @@ jan/ # Jan root folder

- Each `model` folder contains a `model.json` file, which is a representation of a model.
- `model.json` contains metadata and default parameters used to run a model.
- The only required field is `source_url`.

### Example

@@ -64,36 +63,43 @@ Here's a standard example `model.json` for a GGUF model.

```js
{
  "id": "zephyr-7b", // Defaults to foldername
  "object": "model", // Defaults to "model"
  "source_url": "https://huggingface.co/TheBloke/zephyr-7B-beta-GGUF/blob/main/zephyr-7b-beta.Q4_K_M.gguf",
  "name": "Zephyr 7B", // Defaults to foldername
  "owned_by": "you", // Defaults to "you"
  "version": "1", // Defaults to 1
  "created": 1231231, // Defaults to file creation time
  "description": null, // Defaults to null
  "state": enum[null, "ready"]
  "format": "ggufv3", // Defaults to "ggufv3"
  "engine": "nitro", // engine_id specified in jan/engine folder
  "engine_parameters": { // Engine parameters inside model.json can override
    "ctx_len": 2048, // the value inside the base engine.json
  "id": "zephyr-7b", // Defaults to foldername
  "object": "model", // Defaults to "model"
  "sources": [
    {
      "filename": "zephyr-7b-beta.Q4_K_M.gguf",
      "url": "https://huggingface.co/TheBloke/zephyr-7B-beta-GGUF/blob/main/zephyr-7b-beta.Q4_K_M.gguf"
    }
  ],
  "name": "Zephyr 7B", // Defaults to foldername
  "owned_by": "you", // Defaults to "you"
  "version": "1", // Defaults to 1
  "created": 1231231, // Defaults to file creation time
  "description": null, // Defaults to null
  "format": "ggufv3", // Defaults to "ggufv3"
  "engine": "nitro", // engine_id specified in jan/engine folder
  "engine_parameters": {
    // Engine parameters inside model.json can override
    "ctx_len": 4096, // the value inside the base engine.json
    "ngl": 100,
    "embedding": true,
    "n_parallel": 4,
  },
  "model_parameters": { // Models are called parameters
    "n_parallel": 4
  },
  "model_parameters": {
    // Models are called parameters
    "stream": true,
    "max_tokens": 2048,
    "stop": ["<endofstring>"], // This usually can be left blank, only used with specific need from model author
    "max_tokens": 4096,
    "stop": ["<endofstring>"], // This usually can be left blank, only used with specific need from model author
    "frequency_penalty": 0,
    "presence_penalty": 0,
    "temperature": 0.7,
    "top_p": 0.95
  },
  "metadata": {}, // Defaults to {}
  "assets": [ // Defaults to current dir
    "file://.../zephyr-7b-q4_k_m.bin",
  ]
},
  "metadata": {}, // Defaults to {}
  "assets": [
    // Defaults to current dir
    "file://.../zephyr-7b-q4_k_m.bin"
  ]
}
```

@@ -31,7 +31,6 @@ In this section, we will show you how to import a GGUF model from [HuggingFace](

## Manually Importing a Downloaded Model (nightly versions and v0.4.4+)

### 1. Create a Model Folder

Navigate to the `~/jan/models` folder. You can find this folder by going to `App Settings` > `Advanced` > `Open App Directory`.

@@ -92,7 +91,7 @@ Drag and drop your model binary into this folder, ensuring the `modelname.gguf`

#### 3. Voila

If your model doesn't show up in the Model Selector in conversations, please restart the app.

If that doesn't work, please feel free to join our [Discord community](https://discord.gg/Dt7MxDyNNZ) for support, updates, and discussions.

@@ -190,14 +189,18 @@ Edit `model.json` and include the following configurations:
- Ensure the filename is `model.json`.
- Ensure the `id` property matches the folder name you created.
- Ensure the GGUF filename matches the `id` property exactly.
- Ensure the `source_url` property is the direct binary download link ending in `.gguf`. In HuggingFace, you can find the direct links in the `Files and versions` tab.
- Ensure the `source.url` property is the direct binary download link ending in `.gguf`. In HuggingFace, you can find the direct links in the `Files and versions` tab.
- Ensure you are using the correct `prompt_template`. This is usually provided in the HuggingFace model's description page.
- Ensure the `state` property is set to `ready`.

```json title="model.json"
{
  // highlight-start
  "source_url": "https://huggingface.co/janhq/trinity-v1-GGUF/resolve/main/trinity-v1.Q4_K_M.gguf",
  "sources": [
    {
      "filename": "trinity-v1.Q4_K_M.gguf",
      "url": "https://huggingface.co/janhq/trinity-v1-GGUF/resolve/main/trinity-v1.Q4_K_M.gguf"
    }
  ],
  "id": "trinity-v1-7b",
  // highlight-end
  "object": "model",

@@ -208,7 +211,8 @@ Edit `model.json` and include the following configurations:
  "settings": {
    "ctx_len": 4096,
    // highlight-next-line
    "prompt_template": "{system_message}\n### Instruction:\n{prompt}\n### Response:"
    "prompt_template": "{system_message}\n### Instruction:\n{prompt}\n### Response:",
    "llama_model_path": "trinity-v1.Q4_K_M.gguf"
  },
  "parameters": {
    "max_tokens": 4096

@@ -218,9 +222,7 @@ Edit `model.json` and include the following configurations:
    "tags": ["7B", "Merged"],
    "size": 4370000000
  },
  "engine": "nitro",
  // highlight-next-line
  "state": "ready"
  "engine": "nitro"
}
```

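Before restarting the app, a quick sanity check of the edited file can save a round trip. A sketch (not part of Jan; assumes it is run from inside the model folder):

```ts
import * as fs from 'fs'

const model = JSON.parse(fs.readFileSync('model.json', 'utf8'))
console.assert(Array.isArray(model.sources) && model.sources.length > 0, 'sources must be a non-empty array')
console.assert(model.sources[0].url.endsWith('.gguf'), 'url should be a direct .gguf link')
// For local GGUF models, llama_model_path is expected to name the downloaded binary.
console.assert(model.settings?.llama_model_path === model.sources[0].filename, 'llama_model_path should match the binary filename')
```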
@@ -40,7 +40,12 @@ Navigate to the `~/jan/models` folder. Create a folder named `gpt-3.5-turbo-16k`

```json title="~/jan/models/gpt-3.5-turbo-16k/model.json"
{
  "source_url": "https://openai.com",
  "sources": [
    {
      "filename": "openai",
      "url": "https://openai.com"
    }
  ],
  // highlight-next-line
  "id": "gpt-3.5-turbo-16k",
  "object": "model",

@@ -55,8 +60,7 @@ Navigate to the `~/jan/models` folder. Create a folder named `gpt-3.5-turbo-16k`
    "author": "OpenAI",
    "tags": ["General", "Big Context Length"]
  },
  "engine": "openai",
  "state": "ready"
  "engine": "openai"
  // highlight-end
}
```
@@ -118,7 +122,12 @@ Navigate to the `~/jan/models` folder. Create a folder named `mistral-ins-7b-q4`

```json title="~/jan/models/mistral-ins-7b-q4/model.json"
{
  "source_url": "https://jan.ai",
  "sources": [
    {
      "filename": "janai",
      "url": "https://jan.ai"
    }
  ],
  // highlight-next-line
  "id": "mistral-ins-7b-q4",
  "object": "model",

@@ -134,8 +143,7 @@ Navigate to the `~/jan/models` folder. Create a folder named `mistral-ins-7b-q4`
    "tags": ["remote", "awesome"]
  },
  // highlight-start
  "engine": "openai",
  "state": "ready"
  "engine": "openai"
  // highlight-end
}
```

@@ -49,7 +49,12 @@ Navigate to the `~/jan/models` folder. Create a folder named `<openrouter-modelname>`

```json title="~/jan/models/openrouter-dolphin-mixtral-8x7b/model.json"
{
  "source_url": "https://openrouter.ai/",
  "sources": [
    {
      "filename": "openrouter",
      "url": "https://openrouter.ai/"
    }
  ],
  "id": "cognitivecomputations/dolphin-mixtral-8x7b",
  "object": "model",
  "name": "Dolphin 2.6 Mixtral 8x7B",

@@ -63,8 +68,7 @@ Navigate to the `~/jan/models` folder. Create a folder named `<openrouter-modelname>`
    "tags": ["General", "Big Context Length"]
  },
  // highlight-start
  "engine": "openai",
  "state": "ready"
  "engine": "openai"
  // highlight-end
}
```

@@ -59,7 +59,12 @@ Navigate to the `~/jan/models` folder. Create a folder named `<your-deployment-name>`

```json title="~/jan/models/gpt-35-hieu-jan/model.json"
{
  "source_url": "https://hieujan.openai.azure.com",
  "sources": [
    {
      "filename": "azure_openai",
      "url": "https://hieujan.openai.azure.com"
    }
  ],
  // highlight-next-line
  "id": "gpt-35-hieu-jan",
  "object": "model",

@@ -75,8 +80,7 @@ Navigate to the `~/jan/models` folder. Create a folder named `<your-deployment-name>`
    "tags": ["General", "Big Context Length"]
  },
  // highlight-start
  "engine": "openai",
  "state": "ready"
  "engine": "openai"
  // highlight-end
}
```

@@ -59,18 +59,26 @@ components:
          type: string
          description: State format of the model, distinct from the engine.
          example: ggufv3
        source_url:
          type: string
          format: uri
          description: URL to the source of the model.
          example: https://huggingface.co/TheBloke/zephyr-7B-beta-GGUF/blob/main/zephyr-7b-beta.Q4_K_M.gguf
        source:
          type: array
          items:
            type: object
            properties:
              url:
                format: uri
                description: URL to the source of the model.
                example: https://huggingface.co/TheBloke/zephyr-7B-beta-GGUF/blob/main/zephyr-7b-beta.Q4_K_M.gguf
              filename:
                type: string
                description: Filename of the model.
                example: zephyr-7b-beta.Q4_K_M.gguf
        settings:
          type: object
          properties:
            ctx_len:
              type: string
              description: Context length.
              example: "2048"
              example: "4096"
            ngl:
              type: string
              description: Number of layers.

@@ -94,7 +102,7 @@ components:
            token_limit:
              type: string
              description: Token limit for the model.
              example: "2048"
              example: "4096"
            top_k:
              type: string
              description: Top-k setting for the model.

@@ -117,7 +125,7 @@ components:
          type: string
          description: List of assets related to the model.
      required:
        - source_url
        - source
    ModelObject:
      type: object
      properties:

@@ -169,11 +177,19 @@ components:
            - running
            - stopped
          description: The current state of the model.
        source_url:
          type: string
          format: uri
          description: URL to the source of the model.
          example: https://huggingface.co/TheBloke/zephyr-7B-beta-GGUF/blob/main/zephyr-7b-beta.Q4_K_M.gguf
        source:
          type: array
          items:
            type: object
            properties:
              url:
                format: uri
                description: URL to the source of the model.
                example: https://huggingface.co/TheBloke/zephyr-7B-beta-GGUF/blob/main/zephyr-7b-beta.Q4_K_M.gguf
              filename:
                type: string
                description: Filename of the model.
                example: zephyr-7b-beta.Q4_K_M.gguf
        engine_parameters:
          type: object
          properties:

@@ -208,8 +224,8 @@ components:
                model.
              minimum: 128
              maximum: 4096
              default: 2048
              example: 2048
              default: 4096
              example: 4096
            n_parallel:
              type: integer
              description: Number of parallel operations, relevant when continuous batching is

@@ -241,8 +257,8 @@ components:
              description: Maximum context length the model can handle.
              minimum: 0
              maximum: 4096
              default: 2048
              example: 2048
              default: 4096
              example: 4096
            ngl:
              type: integer
              description: Number of layers in the neural network.

@@ -276,8 +292,8 @@ components:
                response.
              minimum: 1
              maximum: 4096
              default: 2048
              example: 2048
              default: 4096
              example: 4096
            top_k:
              type: integer
              description: Limits the model to consider only the top k most likely next tokens

@@ -318,7 +334,7 @@ components:
        - created
        - owned_by
        - state
        - source_url
        - source
        - parameters
        - metadata
    DeleteModelResponse:

@@ -119,11 +119,19 @@ async function runModel(
    wrapper.model.settings.ai_prompt = prompt.ai_prompt;
  }

  const modelFolderPath = path.join(janRoot, "models", wrapper.model.id);
  const modelPath = wrapper.model.settings.llama_model_path
    ? path.join(modelFolderPath, wrapper.model.settings.llama_model_path)
    : currentModelFile;

  currentSettings = {
    llama_model_path: currentModelFile,
    ...wrapper.model.settings,
    llama_model_path: modelPath,
    // This is critical and requires real CPU physical core count (or performance core)
    cpu_threads: Math.max(1, nitroResourceProbe.numCpuPhysicalCore),
    ...(wrapper.model.settings.mmproj && {
      mmproj: path.join(modelFolderPath, wrapper.model.settings.mmproj),
    }),
  };
  console.log(currentSettings);
  return runNitroAndLoadModel();

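The ordering inside `currentSettings` is what makes the override work: with object spread, later properties win, so the resolved absolute `modelPath` replaces the bare filename carried in `model.json`. A small illustration (values are examples only):

```ts
const fromModelJson = { llama_model_path: 'model.Q4_K_M.gguf', ctx_len: 4096 }
const currentSettings = {
  ...fromModelJson,
  // Placed after the spread, so it overrides the relative name above.
  llama_model_path: '/home/user/jan/models/my-model/model.Q4_K_M.gguf',
}
console.log(currentSettings.llama_model_path) // the absolute path wins
```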
@@ -1,6 +1,6 @@
{
  "name": "@janhq/model-extension",
  "version": "1.0.22",
  "version": "1.0.23",
  "description": "Model Management Extension provides model exploration and seamless downloads",
  "main": "dist/index.js",
  "module": "dist/module.js",

@@ -80,16 +80,34 @@ export default class JanModelExtension extends ModelExtension {
    const modelDirPath = await joinPath([JanModelExtension._homeDir, model.id])
    if (!(await fs.existsSync(modelDirPath))) await fs.mkdirSync(modelDirPath)

    // try to retrieve the download file name from the source url
    // if it fails, use the model ID as the file name
    const extractedFileName = await model.source_url.split('/').pop()
    if (model.sources.length > 1) {
      // path to model binaries
      for (const source of model.sources) {
        let path = this.extractFileName(source.url)
        if (source.filename) {
          path = await joinPath([modelDirPath, source.filename])
        }

        downloadFile(source.url, path, network)
      }
    } else {
      const fileName = this.extractFileName(model.sources[0]?.url)
      const path = await joinPath([modelDirPath, fileName])
      downloadFile(model.sources[0]?.url, path, network)
    }
  }

  /**
   * try to retrieve the download file name from the source url
   */
  private extractFileName(url: string): string {
    const extractedFileName = url.split('/').pop()
    const fileName = extractedFileName
      .toLowerCase()
      .endsWith(JanModelExtension._supportedModelFormat)
      ? extractedFileName
      : model.id
    const path = await joinPath([modelDirPath, fileName])
    downloadFile(model.source_url, path, network)
      : extractedFileName + JanModelExtension._supportedModelFormat
    return fileName
  }
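Assuming `JanModelExtension._supportedModelFormat` is the `.gguf` suffix, the finished helper behaves roughly as follows (illustrative, not part of the diff):

```ts
// extractFileName('https://host/repo/resolve/main/model.Q4_K_M.gguf')
//   -> 'model.Q4_K_M.gguf'   (already ends with the supported suffix)
// extractFileName('https://host/repo/resolve/main/some-model')
//   -> 'some-model.gguf'     (suffix appended when missing)
```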

  /**
@@ -98,6 +116,7 @@ export default class JanModelExtension extends ModelExtension {
   * @returns {Promise<void>} A promise that resolves when the download has been cancelled.
   */
  async cancelModelDownload(modelId: string): Promise<void> {
    const model = await this.getConfiguredModels()
    return abortDownload(
      await joinPath([JanModelExtension._homeDir, modelId, modelId])
    ).then(async () => {
@@ -163,15 +182,16 @@ export default class JanModelExtension extends ModelExtension {
      .then((files: string[]) => {
        // or model binary exists in the directory
        // model binary name can match model ID or be a .gguf file and not be an incomplete model file
        // TODO: Check diff between urls, filenames
        return (
          files.includes(modelDir) ||
          files.some(
          files.filter(
            (file) =>
              file
                .toLowerCase()
                .includes(JanModelExtension._supportedModelFormat) &&
              !file.endsWith(JanModelExtension._incompletedModelFileName)
          )
          )?.length >= model.sources.length
        )
      })
  }
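The switch from `some` to `filter(...).length >= model.sources.length` means a multi-binary model only counts as downloaded once every declared source is on disk. A standalone sketch of that rule (assuming `.gguf` as the supported format and a `.download` suffix for in-flight files; both names are assumptions standing in for the class constants):

```ts
const isDownloaded = (files: string[], sources: { url: string }[]): boolean =>
  files.filter(
    (file) => file.toLowerCase().includes('.gguf') && !file.endsWith('.download')
  ).length >= sources.length
```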
@@ -198,7 +218,6 @@ export default class JanModelExtension extends ModelExtension {

    const readJsonPromises = allDirectories.map(async (dirName) => {
      // filter out directories that don't match the selector

      // read model.json
      const jsonPath = await joinPath([
        JanModelExtension._homeDir,
@@ -226,7 +245,21 @@ export default class JanModelExtension extends ModelExtension {
    const modelData = results.map((result) => {
      if (result.status === 'fulfilled') {
        try {
          return result.value as Model
          // This is to ensure backward compatibility with `model.json` files that use `source_url`
          const tmpModel =
            typeof result.value === 'object'
              ? result.value
              : JSON.parse(result.value)
          if (tmpModel['source_url'] != null) {
            tmpModel['source'] = [
              {
                filename: tmpModel.id,
                url: tmpModel['source_url'],
              },
            ]
          }

          return tmpModel as Model
        } catch {
          console.debug(`Unable to parse model metadata: ${result.value}`)
          return undefined
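A standalone sketch of the same backward-compatibility mapping (type names are illustrative; note the hunk above assigns to `source`, singular, while this sketch uses the `sources` field from the new type):

```ts
type LegacyModel = {
  id: string
  source_url?: string
  sources?: { filename: string; url: string }[]
}

// Fold a legacy source_url into the new array shape, reusing the model id
// as the filename, exactly as the loader does for old model.json files.
function normalizeSources(model: LegacyModel): LegacyModel {
  if (model.source_url != null && !model.sources) {
    model.sources = [{ filename: model.id, url: model.source_url }]
  }
  return model
}
```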
models/bakllava-1/model.json (new file, 33 lines)
@@ -0,0 +1,33 @@
{
  "sources": [
    {
      "filename": "ggml-model-q5_k.gguf",
      "url": "https://huggingface.co/mys/ggml_bakllava-1/resolve/main/ggml-model-q5_k.gguf"
    },
    {
      "filename": "mmproj-model-f16.gguf",
      "url": "https://huggingface.co/mys/ggml_bakllava-1/resolve/main/mmproj-model-f16.gguf"
    }
  ],
  "id": "bakllava-1",
  "object": "model",
  "name": "BakLlava 1",
  "version": "1.0",
  "description": "BakLlava 1 can bring vision understanding to Jan",
  "format": "gguf",
  "settings": {
    "ctx_len": 4096,
    "prompt_template": "\n### Instruction:\n{prompt}\n### Response:\n",
    "llama_model_path": "ggml-model-q5_k.gguf",
    "mmproj": "mmproj-model-f16.gguf"
  },
  "parameters": {
    "max_tokens": 4096
  },
  "metadata": {
    "author": "Mys",
    "tags": ["Vision"],
    "size": 5750000000
  },
  "engine": "nitro"
}
@@ -1,29 +1,34 @@
{
  "source_url": "https://huggingface.co/TheBloke/Nous-Capybara-34B-GGUF/resolve/main/nous-capybara-34b.Q5_K_M.gguf",
  "id": "capybara-34b",
  "object": "model",
  "name": "Capybara 200k 34B Q5",
  "version": "1.0",
  "description": "Nous Capybara 34B is a long context length model that supports 200K tokens.",
  "format": "gguf",
  "settings": {
    "ctx_len": 4096,
    "prompt_template": "USER:\n{prompt}\nASSISTANT:"
  },
  "parameters": {
    "temperature": 0.7,
    "top_p": 0.95,
    "stream": true,
    "max_tokens": 4096,
    "stop": [],
    "frequency_penalty": 0,
    "presence_penalty": 0
  },
  "metadata": {
    "author": "NousResearch, The Bloke",
    "tags": ["34B", "Finetuned"],
    "size": 24320000000
  },
  "engine": "nitro"
}

  "sources": [
    {
      "filename": "nous-capybara-34b.Q5_K_M.gguf",
      "url": "https://huggingface.co/TheBloke/Nous-Capybara-34B-GGUF/resolve/main/nous-capybara-34b.Q5_K_M.gguf"
    }
  ],
  "id": "capybara-34b",
  "object": "model",
  "name": "Capybara 200k 34B Q5",
  "version": "1.0",
  "description": "Nous Capybara 34B is a long context length model that supports 200K tokens.",
  "format": "gguf",
  "settings": {
    "ctx_len": 4096,
    "prompt_template": "USER:\n{prompt}\nASSISTANT:",
    "llama_model_path": "nous-capybara-34b.Q5_K_M.gguf"
  },
  "parameters": {
    "temperature": 0.7,
    "top_p": 0.95,
    "stream": true,
    "max_tokens": 4096,
    "stop": [],
    "frequency_penalty": 0,
    "presence_penalty": 0
  },
  "metadata": {
    "author": "NousResearch, The Bloke",
    "tags": ["34B", "Finetuned"],
    "size": 24320000000
  },
  "engine": "nitro"
}

@@ -1,29 +1,33 @@
{
  "source_url": "https://huggingface.co/beowolx/CodeNinja-1.0-OpenChat-7B-GGUF/resolve/main/codeninja-1.0-openchat-7b.Q4_K_M.gguf",
  "id": "codeninja-1.0-7b",
  "object": "model",
  "name": "CodeNinja 7B Q4",
  "version": "1.0",
  "description": "CodeNinja is good for coding tasks and can handle various languages including Python, C, C++, Rust, Java, JavaScript, and more.",
  "format": "gguf",
  "settings": {
    "ctx_len": 4096,
    "prompt_template": "GPT4 Correct User: {prompt}<|end_of_turn|>GPT4 Correct Assistant:"
  },
  "parameters": {
    "temperature": 0.7,
    "top_p": 0.95,
    "stream": true,
    "max_tokens": 4096,
    "stop": ["<|end_of_turn|>"],
    "frequency_penalty": 0,
    "presence_penalty": 0
  },
  "metadata": {
    "author": "Beowolx",
    "tags": ["7B", "Finetuned"],
    "size": 4370000000
  },
  "engine": "nitro"
}

  "sources": [
    {
      "filename": "codeninja-1.0-openchat-7b.Q4_K_M.gguf",
      "url": "https://huggingface.co/beowolx/CodeNinja-1.0-OpenChat-7B-GGUF/resolve/main/codeninja-1.0-openchat-7b.Q4_K_M.gguf"
    }
  ],
  "id": "codeninja-1.0-7b",
  "object": "model",
  "name": "CodeNinja 7B Q4",
  "version": "1.0",
  "description": "CodeNinja is good for coding tasks and can handle various languages including Python, C, C++, Rust, Java, JavaScript, and more.",
  "format": "gguf",
  "settings": {
    "ctx_len": 4096,
    "prompt_template": "GPT4 Correct User: {prompt}<|end_of_turn|>GPT4 Correct Assistant:",
    "llama_model_path": "codeninja-1.0-openchat-7b.Q4_K_M.gguf"
  },
  "parameters": {
    "temperature": 0.7,
    "top_p": 0.95,
    "stream": true,
    "max_tokens": 4096,
    "frequency_penalty": 0,
    "presence_penalty": 0
  },
  "metadata": {
    "author": "Beowolx",
    "tags": ["7B", "Finetuned"],
    "size": 4370000000
  },
  "engine": "nitro"
}

@@ -2,7 +2,12 @@
  "object": "model",
  "version": 1,
  "format": "gguf",
  "source_url": "N/A",
  "sources": [
    {
      "url": "N/A",
      "filename": "N/A"
    }
  ],
  "id": "N/A",
  "name": "N/A",
  "created": 0,

@@ -10,7 +15,8 @@
  "settings": {
    "ctx_len": 4096,
    "embedding": false,
    "prompt_template": "{system_message}\n### Instruction: {prompt}\n### Response:"
    "prompt_template": "{system_message}\n### Instruction: {prompt}\n### Response:",
    "llama_model_path": "N/A"
  },
  "parameters": {
    "temperature": 0.7,

@@ -1,29 +1,34 @@
{
  "source_url": "https://huggingface.co/TheBloke/deepseek-coder-1.3b-instruct-GGUF/resolve/main/deepseek-coder-1.3b-instruct.Q8_0.gguf",
  "id": "deepseek-coder-1.3b",
  "object": "model",
  "name": "Deepseek Coder 1.3B Q8",
  "version": "1.0",
  "description": "Deepseek Coder excelled in project-level code completion with advanced capabilities across multiple programming languages.",
  "format": "gguf",
  "settings": {
    "ctx_len": 4096,
    "prompt_template": "### Instruction:\n{prompt}\n### Response:"
  },
  "parameters": {
    "temperature": 0.7,
    "top_p": 0.95,
    "stream": true,
    "max_tokens": 4096,
    "stop": [],
    "frequency_penalty": 0,
    "presence_penalty": 0
  },
  "metadata": {
    "author": "Deepseek, The Bloke",
    "tags": ["Tiny", "Foundational Model"],
    "size": 1430000000
  },
  "engine": "nitro"
}

  "sources": [
    {
      "filename": "deepseek-coder-1.3b-instruct.Q8_0.gguf",
      "url": "https://huggingface.co/TheBloke/deepseek-coder-1.3b-instruct-GGUF/resolve/main/deepseek-coder-1.3b-instruct.Q8_0.gguf"
    }
  ],
  "id": "deepseek-coder-1.3b",
  "object": "model",
  "name": "Deepseek Coder 1.3B Q8",
  "version": "1.0",
  "description": "Deepseek Coder excelled in project-level code completion with advanced capabilities across multiple programming languages.",
  "format": "gguf",
  "settings": {
    "ctx_len": 4096,
    "prompt_template": "### Instruction:\n{prompt}\n### Response:",
    "llama_model_path": "deepseek-coder-1.3b-instruct.Q8_0.gguf"
  },
  "parameters": {
    "temperature": 0.7,
    "top_p": 0.95,
    "stream": true,
    "max_tokens": 4096,
    "stop": [],
    "frequency_penalty": 0,
    "presence_penalty": 0
  },
  "metadata": {
    "author": "Deepseek, The Bloke",
    "tags": ["Tiny", "Foundational Model"],
    "size": 1430000000
  },
  "engine": "nitro"
}

@@ -1,29 +1,34 @@
{
  "source_url": "https://huggingface.co/TheBloke/deepseek-coder-33B-instruct-GGUF/resolve/main/deepseek-coder-33b-instruct.Q5_K_M.gguf",
  "id": "deepseek-coder-34b",
  "object": "model",
  "name": "Deepseek Coder 33B Q5",
  "version": "1.0",
  "description": "Deepseek Coder excelled in project-level code completion with advanced capabilities across multiple programming languages.",
  "format": "gguf",
  "settings": {
    "ctx_len": 4096,
    "prompt_template": "### Instruction:\n{prompt}\n### Response:"
  },
  "parameters": {
    "temperature": 0.7,
    "top_p": 0.95,
    "stream": true,
    "max_tokens": 4096,
    "stop": [],
    "frequency_penalty": 0,
    "presence_penalty": 0
  },
  "metadata": {
    "author": "Deepseek, The Bloke",
    "tags": ["34B", "Foundational Model"],
    "size": 19940000000
  },
  "engine": "nitro"
}

  "sources": [
    {
      "filename": "deepseek-coder-33b-instruct.Q5_K_M.gguf",
      "url": "https://huggingface.co/TheBloke/deepseek-coder-33B-instruct-GGUF/resolve/main/deepseek-coder-33b-instruct.Q5_K_M.gguf"
    }
  ],
  "id": "deepseek-coder-34b",
  "object": "model",
  "name": "Deepseek Coder 33B Q5",
  "version": "1.0",
  "description": "Deepseek Coder excelled in project-level code completion with advanced capabilities across multiple programming languages.",
  "format": "gguf",
  "settings": {
    "ctx_len": 4096,
    "prompt_template": "### Instruction:\n{prompt}\n### Response:",
    "llama_model_path": "deepseek-coder-33b-instruct.Q5_K_M.gguf"
  },
  "parameters": {
    "temperature": 0.7,
    "top_p": 0.95,
    "stream": true,
    "max_tokens": 4096,
    "stop": [],
    "frequency_penalty": 0,
    "presence_penalty": 0
  },
  "metadata": {
    "author": "Deepseek, The Bloke",
    "tags": ["34B", "Foundational Model"],
    "size": 19940000000
  },
  "engine": "nitro"
}

@@ -1,28 +1,34 @@
{
  "source_url": "https://huggingface.co/TheBloke/dolphin-2.7-mixtral-8x7b-GGUF/resolve/main/dolphin-2.7-mixtral-8x7b.Q4_K_M.gguf",
  "id": "dolphin-2.7-mixtral-8x7b",
  "object": "model",
  "name": "Dolphin 8x7B Q4",
  "version": "1.0",
  "description": "Dolphin is an uncensored model built on Mixtral-8x7b. It is good at programming tasks.",
  "format": "gguf",
  "settings": {
    "ctx_len": 4096,
    "prompt_template": "<|im_start|>system\n{system_message}<|im_end|>\n<|im_start|>user\n{prompt}<|im_end|>\n<|im_start|>assistant"
  },
  "parameters": {
    "temperature": 0.7,
    "top_p": 0.95,
    "stream": true,
    "max_tokens": 4096,
    "stop": [],
    "frequency_penalty": 0,
    "presence_penalty": 0
  },
  "metadata": {
    "author": "Cognitive Computations, TheBloke",
    "tags": ["70B", "Finetuned"],
    "size": 26440000000
  },
  "engine": "nitro"
}

  "sources": [
    {
      "filename": "dolphin-2.7-mixtral-8x7b.Q4_K_M.gguf",
      "url": "https://huggingface.co/TheBloke/dolphin-2.7-mixtral-8x7b-GGUF/resolve/main/dolphin-2.7-mixtral-8x7b.Q4_K_M.gguf"
    }
  ],
  "id": "dolphin-2.7-mixtral-8x7b",
  "object": "model",
  "name": "Dolphin 8x7B Q4",
  "version": "1.0",
  "description": "Dolphin is an uncensored model built on Mixtral-8x7b. It is good at programming tasks.",
  "format": "gguf",
  "settings": {
    "ctx_len": 4096,
    "prompt_template": "<|im_start|>system\n{system_message}<|im_end|>\n<|im_start|>user\n{prompt}<|im_end|>\n<|im_start|>assistant",
    "llama_model_path": "dolphin-2.7-mixtral-8x7b.Q4_K_M.gguf"
  },
  "parameters": {
    "temperature": 0.7,
    "top_p": 0.95,
    "stream": true,
    "max_tokens": 4096,
    "stop": [],
    "frequency_penalty": 0,
    "presence_penalty": 0
  },
  "metadata": {
    "author": "Cognitive Computations, TheBloke",
    "tags": ["70B", "Finetuned"],
    "size": 26440000000
  },
  "engine": "nitro"
}

@@ -1,18 +1,20 @@
{
  "source_url": "https://openai.com",
  "id": "gpt-3.5-turbo-16k-0613",
  "object": "model",
  "name": "OpenAI GPT 3.5 Turbo 16k 0613",
  "version": "1.0",
  "description": "OpenAI GPT 3.5 Turbo 16k 0613 model is extremely good",
  "format": "api",
  "settings": {},
  "parameters": {},
  "metadata": {
    "author": "OpenAI",
    "tags": ["General", "Big Context Length"]
  },
  "engine": "openai",
  "state": "ready"

  "sources": [
    {
      "url": "https://openai.com"
    }
  ],
  "id": "gpt-3.5-turbo-16k-0613",
  "object": "model",
  "name": "OpenAI GPT 3.5 Turbo 16k 0613",
  "version": "1.0",
  "description": "OpenAI GPT 3.5 Turbo 16k 0613 model is extremely good",
  "format": "api",
  "settings": {},
  "parameters": {},
  "metadata": {
    "author": "OpenAI",
    "tags": ["General", "Big Context Length"]
  },
  "engine": "openai"
}

@@ -1,18 +1,20 @@
{
  "source_url": "https://openai.com",
  "id": "gpt-3.5-turbo",
  "object": "model",
  "name": "OpenAI GPT 3.5 Turbo",
  "version": "1.0",
  "description": "OpenAI GPT 3.5 Turbo model is extremely good",
  "format": "api",
  "settings": {},
  "parameters": {},
  "metadata": {
    "author": "OpenAI",
    "tags": ["General", "Big Context Length"]
  },
  "engine": "openai",
  "state": "ready"

  "sources": [
    {
      "url": "https://openai.com"
    }
  ],
  "id": "gpt-3.5-turbo",
  "object": "model",
  "name": "OpenAI GPT 3.5 Turbo",
  "version": "1.0",
  "description": "OpenAI GPT 3.5 Turbo model is extremely good",
  "format": "api",
  "settings": {},
  "parameters": {},
  "metadata": {
    "author": "OpenAI",
    "tags": ["General", "Big Context Length"]
  },
  "engine": "openai"
}

@@ -1,18 +1,20 @@
{
  "source_url": "https://openai.com",
  "id": "gpt-4",
  "object": "model",
  "name": "OpenAI GPT 4",
  "version": "1.0",
  "description": "OpenAI GPT 4 model is extremely good",
  "format": "api",
  "settings": {},
  "parameters": {},
  "metadata": {
    "author": "OpenAI",
    "tags": ["General", "Big Context Length"]
  },
  "engine": "openai",
  "state": "ready"

  "sources": [
    {
      "url": "https://openai.com"
    }
  ],
  "id": "gpt-4",
  "object": "model",
  "name": "OpenAI GPT 4",
  "version": "1.0",
  "description": "OpenAI GPT 4 model is extremely good",
  "format": "api",
  "settings": {},
  "parameters": {},
  "metadata": {
    "author": "OpenAI",
    "tags": ["General", "Big Context Length"]
  },
  "engine": "openai"
}

@@ -1,29 +1,34 @@
{
  "source_url": "https://huggingface.co/TheBloke/Llama-2-70B-Chat-GGUF/resolve/main/llama-2-70b-chat.Q4_K_M.gguf",
  "id": "llama2-chat-70b-q4",
  "object": "model",
  "name": "Llama 2 Chat 70B Q4",
  "version": "1.0",
  "description": "Llama 2 Chat 7b model, specifically designed for a comprehensive understanding through training on extensive internet data.",
  "format": "gguf",
  "settings": {
    "ctx_len": 4096,
    "prompt_template": "[INST] <<SYS>>\n{system_message}<</SYS>>\n{prompt}[/INST]"
  },
  "parameters": {
    "temperature": 0.7,
    "top_p": 0.95,
    "stream": true,
    "max_tokens": 4096,
    "stop": [],
    "frequency_penalty": 0,
    "presence_penalty": 0
  },
  "metadata": {
    "author": "MetaAI, The Bloke",
    "tags": ["70B", "Foundational Model"],
    "size": 43920000000
  },
  "engine": "nitro"
}

  "sources": [
    {
      "filename": "llama-2-70b-chat.Q4_K_M.gguf",
      "url": "https://huggingface.co/TheBloke/Llama-2-70B-Chat-GGUF/resolve/main/llama-2-70b-chat.Q4_K_M.gguf"
    }
  ],
  "id": "llama2-chat-70b-q4",
  "object": "model",
  "name": "Llama 2 Chat 70B Q4",
  "version": "1.0",
  "description": "Llama 2 Chat 7b model, specifically designed for a comprehensive understanding through training on extensive internet data.",
  "format": "gguf",
  "settings": {
    "ctx_len": 4096,
    "prompt_template": "[INST] <<SYS>>\n{system_message}<</SYS>>\n{prompt}[/INST]",
    "llama_model_path": "llama-2-70b-chat.Q4_K_M.gguf"
  },
  "parameters": {
    "temperature": 0.7,
    "top_p": 0.95,
    "stream": true,
    "max_tokens": 4096,
    "stop": [],
    "frequency_penalty": 0,
    "presence_penalty": 0
  },
  "metadata": {
    "author": "MetaAI, The Bloke",
    "tags": ["70B", "Foundational Model"],
    "size": 43920000000
  },
  "engine": "nitro"
}

@@ -1,29 +1,34 @@
{
  "source_url": "https://huggingface.co/TheBloke/Llama-2-7B-Chat-GGUF/resolve/main/llama-2-7b-chat.Q4_K_M.gguf",
  "id": "llama2-chat-7b-q4",
  "object": "model",
  "name": "Llama 2 Chat 7B Q4",
  "version": "1.0",
  "description": "Llama 2 Chat 7b model, specifically designed for a comprehensive understanding through training on extensive internet data.",
  "format": "gguf",
  "settings": {
    "ctx_len": 4096,
    "prompt_template": "[INST] <<SYS>>\n{system_message}<</SYS>>\n{prompt}[/INST]"
  },
  "parameters": {
    "temperature": 0.7,
    "top_p": 0.95,
    "stream": true,
    "max_tokens": 4096,
    "stop": [],
    "frequency_penalty": 0,
    "presence_penalty": 0
  },
  "metadata": {
    "author": "MetaAI, The Bloke",
    "tags": ["7B", "Foundational Model"],
    "size": 4080000000
  },
  "engine": "nitro"
}

  "sources": [
    {
      "filename": "llama-2-7b-chat.Q4_K_M.gguf",
      "url": "https://huggingface.co/TheBloke/Llama-2-7B-Chat-GGUF/resolve/main/llama-2-7b-chat.Q4_K_M.gguf"
    }
  ],
  "id": "llama2-chat-7b-q4",
  "object": "model",
  "name": "Llama 2 Chat 7B Q4",
  "version": "1.0",
  "description": "Llama 2 Chat 7b model, specifically designed for a comprehensive understanding through training on extensive internet data.",
  "format": "gguf",
  "settings": {
    "ctx_len": 4096,
    "prompt_template": "[INST] <<SYS>>\n{system_message}<</SYS>>\n{prompt}[/INST]",
    "llama_model_path": "llama-2-7b-chat.Q4_K_M.gguf"
  },
  "parameters": {
    "temperature": 0.7,
    "top_p": 0.95,
    "stream": true,
    "max_tokens": 4096,
    "stop": [],
    "frequency_penalty": 0,
    "presence_penalty": 0
  },
  "metadata": {
    "author": "MetaAI, The Bloke",
    "tags": ["7B", "Foundational Model"],
    "size": 4080000000
  },
  "engine": "nitro"
}

models/llava-1.5-13b-q5/model.json (new file, 33 lines)
@@ -0,0 +1,33 @@
{
  "sources": [
    {
      "filename": "ggml-model-q5_k.gguf",
      "url": "https://huggingface.co/mys/ggml_llava-v1.5-13b/resolve/main/ggml-model-q5_k.gguf"
    },
    {
      "filename": "mmproj-model-f16.gguf",
      "url": "https://huggingface.co/mys/ggml_llava-v1.5-13b/resolve/main/mmproj-model-f16.gguf"
    }
  ],
  "id": "llava-1.5-13b-q5",
  "object": "model",
  "name": "LlaVa 1.5 13B Q5 K",
  "version": "1.0",
  "description": "LlaVa 1.5 can bring vision understanding to Jan",
  "format": "gguf",
  "settings": {
    "ctx_len": 4096,
    "prompt_template": "\n### Instruction:\n{prompt}\n### Response:\n",
    "llama_model_path": "ggml-model-q5_k.gguf",
    "mmproj": "mmproj-model-f16.gguf"
  },
  "parameters": {
    "max_tokens": 4096
  },
  "metadata": {
    "author": "Mys",
    "tags": ["Vision"],
    "size": 9850000000
  },
  "engine": "nitro"
}
models/llava-1.5-7b-q5/model.json (new file, 33 lines)
@@ -0,0 +1,33 @@
{
  "sources": [
    {
      "filename": "ggml-model-q5_k.gguf",
      "url": "https://huggingface.co/mys/ggml_llava-v1.5-7b/resolve/main/ggml-model-q5_k.gguf"
    },
    {
      "filename": "mmproj-model-f16.gguf",
      "url": "https://huggingface.co/mys/ggml_llava-v1.5-7b/resolve/main/mmproj-model-f16.gguf"
    }
  ],
  "id": "llava-1.5-7b-q5",
  "object": "model",
  "name": "LlaVa 1.5 7B Q5 K",
  "version": "1.0",
  "description": "LlaVa 1.5 can bring vision understanding to Jan",
  "format": "gguf",
  "settings": {
    "ctx_len": 4096,
    "prompt_template": "\n### Instruction:\n{prompt}\n### Response:\n",
    "llama_model_path": "ggml-model-q5_k.gguf",
    "mmproj": "mmproj-model-f16.gguf"
  },
  "parameters": {
    "max_tokens": 4096
  },
  "metadata": {
    "author": "Mys",
    "tags": ["Vision"],
    "size": 5400000000
  },
  "engine": "nitro"
}
@@ -1,30 +1,35 @@
{
  "source_url": "https://huggingface.co/TheBloke/Mistral-7B-Instruct-v0.2-GGUF/resolve/main/mistral-7b-instruct-v0.2.Q4_K_M.gguf",
  "id": "mistral-ins-7b-q4",
  "object": "model",
  "name": "Mistral Instruct 7B Q4",
  "version": "1.0",
  "description": "Mistral Instruct 7b model, specifically designed for a comprehensive understanding through training on extensive internet data.",
  "format": "gguf",
  "settings": {
    "ctx_len": 4096,
    "prompt_template": "[INST] {prompt} [/INST]"
  },
  "parameters": {
    "temperature": 0.7,
    "top_p": 0.95,
    "stream": true,
    "max_tokens": 4096,
    "stop": [],
    "frequency_penalty": 0,
    "presence_penalty": 0
  },
  "metadata": {
    "author": "MistralAI, The Bloke",
    "tags": ["Featured", "7B", "Foundational Model"],
    "size": 4370000000,
    "cover": "https://raw.githubusercontent.com/janhq/jan/main/models/mistral-ins-7b-q4/cover.png"
  },
  "engine": "nitro"
}

  "sources": [
    {
      "filename": "mistral-7b-instruct-v0.2.Q4_K_M.gguf",
      "url": "https://huggingface.co/TheBloke/Mistral-7B-Instruct-v0.2-GGUF/resolve/main/mistral-7b-instruct-v0.2.Q4_K_M.gguf"
    }
  ],
  "id": "mistral-ins-7b-q4",
  "object": "model",
  "name": "Mistral Instruct 7B Q4",
  "version": "1.0",
  "description": "Mistral Instruct 7b model, specifically designed for a comprehensive understanding through training on extensive internet data.",
  "format": "gguf",
  "settings": {
    "ctx_len": 4096,
    "prompt_template": "[INST] {prompt} [/INST]",
    "llama_model_path": "mistral-7b-instruct-v0.2.Q4_K_M.gguf"
  },
  "parameters": {
    "temperature": 0.7,
    "top_p": 0.95,
    "stream": true,
    "max_tokens": 4096,
    "stop": [],
    "frequency_penalty": 0,
    "presence_penalty": 0
  },
  "metadata": {
    "author": "MistralAI, The Bloke",
    "tags": ["Featured", "7B", "Foundational Model"],
    "size": 4370000000,
    "cover": "https://raw.githubusercontent.com/janhq/jan/main/models/mistral-ins-7b-q4/cover.png"
  },
  "engine": "nitro"
}

@@ -1,28 +1,34 @@
{
  "source_url": "https://huggingface.co/TheBloke/Mixtral-8x7B-Instruct-v0.1-GGUF/resolve/main/mixtral-8x7b-instruct-v0.1.Q4_K_M.gguf",
  "id": "mixtral-8x7b-instruct",
  "object": "model",
  "name": "Mixtral 8x7B Instruct Q4",
  "version": "1.0",
  "description": "The Mixtral-8x7B is a pretrained generative Sparse Mixture of Experts. The Mixtral-8x7B outperforms 70B models on most benchmarks.",
  "format": "gguf",
  "settings": {
    "ctx_len": 4096,
    "prompt_template": "[INST] {prompt} [/INST]"
  },
  "parameters": {
    "temperature": 0.7,
    "top_p": 0.95,
    "stream": true,
    "max_tokens": 4096,
    "stop": [],
    "frequency_penalty": 0,
    "presence_penalty": 0
  },
  "metadata": {
    "author": "MistralAI, TheBloke",
    "tags": ["70B", "Foundational Model"],
    "size": 26440000000
  },
  "engine": "nitro"
}

  "sources": [
    {
      "filename": "mixtral-8x7b-instruct-v0.1.Q4_K_M.gguf",
      "url": "https://huggingface.co/TheBloke/Mixtral-8x7B-Instruct-v0.1-GGUF/resolve/main/mixtral-8x7b-instruct-v0.1.Q4_K_M.gguf"
    }
  ],
  "id": "mixtral-8x7b-instruct",
  "object": "model",
  "name": "Mixtral 8x7B Instruct Q4",
  "version": "1.0",
  "description": "The Mixtral-8x7B is a pretrained generative Sparse Mixture of Experts. The Mixtral-8x7B outperforms 70B models on most benchmarks.",
  "format": "gguf",
  "settings": {
    "ctx_len": 4096,
    "prompt_template": "[INST] {prompt} [/INST]",
    "llama_model_path": "mixtral-8x7b-instruct-v0.1.Q4_K_M.gguf"
  },
  "parameters": {
    "temperature": 0.7,
    "top_p": 0.95,
    "stream": true,
    "max_tokens": 4096,
    "frequency_penalty": 0,
    "presence_penalty": 0
  },
  "metadata": {
    "author": "MistralAI, TheBloke",
    "tags": ["70B", "Foundational Model"],
    "size": 26440000000
  },
  "engine": "nitro"
}

@@ -1,29 +1,34 @@
{
  "source_url": "https://huggingface.co/NeverSleep/Noromaid-7b-v0.1.1-GGUF/resolve/main/Noromaid-7b-v0.1.1.q5_k_m.gguf",
  "id": "noromaid-7b",
  "object": "model",
  "name": "Noromaid 7B Q5",
  "version": "1.0",
  "description": "The Noromaid 7b model is designed for role-playing with human-like behavior.",
  "format": "gguf",
  "settings": {
    "ctx_len": 4096,
    "prompt_template": "### Instruction:{prompt}\n### Response:"
  },
  "parameters": {
    "temperature": 0.7,
    "top_p": 0.95,
    "stream": true,
    "max_tokens": 4096,
    "stop": [],
    "frequency_penalty": 0,
    "presence_penalty": 0
  },
  "metadata": {
    "author": "NeverSleep",
    "tags": ["7B", "Merged"],
    "size": 4370000000
  },
  "engine": "nitro"
}

  "sources": [
    {
      "filename": "Noromaid-7b-v0.1.1.q5_k_m.gguf",
      "url": "https://huggingface.co/NeverSleep/Noromaid-7b-v0.1.1-GGUF/resolve/main/Noromaid-7b-v0.1.1.q5_k_m.gguf"
    }
  ],
  "id": "noromaid-7b",
  "object": "model",
  "name": "Noromaid 7B Q5",
  "version": "1.0",
  "description": "The Noromaid 7b model is designed for role-playing with human-like behavior.",
  "format": "gguf",
  "settings": {
    "ctx_len": 4096,
    "prompt_template": "### Instruction:{prompt}\n### Response:",
    "llama_model_path": "Noromaid-7b-v0.1.1.q5_k_m.gguf"
  },
  "parameters": {
    "temperature": 0.7,
    "top_p": 0.95,
    "stream": true,
    "max_tokens": 4096,
    "stop": [],
    "frequency_penalty": 0,
    "presence_penalty": 0
  },
  "metadata": {
    "author": "NeverSleep",
    "tags": ["7B", "Merged"],
    "size": 4370000000
  },
  "engine": "nitro"
}

@@ -1,28 +1,34 @@
{
  "source_url": "https://huggingface.co/TheBloke/openchat-3.5-1210-GGUF/resolve/main/openchat-3.5-1210.Q4_K_M.gguf",
  "id": "openchat-3.5-7b",
  "object": "model",
  "name": "Openchat-3.5 7B Q4",
  "version": "1.0",
  "description": "The performance of this open-source model surpasses that of ChatGPT-3.5 and Grok-1 across various benchmarks.",
  "format": "gguf",
  "settings": {
    "ctx_len": 4096,
    "prompt_template": "GPT4 Correct User: {prompt}<|end_of_turn|>GPT4 Correct Assistant:"
  },
  "parameters": {
    "temperature": 0.7,
    "top_p": 0.95,
    "stream": true,
    "max_tokens": 4096,
    "stop": ["<|end_of_turn|>"],
    "frequency_penalty": 0,
    "presence_penalty": 0
  },
  "metadata": {
    "author": "Openchat",
    "tags": ["Recommended", "7B", "Finetuned"],
    "size": 4370000000
  },
  "engine": "nitro"
}

  "sources": [
    {
      "filename": "openchat-3.5-1210.Q4_K_M.gguf",
      "url": "https://huggingface.co/TheBloke/openchat-3.5-1210-GGUF/resolve/main/openchat-3.5-1210.Q4_K_M.gguf"
    }
  ],
  "id": "openchat-3.5-7b",
  "object": "model",
  "name": "Openchat-3.5 7B Q4",
  "version": "1.0",
  "description": "The performance of this open-source model surpasses that of ChatGPT-3.5 and Grok-1 across various benchmarks.",
  "format": "gguf",
  "settings": {
    "ctx_len": 4096,
    "prompt_template": "GPT4 Correct User: {prompt}<|end_of_turn|>GPT4 Correct Assistant:",
    "llama_model_path": "openchat-3.5-1210.Q4_K_M.gguf"
  },
  "parameters": {
    "temperature": 0.7,
    "top_p": 0.95,
    "stream": true,
    "max_tokens": 4096,
    "stop": ["<|end_of_turn|>"],
    "frequency_penalty": 0,
    "presence_penalty": 0
  },
  "metadata": {
    "author": "Openchat",
    "tags": ["Recommended", "7B", "Finetuned"],
    "size": 4370000000
  },
  "engine": "nitro"
}

@@ -1,29 +1,34 @@
{
  "source_url": "https://huggingface.co/janhq/openhermes-2.5-neural-chat-v3-3-slerp-GGUF/resolve/main/openhermes-2.5-neural-chat-v3-3-slerp.Q4_K_M.gguf",
  "id": "openhermes-neural-7b",
  "object": "model",
  "name": "OpenHermes Neural 7B Q4",
  "version": "1.0",
  "description": "OpenHermes Neural is a merged model using the TIES method. It performs well in various benchmarks.",
  "format": "gguf",
  "settings": {
    "ctx_len": 4096,
    "prompt_template": "<|im_start|>system\n{system_message}<|im_end|>\n<|im_start|>user\n{prompt}<|im_end|>\n<|im_start|>assistant"
  },
  "parameters": {
    "temperature": 0.7,
    "top_p": 0.95,
    "stream": true,
    "max_tokens": 4096,
    "stop": [],
    "frequency_penalty": 0,
    "presence_penalty": 0
  },
  "metadata": {
    "author": "Intel, Jan",
    "tags": ["7B", "Merged", "Featured"],
    "size": 4370000000,
    "cover": "https://raw.githubusercontent.com/janhq/jan/main/models/openhermes-neural-7b/cover.png"
  },
  "engine": "nitro"
}

  "sources": [
    {
      "filename": "openhermes-2.5-neural-chat-v3-3-slerp.Q4_K_M.gguf",
      "url": "https://huggingface.co/janhq/openhermes-2.5-neural-chat-v3-3-slerp-GGUF/resolve/main/openhermes-2.5-neural-chat-v3-3-slerp.Q4_K_M.gguf"
    }
  ],
  "id": "openhermes-neural-7b",
  "object": "model",
  "name": "OpenHermes Neural 7B Q4",
  "version": "1.0",
  "description": "OpenHermes Neural is a merged model using the TIES method. It performs well in various benchmarks.",
  "format": "gguf",
  "settings": {
    "ctx_len": 4096,
    "prompt_template": "<|im_start|>system\n{system_message}<|im_end|>\n<|im_start|>user\n{prompt}<|im_end|>\n<|im_start|>assistant",
    "llama_model_path": "openhermes-2.5-neural-chat-v3-3-slerp.Q4_K_M.gguf"
  },
  "parameters": {
    "temperature": 0.7,
    "top_p": 0.95,
    "stream": true,
    "max_tokens": 4096,
    "frequency_penalty": 0,
    "presence_penalty": 0
  },
  "metadata": {
    "author": "Intel, Jan",
    "tags": ["7B", "Merged", "Featured"],
    "size": 4370000000,
    "cover": "https://raw.githubusercontent.com/janhq/jan/main/models/openhermes-neural-7b/cover.png"
  },
  "engine": "nitro"
}

@@ -1,29 +1,34 @@
{
  "source_url": "https://huggingface.co/TheBloke/phi-2-GGUF/resolve/main/phi-2.Q8_0.gguf",
  "id": "phi-2-3b",
  "object": "model",
  "name": "Phi-2 3B Q8",
  "version": "1.0",
  "description": "Phi-2 is a 2.7B model, excelling in common sense and logical reasoning benchmarks, trained with synthetic texts and filtered websites.",
  "format": "gguf",
  "settings": {
    "ctx_len": 4096,
    "prompt_template": "Intruct:\n{prompt}\nOutput:"
  },
  "parameters": {
    "temperature": 0.7,
    "top_p": 0.95,
    "stream": true,
    "max_tokens": 4096,
    "stop": [],
    "frequency_penalty": 0,
    "presence_penalty": 0
  },
  "metadata": {
    "author": "Microsoft",
    "tags": ["3B","Foundational Model"],
    "size": 2960000000
  },
  "engine": "nitro"
}

  "sources": [
    {
      "filename": "phi-2.Q8_0.gguf",
      "url": "https://huggingface.co/TheBloke/phi-2-GGUF/resolve/main/phi-2.Q8_0.gguf"
    }
  ],
  "id": "phi-2-3b",
  "object": "model",
  "name": "Phi-2 3B Q8",
  "version": "1.0",
  "description": "Phi-2 is a 2.7B model, excelling in common sense and logical reasoning benchmarks, trained with synthetic texts and filtered websites.",
  "format": "gguf",
  "settings": {
    "ctx_len": 4096,
    "prompt_template": "Intruct:\n{prompt}\nOutput:",
    "llama_model_path": "phi-2.Q8_0.gguf"
  },
  "parameters": {
    "temperature": 0.7,
    "top_p": 0.95,
    "stream": true,
    "max_tokens": 4096,
    "stop": [],
    "frequency_penalty": 0,
    "presence_penalty": 0
  },
  "metadata": {
    "author": "Microsoft",
    "tags": ["3B", "Foundational Model"],
    "size": 2960000000
  },
  "engine": "nitro"
}

@@ -1,29 +1,34 @@
{
  "source_url": "https://huggingface.co/TheBloke/Phind-CodeLlama-34B-v2-GGUF/resolve/main/phind-codellama-34b-v2.Q5_K_M.gguf",
  "id": "phind-34b",
  "object": "model",
  "name": "Phind 34B Q5",
  "version": "1.0",
  "description": "Phind 34B is fine-tuned on 1.5B tokens of high-quality programming data. This multi-lingual model excels in various programming languages and is designed to be steerable and user-friendly.",
  "format": "gguf",
  "settings": {
    "ctx_len": 4096,
    "prompt_template": "### System Prompt\n{system_message}\n### User Message\n{prompt}\n### Assistant"
  },
  "parameters": {
    "temperature": 0.7,
    "top_p": 0.95,
    "stream": true,
    "max_tokens": 4096,
    "stop": [],
    "frequency_penalty": 0,
    "presence_penalty": 0
  },
  "metadata": {
    "author": "Phind, The Bloke",
    "tags": ["34B", "Finetuned"],
    "size": 20220000000
  },
  "engine": "nitro"
}

  "sources": [
    {
      "filename": "phind-codellama-34b-v2.Q5_K_M.gguf",
      "url": "https://huggingface.co/TheBloke/Phind-CodeLlama-34B-v2-GGUF/resolve/main/phind-codellama-34b-v2.Q5_K_M.gguf"
    }
  ],
  "id": "phind-34b",
  "object": "model",
  "name": "Phind 34B Q5",
  "version": "1.0",
  "description": "Phind 34B is fine-tuned on 1.5B tokens of high-quality programming data. This multi-lingual model excels in various programming languages and is designed to be steerable and user-friendly.",
  "format": "gguf",
  "settings": {
    "ctx_len": 4096,
    "prompt_template": "### System Prompt\n{system_message}\n### User Message\n{prompt}\n### Assistant",
    "llama_model_path": "phind-codellama-34b-v2.Q5_K_M.gguf"
  },
  "parameters": {
    "temperature": 0.7,
    "top_p": 0.95,
    "stream": true,
    "max_tokens": 4096,
    "stop": [],
    "frequency_penalty": 0,
    "presence_penalty": 0
  },
  "metadata": {
    "author": "Phind, The Bloke",
    "tags": ["34B", "Finetuned"],
    "size": 20220000000
  },
  "engine": "nitro"
}

@ -1,29 +1,33 @@
{
  "source_url": "https://huggingface.co/janhq/Solar-10.7B-SLERP-GGUF/resolve/main/solar-10.7b-slerp.Q4_K_M.gguf",
  "id": "solar-10.7b-slerp",
  "object": "model",
  "name": "Solar Slerp 10.7B Q4",
  "version": "1.0",
  "description": "This model uses the Slerp merge method from SOLAR Instruct and Pandora-v1.",
  "format": "gguf",
  "settings": {
    "ctx_len": 4096,
    "prompt_template": "### User: {prompt}\n### Assistant:"
  },
  "parameters": {
    "temperature": 0.7,
    "top_p": 0.95,
    "stream": true,
    "max_tokens": 4096,
    "stop": [],
    "frequency_penalty": 0,
    "presence_penalty": 0
  },
  "metadata": {
    "author": "Jan",
    "tags": ["13B","Finetuned"],
    "size": 6360000000
  },
  "engine": "nitro"
}

  "sources": [
    {
      "filename": "solar-10.7b-slerp.Q4_K_M.gguf",
      "url": "https://huggingface.co/janhq/Solar-10.7B-SLERP-GGUF/resolve/main/solar-10.7b-slerp.Q4_K_M.gguf"
    }
  ],
  "id": "solar-10.7b-slerp",
  "object": "model",
  "name": "Solar Slerp 10.7B Q4",
  "version": "1.0",
  "description": "This model uses the Slerp merge method from SOLAR Instruct and Pandora-v1.",
  "format": "gguf",
  "settings": {
    "ctx_len": 4096,
    "prompt_template": "### User: {prompt}\n### Assistant:",
    "llama_model_path": "solar-10.7b-slerp.Q4_K_M.gguf"
  },
  "parameters": {
    "temperature": 0.7,
    "top_p": 0.95,
    "stream": true,
    "max_tokens": 4096,
    "frequency_penalty": 0,
    "presence_penalty": 0
  },
  "metadata": {
    "author": "Jan",
    "tags": ["13B", "Finetuned"],
    "size": 6360000000
  },
  "engine": "nitro"
}

@ -1,29 +1,34 @@
{
  "source_url": "https://huggingface.co/TheBloke/Starling-LM-7B-alpha-GGUF/resolve/main/starling-lm-7b-alpha.Q4_K_M.gguf",
  "id": "starling-7b",
  "object": "model",
  "name": "Starling alpha 7B Q4",
  "version": "1.0",
  "description": "Starling 7B, an upgrade of Openchat 3.5 using RLAIF, is really good at various benchmarks, especially with GPT-4 judging its performance.",
  "format": "gguf",
  "settings": {
    "ctx_len": 4096,
    "prompt_template": "GPT4 User: {prompt}<|end_of_turn|>GPT4 Assistant:"
  },
  "parameters": {
    "temperature": 0.7,
    "top_p": 0.95,
    "stream": true,
    "max_tokens": 4096,
    "stop": ["<|end_of_turn|>"],
    "frequency_penalty": 0,
    "presence_penalty": 0
  },
  "metadata": {
    "author": "Berkeley-nest, The Bloke",
    "tags": ["7B","Finetuned"],
    "size": 4370000000
  },
  "engine": "nitro"
}

  "sources": [
    {
      "filename": "starling-lm-7b-alpha.Q4_K_M.gguf",
      "url": "https://huggingface.co/TheBloke/Starling-LM-7B-alpha-GGUF/resolve/main/starling-lm-7b-alpha.Q4_K_M.gguf"
    }
  ],
  "id": "starling-7b",
  "object": "model",
  "name": "Starling alpha 7B Q4",
  "version": "1.0",
  "description": "Starling 7B, an upgrade of Openchat 3.5 using RLAIF, is really good at various benchmarks, especially with GPT-4 judging its performance.",
  "format": "gguf",
  "settings": {
    "ctx_len": 4096,
    "prompt_template": "GPT4 User: {prompt}<|end_of_turn|>GPT4 Assistant:",
    "llama_model_path": "starling-lm-7b-alpha.Q4_K_M.gguf"
  },
  "parameters": {
    "temperature": 0.7,
    "top_p": 0.95,
    "stream": true,
    "max_tokens": 4096,
    "stop": ["<|end_of_turn|>"],
    "frequency_penalty": 0,
    "presence_penalty": 0
  },
  "metadata": {
    "author": "Berkeley-nest, The Bloke",
    "tags": ["7B", "Finetuned"],
    "size": 4370000000
  },
  "engine": "nitro"
}

@ -1,32 +1,33 @@
{
  "source_url": "https://huggingface.co/janhq/stealth-v1.3-GGUF/resolve/main/stealth-v1.3.Q4_K_M.gguf",
  "id": "stealth-v1.2-7b",
  "object": "model",
  "name": "Stealth 7B Q4",
  "version": "1.0",
  "description": "This is a new experimental family designed to enhance Mathematical and Logical abilities.",
  "format": "gguf",
  "settings": {
    "ctx_len": 4096,
    "prompt_template": "<|im_start|>system\n{system_message}<|im_end|>\n<|im_start|>user\n{prompt}<|im_end|>\n<|im_start|>assistant"
  },
  "parameters": {
    "temperature": 0.7,
    "top_p": 0.95,
    "stream": true,
    "max_tokens": 4096,
    "stop": [],
    "frequency_penalty": 0,
    "presence_penalty": 0
  },
  "metadata": {
    "author": "Jan",
    "tags": [
      "7B",
      "Finetuned",
      "Featured"
    ],
    "size": 4370000000
  },
  "engine": "nitro"
}

  "sources": [
    {
      "filename": "stealth-v1.3.Q4_K_M.gguf",
      "url": "https://huggingface.co/janhq/stealth-v1.3-GGUF/resolve/main/stealth-v1.3.Q4_K_M.gguf"
    }
  ],
  "id": "stealth-v1.2-7b",
  "object": "model",
  "name": "Stealth 7B Q4",
  "version": "1.0",
  "description": "This is a new experimental family designed to enhance Mathematical and Logical abilities.",
  "format": "gguf",
  "settings": {
    "ctx_len": 4096,
    "prompt_template": "<|im_start|>system\n{system_message}<|im_end|>\n<|im_start|>user\n{prompt}<|im_end|>\n<|im_start|>assistant",
    "llama_model_path": "stealth-v1.3.Q4_K_M.gguf"
  },
  "parameters": {
    "temperature": 0.7,
    "top_p": 0.95,
    "stream": true,
    "max_tokens": 4096,
    "frequency_penalty": 0,
    "presence_penalty": 0
  },
  "metadata": {
    "author": "Jan",
    "tags": ["7B", "Finetuned", "Featured"],
    "size": 4370000000
  },
  "engine": "nitro"
}

@ -1,5 +1,10 @@
{
  "source_url": "https://huggingface.co/TheBloke/TinyLlama-1.1B-Chat-v1.0-GGUF/resolve/main/tinyllama-1.1b-chat-v1.0.Q4_K_M.gguf",
  "sources": [
    {
      "filename": "tinyllama-1.1b-chat-v1.0.Q4_K_M.gguf",
      "url": "https://huggingface.co/TheBloke/TinyLlama-1.1B-Chat-v1.0-GGUF/resolve/main/tinyllama-1.1b-chat-v1.0.Q4_K_M.gguf"
    }
  ],
  "id": "tinyllama-1.1b",
  "object": "model",
  "name": "TinyLlama Chat 1.1B Q4",

@ -7,8 +12,9 @@
  "description": "TinyLlama is a tiny model with only 1.1B. It's a good model for less powerful computers.",
  "format": "gguf",
  "settings": {
    "ctx_len": 2048,
    "prompt_template": "<|system|>\n{system_message}<|user|>\n{prompt}<|assistant|>"
    "ctx_len": 4096,
    "prompt_template": "<|system|>\n{system_message}<|user|>\n{prompt}<|assistant|>",
    "llama_model_path": "tinyllama-1.1b-chat-v1.0.Q4_K_M.gguf"
  },
  "parameters": {
    "temperature": 0.7,

@ -20,9 +26,9 @@
    "presence_penalty": 0
  },
  "metadata": {
    "author": "TinyLlama",
    "tags": ["Tiny", "Foundation Model"],
    "size": 669000000
    "author": "TinyLlama",
    "tags": ["Tiny", "Foundation Model"],
    "size": 669000000
  },
  "engine": "nitro"
}
}

@ -1,29 +1,34 @@
{
  "source_url": "https://huggingface.co/janhq/trinity-v1.2-GGUF/resolve/main/trinity-v1.2.Q4_K_M.gguf",
  "id": "trinity-v1.2-7b",
  "object": "model",
  "name": "Trinity-v1.2 7B Q4",
  "version": "1.0",
  "description": "Trinity is an experimental model merge using the Slerp method. Recommended for daily assistance purposes.",
  "format": "gguf",
  "settings": {
    "ctx_len": 4096,
    "prompt_template": "<|im_start|>system\n{system_message}<|im_end|>\n<|im_start|>user\n{prompt}<|im_end|>\n<|im_start|>assistant"
  },
  "parameters": {
    "temperature": 0.7,
    "top_p": 0.95,
    "stream": true,
    "max_tokens": 4096,
    "stop": [],
    "frequency_penalty": 0,
    "presence_penalty": 0
  },
  "metadata": {
    "author": "Jan",
    "tags": ["7B", "Merged", "Featured"],
    "size": 4370000000,
    "cover": "https://raw.githubusercontent.com/janhq/jan/main/models/trinity-v1.2-7b/cover.png"
  },
  "engine": "nitro"
}

  "sources": [
    {
      "filename": "trinity-v1.2.Q4_K_M.gguf",
      "url": "https://huggingface.co/janhq/trinity-v1.2-GGUF/resolve/main/trinity-v1.2.Q4_K_M.gguf"
    }
  ],
  "id": "trinity-v1.2-7b",
  "object": "model",
  "name": "Trinity-v1.2 7B Q4",
  "version": "1.0",
  "description": "Trinity is an experimental model merge using the Slerp method. Recommended for daily assistance purposes.",
  "format": "gguf",
  "settings": {
    "ctx_len": 4096,
    "prompt_template": "<|im_start|>system\n{system_message}<|im_end|>\n<|im_start|>user\n{prompt}<|im_end|>\n<|im_start|>assistant",
    "llama_model_path": "trinity-v1.2.Q4_K_M.gguf"
  },
  "parameters": {
    "temperature": 0.7,
    "top_p": 0.95,
    "stream": true,
    "max_tokens": 4096,
    "frequency_penalty": 0,
    "presence_penalty": 0
  },
  "metadata": {
    "author": "Jan",
    "tags": ["7B", "Merged", "Featured"],
    "size": 4370000000,
    "cover": "https://raw.githubusercontent.com/janhq/jan/main/models/trinity-v1.2-7b/cover.png"
  },
  "engine": "nitro"
}

@ -1,28 +1,33 @@
{
  "source_url": "https://huggingface.co/TheBloke/tulu-2-dpo-70B-GGUF/resolve/main/tulu-2-dpo-70b.Q4_K_M.gguf",
  "id": "tulu-2-70b",
  "object": "model",
  "name": "Tulu 2 70B Q4",
  "version": "1.0",
  "description": "Tulu 70B is a strong alternative to Llama 2 70B Chat for use as a helpful assistant.",
  "format": "gguf",
  "settings": {
    "ctx_len": 4096,
    "prompt_template": "<|user|>\n{prompt}\n<|assistant|>"
  },
  "parameters": {
    "temperature": 0.7,
    "top_p": 0.95,
    "stream": true,
    "max_tokens": 4096,
    "stop": [],
    "frequency_penalty": 0,
    "presence_penalty": 0
  },
  "metadata": {
    "author": "Lizpreciatior, The Bloke",
    "tags": ["70B", "Finetuned"],
    "size": 41400000000
  },
  "engine": "nitro"
}

  "sources": [
    {
      "filename": "tulu-2-dpo-70b.Q4_K_M.gguf",
      "url": "https://huggingface.co/TheBloke/tulu-2-dpo-70B-GGUF/resolve/main/tulu-2-dpo-70b.Q4_K_M.gguf"
    }
  ],
  "id": "tulu-2-70b",
  "object": "model",
  "name": "Tulu 2 70B Q4",
  "version": "1.0",
  "description": "Tulu 70B is a strong alternative to Llama 2 70B Chat for use as a helpful assistant.",
  "format": "gguf",
  "settings": {
    "ctx_len": 4096,
    "prompt_template": "<|user|>\n{prompt}\n<|assistant|>",
    "llama_model_path": "tulu-2-dpo-70b.Q4_K_M.gguf"
  },
  "parameters": {
    "temperature": 0.7,
    "top_p": 0.95,
    "stream": true,
    "max_tokens": 4096,
    "frequency_penalty": 0,
    "presence_penalty": 0
  },
  "metadata": {
    "author": "Lizpreciatior, The Bloke",
    "tags": ["70B", "Finetuned"],
    "size": 41400000000
  },
  "engine": "nitro"
}

@ -1,29 +1,34 @@
{
  "source_url": "https://huggingface.co/TheBloke/WizardCoder-Python-13B-V1.0-GGUF/resolve/main/wizardcoder-python-13b-v1.0.Q5_K_M.gguf",
  "id": "wizardcoder-13b",
  "object": "model",
  "name": "Wizard Coder Python 13B Q5",
  "version": "1.0",
  "description": "WizardCoder 13B is a Python coding model. This model demonstrates high proficiency in specific domains like coding and mathematics.",
  "format": "gguf",
  "settings": {
    "ctx_len": 4096,
    "prompt_template": "### Instruction:\n{prompt}\n### Response:"
  },
  "parameters": {
    "temperature": 0.7,
    "top_p": 0.95,
    "stream": true,
    "max_tokens": 4096,
    "stop": [],
    "frequency_penalty": 0,
    "presence_penalty": 0
  },
  "metadata": {
    "author": "WizardLM, The Bloke",
    "tags": ["Recommended", "13B", "Finetuned"],
    "size": 7870000000
  },
  "engine": "nitro"
}

  "sources": [
    {
      "filename": "wizardcoder-python-13b-v1.0.Q5_K_M.gguf",
      "url": "https://huggingface.co/TheBloke/WizardCoder-Python-13B-V1.0-GGUF/resolve/main/wizardcoder-python-13b-v1.0.Q5_K_M.gguf"
    }
  ],
  "id": "wizardcoder-13b",
  "object": "model",
  "name": "Wizard Coder Python 13B Q5",
  "version": "1.0",
  "description": "WizardCoder 13B is a Python coding model. This model demonstrates high proficiency in specific domains like coding and mathematics.",
  "format": "gguf",
  "settings": {
    "ctx_len": 4096,
    "prompt_template": "### Instruction:\n{prompt}\n### Response:",
    "llama_model_path": "wizardcoder-python-13b-v1.0.Q5_K_M.gguf"
  },
  "parameters": {
    "temperature": 0.7,
    "top_p": 0.95,
    "stream": true,
    "max_tokens": 4096,
    "stop": [],
    "frequency_penalty": 0,
    "presence_penalty": 0
  },
  "metadata": {
    "author": "WizardLM, The Bloke",
    "tags": ["Recommended", "13B", "Finetuned"],
    "size": 7870000000
  },
  "engine": "nitro"
}

@ -1,29 +1,31 @@
{
  "source_url": "https://huggingface.co/TheBloke/Yarn-Mistral-7B-128k-GGUF/resolve/main/yarn-mistral-7b-128k.Q4_K_M.gguf",
  "id": "yarn-mistral-7b",
  "object": "model",
  "name": "Yarn Mistral 7B Q4",
  "version": "1.0",
  "description": "Yarn Mistral 7B is a language model for long context and supports a 128k token context window.",
  "format": "gguf",
  "settings": {
    "ctx_len": 4096,
    "prompt_template": "{prompt}"
  },
  "parameters": {
    "temperature": 0.7,
    "top_p": 0.95,
    "stream": true,
    "max_tokens": 4096,
    "stop": [],
    "frequency_penalty": 0,
    "presence_penalty": 0
  },
  "metadata": {
    "author": "NousResearch, The Bloke",
    "tags": ["7B","Finetuned"],
    "size": 4370000000
  },
  "engine": "nitro"
}

  "sources": [
    {
      "url": "https://huggingface.co/TheBloke/Yarn-Mistral-7B-128k-GGUF/resolve/main/yarn-mistral-7b-128k.Q4_K_M.gguf"
    }
  ],
  "id": "yarn-mistral-7b",
  "object": "model",
  "name": "Yarn Mistral 7B Q4",
  "version": "1.0",
  "description": "Yarn Mistral 7B is a language model for long context and supports a 128k token context window.",
  "format": "gguf",
  "settings": {
    "ctx_len": 4096,
    "prompt_template": "{prompt}"
  },
  "parameters": {
    "temperature": 0.7,
    "top_p": 0.95,
    "stream": true,
    "max_tokens": 4096,
    "frequency_penalty": 0,
    "presence_penalty": 0
  },
  "metadata": {
    "author": "NousResearch, The Bloke",
    "tags": ["7B", "Finetuned"],
    "size": 4370000000
  },
  "engine": "nitro"
}

@ -1,29 +1,34 @@
{
  "source_url": "https://huggingface.co/TheBloke/Yi-34B-Chat-GGUF/resolve/main/yi-34b-chat.Q5_K_M.gguf",
  "id": "yi-34b",
  "object": "model",
  "name": "Yi 34B Q5",
  "version": "1.0",
  "description": "Yi-34B, a specialized chat model, is known for its diverse and creative responses and excels across various NLP tasks and benchmarks.",
  "format": "gguf",
  "settings": {
    "ctx_len": 4096,
    "prompt_template": "<|im_start|>system\n{system_message}<|im_end|>\n<|im_start|>user\n{prompt}<|im_end|>\n<|im_start|>assistant"
  },
  "parameters": {
    "temperature": 0.7,
    "top_p": 0.95,
    "stream": true,
    "max_tokens": 4096,
    "stop": [],
    "frequency_penalty": 0,
    "presence_penalty": 0
  },
  "metadata": {
    "author": "01-ai, The Bloke",
    "tags": ["34B", "Foundational Model"],
    "size": 20660000000
  },
  "engine": "nitro"
}

  "sources": [
    {
      "filename": "yi-34b-chat.Q5_K_M.gguf",
      "url": "https://huggingface.co/TheBloke/Yi-34B-Chat-GGUF/resolve/main/yi-34b-chat.Q5_K_M.gguf"
    }
  ],
  "id": "yi-34b",
  "object": "model",
  "name": "Yi 34B Q5",
  "version": "1.0",
  "description": "Yi-34B, a specialized chat model, is known for its diverse and creative responses and excels across various NLP tasks and benchmarks.",
  "format": "gguf",
  "settings": {
    "ctx_len": 4096,
    "prompt_template": "<|im_start|>system\n{system_message}<|im_end|>\n<|im_start|>user\n{prompt}<|im_end|>\n<|im_start|>assistant",
    "llama_model_path": "yi-34b-chat.Q5_K_M.gguf"
  },
  "parameters": {
    "temperature": 0.7,
    "top_p": 0.95,
    "stream": true,
    "max_tokens": 4096,
    "stop": [],
    "frequency_penalty": 0,
    "presence_penalty": 0
  },
  "metadata": {
    "author": "01-ai, The Bloke",
    "tags": ["34B", "Foundational Model"],
    "size": 20660000000
  },
  "engine": "nitro"
}

@ -105,6 +105,7 @@ export default function EventListenerWrapper({ children }: PropsWithChildren) {
      })
    }
    return () => {}
    // eslint-disable-next-line react-hooks/exhaustive-deps
  }, [])

  return (

@ -6,6 +6,7 @@ import {
  ModelExtension,
  abortDownload,
  joinPath,
  ModelArtifact,
} from '@janhq/core'

import { useSetAtom } from 'jotai'

@ -25,6 +26,23 @@ export default function useDownloadModel() {
  const addNewDownloadingModel = useSetAtom(addNewDownloadingModelAtom)

  const downloadModel = async (model: Model) => {
    const childrenDownloadProgress: DownloadState[] = []
    model.sources.forEach((source: ModelArtifact) => {
      childrenDownloadProgress.push({
        modelId: source.filename,
        time: {
          elapsed: 0,
          remaining: 0,
        },
        speed: 0,
        percent: 0,
        size: {
          total: 0,
          transferred: 0,
        },
      })
    })

    // set an initial download state
    setDownloadState({
      modelId: model.id,

@ -38,6 +56,7 @@ export default function useDownloadModel() {
        total: 0,
        transferred: 0,
      },
      children: childrenDownloadProgress,
    })

    addNewDownloadingModel(model)

@ -46,6 +65,7 @@ export default function useDownloadModel() {
      .get<ModelExtension>(ExtensionTypeEnum.Model)
      ?.downloadModel(model, { ignoreSSL, proxy })
  }

  const abortModelDownload = async (model: Model) => {
    await abortDownload(
      await joinPath(['models', model.id, modelBinFileName(model)])

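With one child DownloadState seeded per source, a parent-level progress figure can be derived from the children rather than tracked separately. A rough sketch of such an aggregation (illustrative only; the commit itself only seeds the initial states):

// Minimal slice of the DownloadState shape used above.
type ChildProgress = {
  percent: number
  size: { total: number; transferred: number }
}

// Weight each child's progress by its byte total, falling back to a plain
// average of percents while the totals are still unknown (all zero).
const aggregatePercent = (children: ChildProgress[]): number => {
  const totalBytes = children.reduce((sum, c) => sum + c.size.total, 0)
  if (totalBytes === 0) {
    if (children.length === 0) return 0
    return children.reduce((sum, c) => sum + c.percent, 0) / children.length
  }
  const transferred = children.reduce((sum, c) => sum + c.size.transferred, 0)
  return transferred / totalBytes
}
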
@ -1,7 +1,6 @@
import { useEffect, useState } from 'react'

import { ExtensionTypeEnum, ModelExtension } from '@janhq/core'
import { Model } from '@janhq/core'
import { ExtensionTypeEnum, ModelExtension, Model } from '@janhq/core'

import { extensionManager } from '@/extension/ExtensionManager'

@ -25,6 +24,7 @@ export function useGetConfiguredModels() {

  useEffect(() => {
    fetchModels()
    // eslint-disable-next-line react-hooks/exhaustive-deps
  }, [])

  return { loading, models }

@ -6,7 +6,7 @@ import { atom, useAtom } from 'jotai'

import { extensionManager } from '@/extension/ExtensionManager'

const downloadedModelsAtom = atom<Model[]>([])
export const downloadedModelsAtom = atom<Model[]>([])

export function useGetDownloadedModels() {
  const [downloadedModels, setDownloadedModels] = useAtom(downloadedModelsAtom)

@ -15,7 +15,8 @@ export function useGetDownloadedModels() {
    getDownloadedModels().then((downloadedModels) => {
      setDownloadedModels(downloadedModels)
    })
  }, [setDownloadedModels])
  // eslint-disable-next-line react-hooks/exhaustive-deps
  }, [])

  return { downloadedModels, setDownloadedModels }
}

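Exporting the atom lets components read the downloaded-model list directly, without going through the hook and its fetch-on-mount effect; the ExploreModelItemHeader change further down does exactly that. A minimal read-only consumer might look like this (the component is hypothetical):

import { useAtomValue } from 'jotai'

import { downloadedModelsAtom } from '@/hooks/useGetDownloadedModels'

// Subscribes to the shared atom; re-renders when the list changes, but never
// triggers the fetch that useGetDownloadedModels performs on mount.
const DownloadedCount = () => {
  const downloadedModels = useAtomValue(downloadedModelsAtom)
  return <span>{downloadedModels.length} models downloaded</span>
}
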
@ -110,6 +110,7 @@ export default function useRecommendedModel() {

    console.debug(`Using last used model ${lastUsedModel.id}`)
    setRecommendedModel(lastUsedModel)
    // eslint-disable-next-line react-hooks/exhaustive-deps
  }, [getAndSortDownloadedModels, activeThread])

  useEffect(() => {

@ -13,6 +13,7 @@ export const useSettings = () => {

  useEffect(() => {
    setTimeout(() => validateSettings, 3000)
    // eslint-disable-next-line react-hooks/exhaustive-deps
  }, [])

  const validateSettings = async () => {

@ -21,7 +21,7 @@
    "class-variance-authority": "^0.7.0",
    "framer-motion": "^10.16.4",
    "highlight.js": "^11.9.0",
    "jotai": "^2.4.0",
    "jotai": "^2.6.0",
    "lodash": "^4.17.21",
    "lucide-react": "^0.291.0",
    "marked": "^9.1.2",

@ -26,7 +26,7 @@ import { useCreateNewThread } from '@/hooks/useCreateNewThread'
import useDownloadModel from '@/hooks/useDownloadModel'
import { useDownloadState } from '@/hooks/useDownloadState'
import { getAssistants } from '@/hooks/useGetAssistants'
import { useGetDownloadedModels } from '@/hooks/useGetDownloadedModels'
import { downloadedModelsAtom } from '@/hooks/useGetDownloadedModels'
import { useMainViewState } from '@/hooks/useMainViewState'

import { toGibibytes } from '@/utils/converter'

@ -43,8 +43,8 @@ type Props = {

const ExploreModelItemHeader: React.FC<Props> = ({ model, onClick, open }) => {
  const { downloadModel } = useDownloadModel()
  const { downloadedModels } = useGetDownloadedModels()
  const { modelDownloadStateAtom, downloadStates } = useDownloadState()
  const downloadedModels = useAtomValue(downloadedModelsAtom)
  const { modelDownloadStateAtom } = useDownloadState()
  const { requestCreateNewThread } = useCreateNewThread()
  const totalRam = useAtomValue(totalRamAtom)
  const serverEnabled = useAtomValue(serverEnabledAtom)

@ -100,9 +100,7 @@ const ExploreModelItemHeader: React.FC<Props> = ({ model, onClick, open }) => {
      )}
    </Tooltip>
  )
}

  if (downloadState != null && downloadStates.length > 0) {
  } else if (downloadState != null) {
    downloadButton = <ModalCancelDownload model={model} />
  }

2
web/types/downloadState.d.ts
vendored
@ -4,6 +4,8 @@ type DownloadState = {
  speed: number
  percent: number
  size: DownloadSize
  isFinished?: boolean
  children?: DownloadState[]
  error?: string
}

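The two optional additions mirror the multi-binary flow: children carries the per-source states seeded in useDownloadModel, and error lets one failed artifact surface without discarding its siblings. An illustrative value, assuming the modelId and time fields from the unshown top of the type (all names and numbers here are made up):

const state: DownloadState = {
  modelId: 'example-multi-binary-model', // hypothetical parent id
  time: { elapsed: 0, remaining: 0 },
  speed: 0,
  percent: 0,
  size: { total: 0, transferred: 0 },
  children: [
    // one entry per source artifact, keyed by its filename
    {
      modelId: 'example.Q4_K_M.gguf',
      time: { elapsed: 0, remaining: 0 },
      speed: 0,
      percent: 0,
      size: { total: 0, transferred: 0 },
    },
  ],
}
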
@ -2,7 +2,7 @@ import { Model } from '@janhq/core'

export const modelBinFileName = (model: Model) => {
  const modelFormatExt = '.gguf'
  const extractedFileName = model.source_url?.split('/').pop() ?? model.id
  const extractedFileName = model.sources[0]?.url.split('/').pop() ?? model.id
  const fileName = extractedFileName.toLowerCase().endsWith(modelFormatExt)
    ? extractedFileName
    : model.id

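The helper now derives the on-disk filename from the first entry of sources instead of the removed source_url, keeping the same fallback: if the URL's last path segment is not a .gguf file, the model id is used instead. A quick sketch of both branches, assuming the function returns fileName (the hunk is cut just before the return) and using a hypothetical fixture helper whose cast papers over the fields a real Model would carry:

const fixture = (id: string, url: string): Model =>
  ({ id, sources: [{ filename: url.split('/').pop() ?? id, url }] }) as unknown as Model

// Last URL segment ends in .gguf, so it is used verbatim:
console.assert(
  modelBinFileName(
    fixture(
      'phi-2-3b',
      'https://huggingface.co/TheBloke/phi-2-GGUF/resolve/main/phi-2.Q8_0.gguf'
    )
  ) === 'phi-2.Q8_0.gguf'
)

// No .gguf segment to extract, so the id is the fallback:
console.assert(
  modelBinFileName(fixture('my-model', 'https://example.com/download')) === 'my-model'
)
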
@ -40,6 +40,8 @@ export const toSettingParams = (
    n_parallel: undefined,
    cpu_threads: undefined,
    prompt_template: undefined,
    llama_model_path: undefined,
    mmproj: undefined,
  }
  const settingParams: ModelSettingParams = {}

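Adding llama_model_path and mmproj to this template object marks them as recognised setting keys, so values coming from a model's settings block are carried into the ModelSettingParams that get passed to the engine. The surrounding function body is not shown in the hunk, but the pattern amounts to filtering an input object down to the template's keys, roughly like this (names and helper are hypothetical):

// Hypothetical reduction of the pattern: keep only keys that the template
// declares (their placeholder values are all `undefined`).
const template = {
  prompt_template: undefined,
  llama_model_path: undefined,
  mmproj: undefined,
}

const pickTemplateKeys = (
  params: Record<string, unknown>
): Record<string, unknown> => {
  const out: Record<string, unknown> = {}
  for (const key of Object.keys(template)) {
    if (params[key] !== undefined) out[key] = params[key]
  }
  return out
}

// pickTemplateKeys({ llama_model_path: 'phi-2.Q8_0.gguf', temperature: 0.7 })
// -> { llama_model_path: 'phi-2.Q8_0.gguf' }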