Merge pull request #1888 from janhq/dev

Release cut 0.4.6
This commit is contained in:
Louis 2024-02-02 01:34:57 +07:00 committed by GitHub
commit 36ad16ff4e
No known key found for this signature in database
GPG Key ID: B5690EEEBB952194
183 changed files with 4834 additions and 2343 deletions

View File

@ -0,0 +1,4 @@
{
"name": "jan",
"image": "node:20"
}

View File

@ -8,7 +8,7 @@ on:
- 'README.md'
- 'docs/**'
schedule:
- cron: '0 20 * * 2,3,4' # At 8 PM UTC on Tuesday, Wednesday, and Thursday, which is 3 AM UTC+7
- cron: '0 20 * * 1,2,3' # At 8 PM UTC on Monday, Tuesday, and Wednesday, which is 3 AM UTC+7 on Tuesday, Wednesday, and Thursday
workflow_dispatch:
inputs:
public_provider:

View File

@ -98,8 +98,8 @@ jobs:
make build-and-publish
env:
GH_TOKEN: ${{ secrets.GITHUB_TOKEN }}
ANALYTICS_ID: ${{ secrets.JAN_APP_POSTHOG_PROJECT_API_KEY }}
ANALYTICS_HOST: ${{ secrets.JAN_APP_POSTHOG_URL }}
ANALYTICS_ID: ${{ secrets.JAN_APP_UMAMI_PROJECT_API_KEY }}
ANALYTICS_HOST: ${{ secrets.JAN_APP_UMAMI_URL }}
- name: Upload Artifact .deb file
if: inputs.public_provider != 'github'

View File

@ -137,8 +137,8 @@ jobs:
APPLE_APP_SPECIFIC_PASSWORD: ${{ secrets.APPLE_APP_SPECIFIC_PASSWORD }}
APP_PATH: "."
DEVELOPER_ID: ${{ secrets.DEVELOPER_ID }}
ANALYTICS_ID: ${{ secrets.JAN_APP_POSTHOG_PROJECT_API_KEY }}
ANALYTICS_HOST: ${{ secrets.JAN_APP_POSTHOG_URL }}
ANALYTICS_ID: ${{ secrets.JAN_APP_UMAMI_PROJECT_API_KEY }}
ANALYTICS_HOST: ${{ secrets.JAN_APP_UMAMI_URL }}
- name: Upload Artifact
if: inputs.public_provider != 'github'

View File

@ -127,8 +127,8 @@ jobs:
make build-and-publish
env:
GH_TOKEN: ${{ secrets.GITHUB_TOKEN }}
ANALYTICS_ID: ${{ secrets.JAN_APP_POSTHOG_PROJECT_API_KEY }}
ANALYTICS_HOST: ${{ secrets.JAN_APP_POSTHOG_URL }}
ANALYTICS_ID: ${{ secrets.JAN_APP_UMAMI_PROJECT_API_KEY }}
ANALYTICS_HOST: ${{ secrets.JAN_APP_UMAMI_URL }}
AZURE_KEY_VAULT_URI: ${{ secrets.AZURE_KEY_VAULT_URI }}
AZURE_CLIENT_ID: ${{ secrets.AZURE_CLIENT_ID }}
AZURE_TENANT_ID: ${{ secrets.AZURE_TENANT_ID }}

View File

@ -17,7 +17,7 @@ jobs:
with:
fetch-depth: "0"
token: ${{ secrets.PAT_SERVICE_ACCOUNT }}
ref: main
ref: dev
- name: Get Latest Release
uses: pozetroninc/github-action-get-latest-release@v0.7.0
@ -46,4 +46,4 @@ jobs:
git config --global user.name "Service Account"
git add README.md
git commit -m "Update README.md with Stable Download URLs"
git -c http.extraheader="AUTHORIZATION: bearer ${{ secrets.PAT_SERVICE_ACCOUNT }}" push origin HEAD:main
git -c http.extraheader="AUTHORIZATION: bearer ${{ secrets.PAT_SERVICE_ACCOUNT }}" push origin HEAD:dev

.gitignore (vendored, 3 lines changed)
View File

@ -12,6 +12,8 @@ build
electron/renderer
electron/models
electron/docs
electron/engines
server/pre-install
package-lock.json
*.log
@ -26,3 +28,4 @@ extensions/inference-nitro-extension/bin/*/*.exp
extensions/inference-nitro-extension/bin/*/*.lib
extensions/inference-nitro-extension/bin/saved-*
extensions/inference-nitro-extension/bin/*.tar.gz

View File

@ -76,31 +76,31 @@ Jan is an open-source ChatGPT alternative that runs 100% offline on your compute
<tr style="text-align:center">
<td style="text-align:center"><b>Experimental (Nightly Build)</b></td>
<td style="text-align:center">
<a href='https://delta.jan.ai/latest/jan-win-x64-0.4.4-170.exe'>
<a href='https://delta.jan.ai/latest/jan-win-x64-0.4.5-216.exe'>
<img src='./docs/static/img/windows.png' style="height:14px; width: 14px" />
<b>jan.exe</b>
</a>
</td>
<td style="text-align:center">
<a href='https://delta.jan.ai/latest/jan-mac-x64-0.4.4-170.dmg'>
<a href='https://delta.jan.ai/latest/jan-mac-x64-0.4.5-216.dmg'>
<img src='./docs/static/img/mac.png' style="height:15px; width: 15px" />
<b>Intel</b>
</a>
</td>
<td style="text-align:center">
<a href='https://delta.jan.ai/latest/jan-mac-arm64-0.4.4-170.dmg'>
<a href='https://delta.jan.ai/latest/jan-mac-arm64-0.4.5-216.dmg'>
<img src='./docs/static/img/mac.png' style="height:15px; width: 15px" />
<b>M1/M2</b>
</a>
</td>
<td style="text-align:center">
<a href='https://delta.jan.ai/latest/jan-linux-amd64-0.4.4-170.deb'>
<a href='https://delta.jan.ai/latest/jan-linux-amd64-0.4.5-216.deb'>
<img src='./docs/static/img/linux.png' style="height:14px; width: 14px" />
<b>jan.deb</b>
</a>
</td>
<td style="text-align:center">
<a href='https://delta.jan.ai/latest/jan-linux-x86_64-0.4.4-170.AppImage'>
<a href='https://delta.jan.ai/latest/jan-linux-x86_64-0.4.5-216.AppImage'>
<img src='./docs/static/img/linux.png' style="height:14px; width: 14px" />
<b>jan.AppImage</b>
</a>

core/.prettierignore (new file, 5 lines)
View File

@ -0,0 +1,5 @@
.next/
node_modules/
dist/
*.hbs
*.mdx

View File

@ -3,7 +3,6 @@
* @description Enum of all the routes exposed by the app
*/
export enum AppRoute {
appDataPath = 'appDataPath',
openExternalUrl = 'openExternalUrl',
openAppDirectory = 'openAppDirectory',
openFileExplore = 'openFileExplorer',
@ -12,6 +11,7 @@ export enum AppRoute {
updateAppConfiguration = 'updateAppConfiguration',
relaunch = 'relaunch',
joinPath = 'joinPath',
isSubdirectory = 'isSubdirectory',
baseName = 'baseName',
startServer = 'startServer',
stopServer = 'stopServer',
@ -61,7 +61,9 @@ export enum FileManagerRoute {
syncFile = 'syncFile',
getJanDataFolderPath = 'getJanDataFolderPath',
getResourcePath = 'getResourcePath',
getUserHomePath = 'getUserHomePath',
fileStat = 'fileStat',
writeBlob = 'writeBlob',
}
export type ApiFunction = (...args: any[]) => any

View File

@ -22,7 +22,11 @@ const executeOnMain: (extension: string, method: string, ...args: any[]) => Prom
* @param {object} network - Optional object specifying a proxy and whether to ignore SSL certificates.
* @returns {Promise<any>} A promise that resolves when the file is downloaded.
*/
const downloadFile: (url: string, fileName: string, network?: { proxy?: string, ignoreSSL?: boolean }) => Promise<any> = (url, fileName, network) => {
const downloadFile: (
url: string,
fileName: string,
network?: { proxy?: string; ignoreSSL?: boolean }
) => Promise<any> = (url, fileName, network) => {
return global.core?.api?.downloadFile(url, fileName, network)
}
@ -79,6 +83,12 @@ const openExternalUrl: (url: string) => Promise<any> = (url) =>
*/
const getResourcePath: () => Promise<string> = () => global.core.api?.getResourcePath()
/**
* Gets the user's home path.
* @returns {Promise<string>} A promise that resolves with the user's home path.
*/
const getUserHomePath = (): Promise<string> => global.core.api?.getUserHomePath()
/**
* Log to file from browser processes.
*
@ -87,6 +97,17 @@ const getResourcePath: () => Promise<string> = () => global.core.api?.getResourc
const log: (message: string, fileName?: string) => void = (message, fileName) =>
global.core.api?.log(message, fileName)
/**
* Check whether the path is a subdirectory of another path.
*
* @param from - The path to check.
* @param to - The path to check against.
*
* @returns {Promise<boolean>} - A promise that resolves with a boolean indicating whether the path is a subdirectory.
*/
const isSubdirectory: (from: string, to: string) => Promise<boolean> = (from: string, to: string) =>
global.core.api?.isSubdirectory(from, to)
/**
* Register extension point function type definition
*/
@ -94,7 +115,7 @@ export type RegisterExtensionPoint = (
extensionName: string,
extensionId: string,
method: Function,
priority?: number,
priority?: number
) => void
/**
@ -111,5 +132,7 @@ export {
openExternalUrl,
baseName,
log,
isSubdirectory,
getUserHomePath,
FileStat,
}
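The newly exported `isSubdirectory` and `getUserHomePath` proxy to main-process handlers added later in this diff. A minimal sketch of combining them to guard file access; the guard helper is illustrative, not part of this commit:

```typescript
import { getUserHomePath, isSubdirectory } from '@janhq/core'

// Hypothetical guard: refuse any path that escapes the user's home directory.
const assertInsideHome = async (candidate: string): Promise<void> => {
  const home = await getUserHomePath()
  const inside = await isSubdirectory(home, candidate)
  if (!inside) {
    throw new Error(`Path is outside the home directory: ${candidate}`)
  }
}
```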

View File

@ -1,4 +1,4 @@
import { FileStat } from "./types"
import { FileStat } from './types'
/**
* Writes data to a file at the specified path.
@ -6,6 +6,15 @@ import { FileStat } from "./types"
*/
const writeFileSync = (...args: any[]) => global.core.api?.writeFileSync(...args)
/**
* Writes blob data to a file at the specified path.
* @param path - The path to file.
* @param data - The blob data.
* @returns {Promise<any>} A promise that resolves when the blob data has been written.
*/
const writeBlob: (path: string, data: string) => Promise<any> = (path, data) =>
global.core.api?.writeBlob(path, data)
/**
* Reads the contents of a file at the specified path.
* @returns {Promise<any>} A Promise that resolves with the contents of the file.
@ -60,7 +69,6 @@ const syncFile: (src: string, dest: string) => Promise<any> = (src, dest) =>
*/
const copyFileSync = (...args: any[]) => global.core.api?.copyFileSync(...args)
/**
* Gets the file's stats.
*
@ -70,7 +78,6 @@ const copyFileSync = (...args: any[]) => global.core.api?.copyFileSync(...args)
const fileStat: (path: string) => Promise<FileStat | undefined> = (path) =>
global.core.api?.fileStat(path)
// TODO: Export `dummy` fs functions automatically
// Currently adding these manually
export const fs = {
@ -84,5 +91,6 @@ export const fs = {
appendFileSync,
copyFileSync,
syncFile,
fileStat
fileStat,
writeBlob,
}
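`writeBlob` expects base64-encoded data; the Electron handler later in this diff decodes it with `Buffer.from(data, 'base64')` and writes relative to the Jan data folder. A sketch of saving a browser `Blob` through it, with a hypothetical encoding helper:

```typescript
import { fs } from '@janhq/core'

// Hypothetical helper: base64-encode a Blob before handing it to fs.writeBlob.
// Fine for small files; chunk the conversion for large ones.
const saveBlob = async (blob: Blob, relativePath: string): Promise<void> => {
  const bytes = new Uint8Array(await blob.arrayBuffer())
  const base64 = btoa(String.fromCharCode(...bytes))
  await fs.writeBlob(relativePath, base64)
}
```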

View File

@ -2,7 +2,8 @@ import fs from 'fs'
import { JanApiRouteConfiguration, RouteConfiguration } from './configuration'
import { join } from 'path'
import { ContentType, MessageStatus, Model, ThreadMessage } from './../../../index'
import { getJanDataFolderPath } from '../../utils'
import { getEngineConfiguration, getJanDataFolderPath } from '../../utils'
import { DEFAULT_CHAT_COMPLETION_URL } from './consts'
export const getBuilder = async (configuration: RouteConfiguration) => {
const directoryPath = join(getJanDataFolderPath(), configuration.dirName)
@ -265,19 +266,22 @@ export const downloadModel = async (
const modelBinaryPath = join(directoryPath, modelId)
const request = require('request')
const rq = request({ url: model.source_url, strictSSL, proxy })
const progress = require('request-progress')
progress(rq, {})
.on('progress', function (state: any) {
console.log('progress', JSON.stringify(state, null, 2))
})
.on('error', function (err: Error) {
console.error('error', err)
})
.on('end', function () {
console.log('end')
})
.pipe(fs.createWriteStream(modelBinaryPath))
for (const source of model.sources) {
const rq = request({ url: source, strictSSL, proxy })
progress(rq, {})
.on('progress', function (state: any) {
console.debug('progress', JSON.stringify(state, null, 2))
})
.on('error', function (err: Error) {
console.error('error', err)
})
.on('end', function () {
console.debug('end')
})
.pipe(fs.createWriteStream(modelBinaryPath))
}
return {
message: `Starting download ${modelId}`,
@ -306,7 +310,7 @@ export const chatCompletions = async (request: any, reply: any) => {
const engineConfiguration = await getEngineConfiguration(requestedModel.engine)
let apiKey: string | undefined = undefined
let apiUrl: string = 'http://127.0.0.1:3928/inferences/llamacpp/chat_completion' // default nitro url
let apiUrl: string = DEFAULT_CHAT_COMPLETION_URL
if (engineConfiguration) {
apiKey = engineConfiguration.api_key
@ -317,7 +321,7 @@ export const chatCompletions = async (request: any, reply: any) => {
'Content-Type': 'text/event-stream',
'Cache-Control': 'no-cache',
'Connection': 'keep-alive',
"Access-Control-Allow-Origin": "*"
'Access-Control-Allow-Origin': '*',
})
const headers: Record<string, any> = {
@ -343,13 +347,3 @@ export const chatCompletions = async (request: any, reply: any) => {
response.body.pipe(reply.raw)
}
}
const getEngineConfiguration = async (engineId: string) => {
if (engineId !== 'openai') {
return undefined
}
const directoryPath = join(getJanDataFolderPath(), 'engines')
const filePath = join(directoryPath, `${engineId}.json`)
const data = await fs.readFileSync(filePath, 'utf-8')
return JSON.parse(data)
}

View File

@ -0,0 +1,19 @@
// The PORT to use for the Nitro subprocess
export const NITRO_DEFAULT_PORT = 3928
// The HOST address to use for the Nitro subprocess
export const LOCAL_HOST = '127.0.0.1'
export const SUPPORTED_MODEL_FORMAT = '.gguf'
// The URL for the Nitro subprocess
const NITRO_HTTP_SERVER_URL = `http://${LOCAL_HOST}:${NITRO_DEFAULT_PORT}`
// The URL for the Nitro subprocess to load a model
export const NITRO_HTTP_LOAD_MODEL_URL = `${NITRO_HTTP_SERVER_URL}/inferences/llamacpp/loadmodel`
// The URL for the Nitro subprocess to validate a model
export const NITRO_HTTP_VALIDATE_MODEL_URL = `${NITRO_HTTP_SERVER_URL}/inferences/llamacpp/modelstatus`
// The URL for the Nitro subprocess to kill itself
export const NITRO_HTTP_KILL_URL = `${NITRO_HTTP_SERVER_URL}/processmanager/destroy`
export const DEFAULT_CHAT_COMPLETION_URL = `http://${LOCAL_HOST}:${NITRO_DEFAULT_PORT}/inferences/llamacpp/chat_completion` // default nitro url
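These constants replace endpoint strings that were previously inlined (e.g. the default chat completion URL in `builder.ts` above). A sketch of a non-streaming request against `DEFAULT_CHAT_COMPLETION_URL`, assuming Nitro is running with a model loaded; the payload shape follows the OpenAI-compatible convention and is an assumption here:

```typescript
import { DEFAULT_CHAT_COMPLETION_URL } from './consts'

const complete = async (prompt: string): Promise<string> => {
  const res = await fetch(DEFAULT_CHAT_COMPLETION_URL, {
    method: 'POST',
    headers: { 'Content-Type': 'application/json' },
    body: JSON.stringify({
      messages: [{ role: 'user', content: prompt }],
      stream: false,
    }),
  })
  const body = await res.json()
  // Assumes an OpenAI-compatible response shape.
  return body.choices?.[0]?.message?.content ?? ''
}
```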

View File

@ -0,0 +1,351 @@
import fs from 'fs'
import { join } from 'path'
import { getJanDataFolderPath, getJanExtensionsPath, getSystemResourceInfo } from '../../utils'
import { logServer } from '../../log'
import { ChildProcessWithoutNullStreams, spawn } from 'child_process'
import { Model, ModelSettingParams, PromptTemplate } from '../../../types'
import {
LOCAL_HOST,
NITRO_DEFAULT_PORT,
NITRO_HTTP_KILL_URL,
NITRO_HTTP_LOAD_MODEL_URL,
NITRO_HTTP_VALIDATE_MODEL_URL,
SUPPORTED_MODEL_FORMAT,
} from './consts'
// The subprocess instance for Nitro
let subprocess: ChildProcessWithoutNullStreams | undefined = undefined
// TODO: move this to core type
interface NitroModelSettings extends ModelSettingParams {
llama_model_path: string
cpu_threads: number
}
export const startModel = async (modelId: string, settingParams?: ModelSettingParams) => {
try {
await runModel(modelId, settingParams)
return {
message: `Model ${modelId} started`,
}
} catch (e) {
return {
error: e,
}
}
}
const runModel = async (modelId: string, settingParams?: ModelSettingParams): Promise<void> => {
const janDataFolderPath = getJanDataFolderPath()
const modelFolderFullPath = join(janDataFolderPath, 'models', modelId)
if (!fs.existsSync(modelFolderFullPath)) {
throw `Model not found: ${modelId}`
}
const files: string[] = fs.readdirSync(modelFolderFullPath)
// Look for GGUF model file
const ggufBinFile = files.find((file) => file.toLowerCase().includes(SUPPORTED_MODEL_FORMAT))
const modelMetadataPath = join(modelFolderFullPath, 'model.json')
const modelMetadata: Model = JSON.parse(fs.readFileSync(modelMetadataPath, 'utf-8'))
if (!ggufBinFile) {
throw 'No GGUF model file found'
}
const modelBinaryPath = join(modelFolderFullPath, ggufBinFile)
const nitroResourceProbe = await getSystemResourceInfo()
const nitroModelSettings: NitroModelSettings = {
...modelMetadata.settings,
...settingParams,
llama_model_path: modelBinaryPath,
// This is critical and requires the real physical CPU core count (or performance-core count)
cpu_threads: Math.max(1, nitroResourceProbe.numCpuPhysicalCore),
...(modelMetadata.settings.mmproj && {
mmproj: join(modelFolderFullPath, modelMetadata.settings.mmproj),
}),
}
logServer(`[NITRO]::Debug: Nitro model settings: ${JSON.stringify(nitroModelSettings)}`)
// Convert settings.prompt_template to system_prompt, user_prompt, ai_prompt
if (modelMetadata.settings.prompt_template) {
const promptTemplate = modelMetadata.settings.prompt_template
const prompt = promptTemplateConverter(promptTemplate)
if (prompt?.error) {
return Promise.reject(prompt.error)
}
nitroModelSettings.system_prompt = prompt.system_prompt
nitroModelSettings.user_prompt = prompt.user_prompt
nitroModelSettings.ai_prompt = prompt.ai_prompt
}
await runNitroAndLoadModel(modelId, nitroModelSettings)
}
// TODO: move to util
const promptTemplateConverter = (promptTemplate: string): PromptTemplate => {
// Split the string using the markers
const systemMarker = '{system_message}'
const promptMarker = '{prompt}'
if (promptTemplate.includes(systemMarker) && promptTemplate.includes(promptMarker)) {
// Find the indices of the markers
const systemIndex = promptTemplate.indexOf(systemMarker)
const promptIndex = promptTemplate.indexOf(promptMarker)
// Extract the parts of the string
const system_prompt = promptTemplate.substring(0, systemIndex)
const user_prompt = promptTemplate.substring(systemIndex + systemMarker.length, promptIndex)
const ai_prompt = promptTemplate.substring(promptIndex + promptMarker.length)
// Return the split parts
return { system_prompt, user_prompt, ai_prompt }
} else if (promptTemplate.includes(promptMarker)) {
// Extract the parts of the string for the case where only promptMarker is present
const promptIndex = promptTemplate.indexOf(promptMarker)
const user_prompt = promptTemplate.substring(0, promptIndex)
const ai_prompt = promptTemplate.substring(promptIndex + promptMarker.length)
// Return the split parts
return { user_prompt, ai_prompt }
}
// Return an error if none of the conditions are met
return { error: 'Cannot split prompt template' }
}
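For example, a Zephyr-style template (illustrative, not from this commit) splits as follows:

```typescript
// '<|system|>\n' + {system_message} + '</s>\n<|user|>\n' + {prompt} + '</s>\n<|assistant|>\n'
const parts = promptTemplateConverter(
  '<|system|>\n{system_message}</s>\n<|user|>\n{prompt}</s>\n<|assistant|>\n'
)
// parts.system_prompt === '<|system|>\n'
// parts.user_prompt   === '</s>\n<|user|>\n'
// parts.ai_prompt     === '</s>\n<|assistant|>\n'
```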
const runNitroAndLoadModel = async (modelId: string, modelSettings: NitroModelSettings) => {
// Stop any running model and wait for the Nitro port to be freed before spawning a new subprocess
const tcpPortUsed = require('tcp-port-used')
await stopModel(modelId)
await tcpPortUsed.waitUntilFree(NITRO_DEFAULT_PORT, 300, 5000)
/**
* There is a problem with Windows process manager
* Should wait for a while to make sure the port is free and the subprocess is killed
* The tested threshold is 500ms
**/
if (process.platform === 'win32') {
await new Promise((resolve) => setTimeout(resolve, 500))
}
await spawnNitroProcess()
await loadLLMModel(modelSettings)
await validateModelStatus()
}
const spawnNitroProcess = async (): Promise<void> => {
logServer(`[NITRO]::Debug: Spawning Nitro subprocess...`)
let binaryFolder = join(
getJanExtensionsPath(),
'@janhq',
'inference-nitro-extension',
'dist',
'bin'
)
let executableOptions = executableNitroFile()
const tcpPortUsed = require('tcp-port-used')
const args: string[] = ['1', LOCAL_HOST, NITRO_DEFAULT_PORT.toString()]
// Execute the binary
logServer(
`[NITRO]::Debug: Spawn nitro at path: ${executableOptions.executablePath}, and args: ${args}`
)
subprocess = spawn(
executableOptions.executablePath,
['1', LOCAL_HOST, NITRO_DEFAULT_PORT.toString()],
{
cwd: binaryFolder,
env: {
...process.env,
CUDA_VISIBLE_DEVICES: executableOptions.cudaVisibleDevices,
},
}
)
// Handle subprocess output
subprocess.stdout.on('data', (data: any) => {
logServer(`[NITRO]::Debug: ${data}`)
})
subprocess.stderr.on('data', (data: any) => {
logServer(`[NITRO]::Error: ${data}`)
})
subprocess.on('close', (code: any) => {
logServer(`[NITRO]::Debug: Nitro exited with code: ${code}`)
subprocess = undefined
})
tcpPortUsed.waitUntilUsed(NITRO_DEFAULT_PORT, 300, 30000).then(() => {
logServer(`[NITRO]::Debug: Nitro is ready`)
})
}
type NitroExecutableOptions = {
executablePath: string
cudaVisibleDevices: string
}
const executableNitroFile = (): NitroExecutableOptions => {
const nvidiaInfoFilePath = join(getJanDataFolderPath(), 'settings', 'settings.json')
let binaryFolder = join(
getJanExtensionsPath(),
'@janhq',
'inference-nitro-extension',
'dist',
'bin'
)
let cudaVisibleDevices = ''
let binaryName = 'nitro'
/**
* The binary folder is different for each platform.
*/
if (process.platform === 'win32') {
/**
* For Windows: win-cpu, win-cuda-11-7, win-cuda-12-0
*/
let nvidiaInfo = JSON.parse(fs.readFileSync(nvidiaInfoFilePath, 'utf-8'))
if (nvidiaInfo['run_mode'] === 'cpu') {
binaryFolder = join(binaryFolder, 'win-cpu')
} else {
if (nvidiaInfo['cuda'].version === '12') {
binaryFolder = join(binaryFolder, 'win-cuda-12-0')
} else {
binaryFolder = join(binaryFolder, 'win-cuda-11-7')
}
cudaVisibleDevices = nvidiaInfo['gpu_highest_vram']
}
binaryName = 'nitro.exe'
} else if (process.platform === 'darwin') {
/**
* For macOS: mac-arm64 (Apple Silicon), mac-x64 (Intel)
*/
if (process.arch === 'arm64') {
binaryFolder = join(binaryFolder, 'mac-arm64')
} else {
binaryFolder = join(binaryFolder, 'mac-x64')
}
} else {
/**
* For Linux: linux-cpu, linux-cuda-11-7, linux-cuda-12-0
*/
let nvidiaInfo = JSON.parse(fs.readFileSync(nvidiaInfoFilePath, 'utf-8'))
if (nvidiaInfo['run_mode'] === 'cpu') {
binaryFolder = join(binaryFolder, 'linux-cpu')
} else {
if (nvidiaInfo['cuda'].version === '12') {
binaryFolder = join(binaryFolder, 'linux-cuda-12-0')
} else {
binaryFolder = join(binaryFolder, 'linux-cuda-11-7')
}
cudaVisibleDevices = nvidiaInfo['gpu_highest_vram']
}
}
return {
executablePath: join(binaryFolder, binaryName),
cudaVisibleDevices,
}
}
const validateModelStatus = async (): Promise<void> => {
// Send a GET request to the validation URL.
// Retry the request up to 5 times if it fails, with a delay of 500 milliseconds between retries.
const fetchRT = require('fetch-retry')
const fetchRetry = fetchRT(fetch)
return fetchRetry(NITRO_HTTP_VALIDATE_MODEL_URL, {
method: 'GET',
headers: {
'Content-Type': 'application/json',
},
retries: 5,
retryDelay: 500,
}).then(async (res: Response) => {
logServer(`[NITRO]::Debug: Validate model state success with response ${JSON.stringify(res)}`)
// If the response is OK, check model_loaded status.
if (res.ok) {
const body = await res.json()
// If the model is loaded, return an empty object.
// Otherwise, return an object with an error message.
if (body.model_loaded) {
return Promise.resolve()
}
}
return Promise.reject('Validate model status failed')
})
}
const loadLLMModel = async (settings: NitroModelSettings): Promise<Response> => {
logServer(`[NITRO]::Debug: Loading model with params ${JSON.stringify(settings)}`)
const fetchRT = require('fetch-retry')
const fetchRetry = fetchRT(fetch)
return fetchRetry(NITRO_HTTP_LOAD_MODEL_URL, {
method: 'POST',
headers: {
'Content-Type': 'application/json',
},
body: JSON.stringify(settings),
retries: 3,
retryDelay: 500,
})
.then((res: any) => {
logServer(`[NITRO]::Debug: Load model success with response ${JSON.stringify(res)}`)
return Promise.resolve(res)
})
.catch((err: any) => {
logServer(`[NITRO]::Error: Load model failed with error ${err}`)
return Promise.reject(err)
})
}
/**
* Stop model and kill nitro process.
*/
export const stopModel = async (_modelId: string) => {
if (!subprocess) {
return {
error: "Model isn't running",
}
}
return new Promise((resolve, reject) => {
const controller = new AbortController()
setTimeout(() => {
controller.abort()
reject({
error: 'Failed to stop model: Timed out',
})
}, 5000)
const tcpPortUsed = require('tcp-port-used')
logServer(`[NITRO]::Debug: Request to kill Nitro`)
fetch(NITRO_HTTP_KILL_URL, {
method: 'DELETE',
signal: controller.signal,
})
.then(() => {
subprocess?.kill()
subprocess = undefined
})
.catch(() => {
// No need to handle the error; the subprocess is killed regardless
})
.then(() => tcpPortUsed.waitUntilFree(NITRO_DEFAULT_PORT, 300, 5000))
.then(() => logServer(`[NITRO]::Debug: Nitro process is terminated`))
.then(() =>
resolve({
message: 'Model stopped',
})
)
})
}

View File

@ -10,6 +10,8 @@ import {
} from '../common/builder'
import { JanApiRouteConfiguration } from '../common/configuration'
import { startModel, stopModel } from '../common/startStopModel'
import { ModelSettingParams } from '../../../types'
export const commonRouter = async (app: HttpServer) => {
// Common Routes
@ -17,19 +19,33 @@ export const commonRouter = async (app: HttpServer) => {
app.get(`/${key}`, async (_request) => getBuilder(JanApiRouteConfiguration[key]))
app.get(`/${key}/:id`, async (request: any) =>
retrieveBuilder(JanApiRouteConfiguration[key], request.params.id),
retrieveBuilder(JanApiRouteConfiguration[key], request.params.id)
)
app.delete(`/${key}/:id`, async (request: any) =>
deleteBuilder(JanApiRouteConfiguration[key], request.params.id),
deleteBuilder(JanApiRouteConfiguration[key], request.params.id)
)
})
// Download Model Routes
app.get(`/models/download/:modelId`, async (request: any) =>
downloadModel(request.params.modelId, { ignoreSSL: request.query.ignoreSSL === 'true', proxy: request.query.proxy }),
downloadModel(request.params.modelId, {
ignoreSSL: request.query.ignoreSSL === 'true',
proxy: request.query.proxy,
})
)
app.put(`/models/:modelId/start`, async (request: any) => {
let settingParams: ModelSettingParams | undefined = undefined
if (Object.keys(request.body).length !== 0) {
settingParams = JSON.parse(request.body) as ModelSettingParams
}
return startModel(request.params.modelId, settingParams)
})
app.put(`/models/:modelId/stop`, async (request: any) => stopModel(request.params.modelId))
// Chat Completion Routes
app.post(`/chat/completions`, async (request: any, reply: any) => chatCompletions(request, reply))
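The new start/stop routes expose the model lifecycle over the local API server. A sketch of driving them from a client; the base URL and model id are placeholders:

```typescript
// Assumed local API server address; adjust to your configuration.
const BASE = 'http://localhost:1337'

const restartModel = async (modelId: string): Promise<void> => {
  // Start the model; the optional body overrides ModelSettingParams fields.
  await fetch(`${BASE}/models/${modelId}/start`, {
    method: 'PUT',
    body: JSON.stringify({ ctx_len: 2048 }),
  })
  // Stop it again.
  await fetch(`${BASE}/models/${modelId}/stop`, { method: 'PUT' })
}
```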

View File

@ -8,5 +8,7 @@ export const fsRouter = async (app: HttpServer) => {
app.post(`/app/${FileManagerRoute.getResourcePath}`, async (request: any, reply: any) => {})
app.post(`/app/${FileManagerRoute.getUserHomePath}`, async (request: any, reply: any) => {})
app.post(`/app/${FileManagerRoute.fileStat}`, async (request: any, reply: any) => {})
}

View File

@ -2,6 +2,7 @@ import { FileSystemRoute } from '../../../api'
import { join } from 'path'
import { HttpServer } from '../HttpServer'
import { getJanDataFolderPath } from '../../utils'
import { normalizeFilePath } from '../../path'
export const fsRouter = async (app: HttpServer) => {
const moduleName = 'fs'
@ -13,10 +14,10 @@ export const fsRouter = async (app: HttpServer) => {
const result = await import(moduleName).then((mdl) => {
return mdl[route](
...body.map((arg: any) =>
typeof arg === 'string' && arg.includes('file:/')
? join(getJanDataFolderPath(), arg.replace('file:/', ''))
: arg,
),
typeof arg === 'string' && (arg.startsWith(`file:/`) || arg.startsWith(`file:\\`))
? join(getJanDataFolderPath(), normalizeFilePath(arg))
: arg
)
)
})
res.status(200).send(result)
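Switching from `includes` to `startsWith` means only arguments that actually begin with the `file:/` or `file:\` prefix are remapped, and `normalizeFilePath` strips that prefix before joining onto the Jan data folder. A sketch of the mapping with a stand-in for `normalizeFilePath`, whose body is not part of this hunk:

```typescript
import { join } from 'path'

// Stand-in for normalizeFilePath from @janhq/core/node: strip the leading
// file:/ or file:\ prefix so the remainder joins onto the data folder.
const stripFilePrefix = (p: string): string => p.replace(/^file:[\\/]+/, '')

const janDataFolder = '/home/user/jan' // placeholder data folder
console.log(join(janDataFolder, stripFilePrefix('file://models/model.json')))
// -> /home/user/jan/models/model.json
```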

View File

@ -1,16 +1,18 @@
import { AppConfiguration } from "../../types";
import { join } from "path";
import fs from "fs";
import os from "os";
import { AppConfiguration, SystemResourceInfo } from '../../types'
import { join } from 'path'
import fs from 'fs'
import os from 'os'
import { log, logServer } from '../log'
import childProcess from 'child_process'
// TODO: move this to core
const configurationFileName = "settings.json";
const configurationFileName = 'settings.json'
// TODO: do not specify the app name in the framework module
const defaultJanDataFolder = join(os.homedir(), "jan");
const defaultJanDataFolder = join(os.homedir(), 'jan')
const defaultAppConfig: AppConfiguration = {
data_folder: defaultJanDataFolder,
};
}
/**
* Getting App Configurations.
@ -20,39 +22,39 @@ const defaultAppConfig: AppConfiguration = {
export const getAppConfigurations = (): AppConfiguration => {
// Retrieve Application Support folder path
// Fallback to user home directory if not found
const configurationFile = getConfigurationFilePath();
const configurationFile = getConfigurationFilePath()
if (!fs.existsSync(configurationFile)) {
// create default app config if we don't have one
console.debug(`App config not found, creating default config at ${configurationFile}`);
fs.writeFileSync(configurationFile, JSON.stringify(defaultAppConfig));
return defaultAppConfig;
console.debug(`App config not found, creating default config at ${configurationFile}`)
fs.writeFileSync(configurationFile, JSON.stringify(defaultAppConfig))
return defaultAppConfig
}
try {
const appConfigurations: AppConfiguration = JSON.parse(
fs.readFileSync(configurationFile, "utf-8"),
);
return appConfigurations;
fs.readFileSync(configurationFile, 'utf-8')
)
return appConfigurations
} catch (err) {
console.error(`Failed to read app config, return default config instead! Err: ${err}`);
return defaultAppConfig;
console.error(`Failed to read app config, return default config instead! Err: ${err}`)
return defaultAppConfig
}
};
}
const getConfigurationFilePath = () =>
join(
global.core?.appPath() || process.env[process.platform == "win32" ? "USERPROFILE" : "HOME"],
configurationFileName,
);
global.core?.appPath() || process.env[process.platform == 'win32' ? 'USERPROFILE' : 'HOME'],
configurationFileName
)
export const updateAppConfiguration = (configuration: AppConfiguration): Promise<void> => {
const configurationFile = getConfigurationFilePath();
console.debug("updateAppConfiguration, configurationFile: ", configurationFile);
const configurationFile = getConfigurationFilePath()
console.debug('updateAppConfiguration, configurationFile: ', configurationFile)
fs.writeFileSync(configurationFile, JSON.stringify(configuration));
return Promise.resolve();
};
fs.writeFileSync(configurationFile, JSON.stringify(configuration))
return Promise.resolve()
}
/**
* Utility function to get server log path
@ -60,13 +62,13 @@ export const updateAppConfiguration = (configuration: AppConfiguration): Promise
* @returns {string} The log path.
*/
export const getServerLogPath = (): string => {
const appConfigurations = getAppConfigurations();
const logFolderPath = join(appConfigurations.data_folder, "logs");
const appConfigurations = getAppConfigurations()
const logFolderPath = join(appConfigurations.data_folder, 'logs')
if (!fs.existsSync(logFolderPath)) {
fs.mkdirSync(logFolderPath, { recursive: true });
fs.mkdirSync(logFolderPath, { recursive: true })
}
return join(logFolderPath, "server.log");
};
return join(logFolderPath, 'server.log')
}
/**
* Utility function to get app log path
@ -74,13 +76,13 @@ export const getServerLogPath = (): string => {
* @returns {string} The log path.
*/
export const getAppLogPath = (): string => {
const appConfigurations = getAppConfigurations();
const logFolderPath = join(appConfigurations.data_folder, "logs");
const appConfigurations = getAppConfigurations()
const logFolderPath = join(appConfigurations.data_folder, 'logs')
if (!fs.existsSync(logFolderPath)) {
fs.mkdirSync(logFolderPath, { recursive: true });
fs.mkdirSync(logFolderPath, { recursive: true })
}
return join(logFolderPath, "app.log");
};
return join(logFolderPath, 'app.log')
}
/**
* Utility function to get data folder path
@ -88,9 +90,9 @@ export const getAppLogPath = (): string => {
* @returns {string} The data folder path.
*/
export const getJanDataFolderPath = (): string => {
const appConfigurations = getAppConfigurations();
return appConfigurations.data_folder;
};
const appConfigurations = getAppConfigurations()
return appConfigurations.data_folder
}
/**
* Utility function to get extension path
@ -98,6 +100,70 @@ export const getJanDataFolderPath = (): string => {
* @returns {string} The extensions path.
*/
export const getJanExtensionsPath = (): string => {
const appConfigurations = getAppConfigurations();
return join(appConfigurations.data_folder, "extensions");
};
const appConfigurations = getAppConfigurations()
return join(appConfigurations.data_folder, 'extensions')
}
/**
* Utility function to get the physical CPU count
*
* @returns {Promise<number>} The physical CPU count.
*/
export const physicalCpuCount = async (): Promise<number> => {
const platform = os.platform()
if (platform === 'linux') {
const output = await exec('lscpu -p | egrep -v "^#" | sort -u -t, -k 2,4 | wc -l')
return parseInt(output.trim(), 10)
} else if (platform === 'darwin') {
const output = await exec('sysctl -n hw.physicalcpu_max')
return parseInt(output.trim(), 10)
} else if (platform === 'win32') {
const output = await exec('WMIC CPU Get NumberOfCores')
return output
.split(os.EOL)
.map((line: string) => parseInt(line))
.filter((value: number) => !isNaN(value))
.reduce((sum: number, number: number) => sum + number, 1)
} else {
const cores = os.cpus().filter((cpu: any, index: number) => {
const hasHyperthreading = cpu.model.includes('Intel')
const isOdd = index % 2 === 1
return !hasHyperthreading || isOdd
})
return cores.length
}
}
const exec = async (command: string): Promise<string> => {
return new Promise((resolve, reject) => {
childProcess.exec(command, { encoding: 'utf8' }, (error, stdout) => {
if (error) {
reject(error)
} else {
resolve(stdout)
}
})
})
}
export const getSystemResourceInfo = async (): Promise<SystemResourceInfo> => {
const cpu = await physicalCpuCount()
const message = `[NITRO]::CPU information - ${cpu}`
log(message)
logServer(message)
return {
numCpuPhysicalCore: cpu,
memAvailable: 0, // TODO: this should not be 0
}
}
export const getEngineConfiguration = async (engineId: string) => {
if (engineId !== 'openai') {
return undefined
}
const directoryPath = join(getJanDataFolderPath(), 'engines')
const filePath = join(directoryPath, `${engineId}.json`)
const data = fs.readFileSync(filePath, 'utf-8')
return JSON.parse(data)
}
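`getEngineConfiguration` moves here from `builder.ts` (removed earlier in this diff) so the server routes and other node-side callers can share it. A sketch of reading the OpenAI engine settings, assuming the function is exported alongside the other node utils and that `engines/openai.json` carries an `api_key` field as the `chatCompletions` handler suggests:

```typescript
import { getEngineConfiguration } from '@janhq/core/node'

const describeEngine = async (): Promise<void> => {
  const engine = await getEngineConfiguration('openai')
  if (engine) {
    // api_key mirrors the field read by the chatCompletions handler.
    console.log('OpenAI engine configured:', Boolean(engine.api_key))
  }
}
```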

View File

@ -2,6 +2,13 @@
* Assistant type defines the shape of an assistant object.
* @stored
*/
export type AssistantTool = {
type: string
enabled: boolean
settings: any
}
export type Assistant = {
/** Represents the avatar of the user. */
avatar: string
@ -22,7 +29,7 @@ export type Assistant = {
/** Represents the instructions for the object. */
instructions?: string
/** Represents the tools associated with the object. */
tools?: any
tools?: AssistantTool[]
/** Represents the file identifiers associated with the object. */
file_ids: string[]
/** Represents the metadata of the object. */

View File

@ -6,3 +6,4 @@ export * from './inference'
export * from './monitoring'
export * from './file'
export * from './config'
export * from './miscellaneous'

View File

@ -1,3 +1,5 @@
import { ContentType, ContentValue } from '../message'
/**
* The role of the author of this message.
*/
@ -13,7 +15,32 @@ export enum ChatCompletionRole {
*/
export type ChatCompletionMessage = {
/** The contents of the message. **/
content?: string
content?: ChatCompletionMessageContent
/** The role of the author of this message. **/
role: ChatCompletionRole
}
export type ChatCompletionMessageContent =
| string
| (ChatCompletionMessageContentText &
ChatCompletionMessageContentImage &
ChatCompletionMessageContentDoc)[]
export enum ChatCompletionMessageContentType {
Text = 'text',
Image = 'image_url',
Doc = 'doc_url',
}
export type ChatCompletionMessageContentText = {
type: ChatCompletionMessageContentType
text: string
}
export type ChatCompletionMessageContentImage = {
type: ChatCompletionMessageContentType
image_url: { url: string }
}
export type ChatCompletionMessageContentDoc = {
type: ChatCompletionMessageContentType
doc_url: { url: string }
}
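With these additions, `content` may be a plain string or an array of typed parts. A sketch of a mixed text-and-image message; note the commit types the array elements as an intersection of the three part types (a union is what usage suggests), so the literal is left untyped here, and the image path is a placeholder:

```typescript
import { ChatCompletionMessageContentType } from '@janhq/core'

const message = {
  role: 'user',
  content: [
    {
      type: ChatCompletionMessageContentType.Text,
      text: 'What is in this image?',
    },
    {
      type: ChatCompletionMessageContentType.Image,
      image_url: { url: 'file://path/to/image.png' }, // placeholder path
    },
  ],
}
```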

View File

@ -1,5 +1,6 @@
import { ChatCompletionMessage, ChatCompletionRole } from '../inference'
import { ModelInfo } from '../model'
import { Thread } from '../thread'
/**
* The `ThreadMessage` type defines the shape of a thread's message object.
@ -35,7 +36,10 @@ export type ThreadMessage = {
export type MessageRequest = {
id?: string
/** The thread id of the message request. **/
/**
* @deprecated Use thread object instead
* The thread id of the message request.
*/
threadId: string
/**
@ -48,6 +52,10 @@ export type MessageRequest = {
/** Settings for constructing a chat completion request **/
model?: ModelInfo
/** The thread this message belongs to. **/
// TODO: deprecate threadId field
thread?: Thread
}
/**
@ -62,7 +70,7 @@ export enum MessageStatus {
/** Message loaded with error. **/
Error = 'error',
/** Message is cancelled streaming */
Stopped = "stopped"
Stopped = 'stopped',
}
/**
@ -71,6 +79,7 @@ export enum MessageStatus {
export enum ContentType {
Text = 'text',
Image = 'image',
Pdf = 'pdf',
}
/**
@ -80,6 +89,8 @@ export enum ContentType {
export type ContentValue = {
value: string
annotations: string[]
name?: string
size?: number
}
/**

View File

@ -0,0 +1,2 @@
export * from './systemResourceInfo'
export * from './promptTemplate'

View File

@ -0,0 +1,6 @@
export type PromptTemplate = {
system_prompt?: string
ai_prompt?: string
user_prompt?: string
error?: string
}

View File

@ -0,0 +1,4 @@
export type SystemResourceInfo = {
numCpuPhysicalCore: number
memAvailable: number
}

View File

@ -7,6 +7,7 @@ export type ModelInfo = {
settings: ModelSettingParams
parameters: ModelRuntimeParams
engine?: InferenceEngine
proxyEngine?: InferenceEngine
}
/**
@ -18,7 +19,13 @@ export enum InferenceEngine {
nitro = 'nitro',
openai = 'openai',
triton_trtllm = 'triton_trtllm',
hf_endpoint = 'hf_endpoint',
tool_retrieval_enabled = 'tool_retrieval_enabled',
}
export type ModelArtifact = {
filename: string
url: string
}
/**
@ -45,7 +52,7 @@ export type Model = {
/**
* The model download source. It can be an external url or a local filepath.
*/
source_url: string
sources: ModelArtifact[]
/**
* The model identifier, which can be referenced in the API endpoints.
@ -85,6 +92,13 @@ export type Model = {
* The model engine.
*/
engine: InferenceEngine
proxyEngine?: InferenceEngine
/**
* Is multimodal or not.
*/
visionModel?: boolean
}
export type ModelMetadata = {
@ -107,6 +121,9 @@ export type ModelSettingParams = {
system_prompt?: string
ai_prompt?: string
user_prompt?: string
llama_model_path?: string
mmproj?: string
cont_batching?: boolean
}
/**
@ -122,4 +139,5 @@ export type ModelRuntimeParams = {
stop?: string[]
frequency_penalty?: number
presence_penalty?: number
engine?: string
}
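`source_url: string` becomes `sources: ModelArtifact[]`, so a model can reference several files, e.g. a GGUF binary plus an `mmproj` projector for vision models. A sketch of the new shape; the filename and URL are placeholders:

```typescript
import { ModelArtifact } from '@janhq/core'

const sources: ModelArtifact[] = [
  {
    filename: 'example-7b.Q4_K_M.gguf',
    url: 'https://huggingface.co/example/example-7b-GGUF/resolve/main/example-7b.Q4_K_M.gguf',
  },
]
```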

View File

@ -1,2 +1,3 @@
export * from './threadEntity'
export * from './threadInterface'
export * from './threadEvent'

View File

@ -1,3 +1,4 @@
import { AssistantTool } from '../assistant'
import { ModelInfo } from '../model'
/**
@ -30,6 +31,7 @@ export type ThreadAssistantInfo = {
assistant_name: string
model: ModelInfo
instructions?: string
tools?: AssistantTool[]
}
/**
@ -41,5 +43,4 @@ export type ThreadState = {
waitingForResponse: boolean
error?: Error
lastMessage?: string
isFinishInit?: boolean
}

View File

@ -0,0 +1,4 @@
export enum ThreadEvent {
/** The `OnThreadStarted` event is emitted when a thread is started. */
OnThreadStarted = 'OnThreadStarted',
}

View File

@ -1,5 +1,5 @@
GTM_ID=xxxx
POSTHOG_PROJECT_API_KEY=xxxx
POSTHOG_APP_URL=xxxx
UMAMI_PROJECT_API_KEY=xxxx
UMAMI_APP_URL=xxxx
ALGOLIA_API_KEY=xxxx
ALGOLIA_APP_ID=xxxx

View File

@ -56,7 +56,6 @@ jan/ # Jan root folder
- Each `model` folder contains a `model.json` file, which is a representation of a model.
- `model.json` contains metadata and default parameters used to run a model.
- The only required field is `source_url`.
### Example
@ -64,36 +63,43 @@ Here's a standard example `model.json` for a GGUF model.
```js
{
"id": "zephyr-7b", // Defaults to foldername
"object": "model", // Defaults to "model"
"source_url": "https://huggingface.co/TheBloke/zephyr-7B-beta-GGUF/blob/main/zephyr-7b-beta.Q4_K_M.gguf",
"name": "Zephyr 7B", // Defaults to foldername
"owned_by": "you", // Defaults to "you"
"version": "1", // Defaults to 1
"created": 1231231, // Defaults to file creation time
"description": null, // Defaults to null
"state": enum[null, "ready"]
"format": "ggufv3", // Defaults to "ggufv3"
"engine": "nitro", // engine_id specified in jan/engine folder
"engine_parameters": { // Engine parameters inside model.json can override
"ctx_len": 2048, // the value inside the base engine.json
"id": "zephyr-7b", // Defaults to foldername
"object": "model", // Defaults to "model"
"sources": [
{
"filename": "zephyr-7b-beta.Q4_K_M.gguf",
"url": "https://huggingface.co/TheBloke/zephyr-7B-beta-GGUF/blob/main/zephyr-7b-beta.Q4_K_M.gguf"
}
],
"name": "Zephyr 7B", // Defaults to foldername
"owned_by": "you", // Defaults to "you"
"version": "1", // Defaults to 1
"created": 1231231, // Defaults to file creation time
"description": null, // Defaults to null
"format": "ggufv3", // Defaults to "ggufv3"
"engine": "nitro", // engine_id specified in jan/engine folder
"engine_parameters": {
// Engine parameters inside model.json can override
"ctx_len": 4096, // the value inside the base engine.json
"ngl": 100,
"embedding": true,
"n_parallel": 4,
},
"model_parameters": { // Models are called parameters
"n_parallel": 4
},
"model_parameters": {
// Runtime parameters passed to the model
"stream": true,
"max_tokens": 2048,
"stop": ["<endofstring>"], // This usually can be left blank, only used with specific need from model author
"max_tokens": 4096,
"stop": ["<endofstring>"], // This usually can be left blank, only used with specific need from model author
"frequency_penalty": 0,
"presence_penalty": 0,
"temperature": 0.7,
"top_p": 0.95
},
"metadata": {}, // Defaults to {}
"assets": [ // Defaults to current dir
"file://.../zephyr-7b-q4_k_m.bin",
]
},
"metadata": {}, // Defaults to {}
"assets": [
// Defaults to current dir
"file://.../zephyr-7b-q4_k_m.bin"
]
}
```

View File

@ -31,7 +31,6 @@ In this section, we will show you how to import a GGUF model from [HuggingFace](
## Manually Importing a Downloaded Model (nightly versions and v0.4.4+)
### 1. Create a Model Folder
Navigate to the `~/jan/models` folder. You can find this folder by going to `App Settings` > `Advanced` > `Open App Directory`.
@ -92,7 +91,7 @@ Drag and drop your model binary into this folder, ensuring the `modelname.gguf`
#### 3. Voila
If your model doesn't show up in the Model Selector in conversations, please restart the app.
If your model doesn't show up in the Model Selector in conversations, please restart the app.
If that doesn't work, please feel free to join our [Discord community](https://discord.gg/Dt7MxDyNNZ) for support, updates, and discussions.
@ -190,14 +189,18 @@ Edit `model.json` and include the following configurations:
- Ensure the filename is `model.json`.
- Ensure the `id` property matches the folder name you created.
- Ensure the GGUF filename matches the `id` property exactly.
- Ensure the `source_url` property is the direct binary download link ending in `.gguf`. In HuggingFace, you can find the direct links in the `Files and versions` tab.
- Ensure the `source.url` property is the direct binary download link ending in `.gguf`. In HuggingFace, you can find the direct links in the `Files and versions` tab.
- Ensure you are using the correct `prompt_template`. This is usually provided in the HuggingFace model's description page.
- Ensure the `state` property is set to `ready`.
```json title="model.json"
{
// highlight-start
"source_url": "https://huggingface.co/janhq/trinity-v1-GGUF/resolve/main/trinity-v1.Q4_K_M.gguf",
"sources": [
{
"filename": "trinity-v1.Q4_K_M.gguf",
"url": "https://huggingface.co/janhq/trinity-v1-GGUF/resolve/main/trinity-v1.Q4_K_M.gguf"
}
],
"id": "trinity-v1-7b",
// highlight-end
"object": "model",
@ -208,7 +211,8 @@ Edit `model.json` and include the following configurations:
"settings": {
"ctx_len": 4096,
// highlight-next-line
"prompt_template": "{system_message}\n### Instruction:\n{prompt}\n### Response:"
"prompt_template": "{system_message}\n### Instruction:\n{prompt}\n### Response:",
"llama_model_path": "trinity-v1.Q4_K_M.gguf"
},
"parameters": {
"max_tokens": 4096
@ -218,9 +222,7 @@ Edit `model.json` and include the following configurations:
"tags": ["7B", "Merged"],
"size": 4370000000
},
"engine": "nitro",
// highlight-next-line
"state": "ready"
"engine": "nitro"
}
```

View File

@ -40,7 +40,12 @@ Navigate to the `~/jan/models` folder. Create a folder named `gpt-3.5-turbo-16k`
```json title="~/jan/models/gpt-3.5-turbo-16k/model.json"
{
"source_url": "https://openai.com",
"sources": [
{
"filename": "openai",
"url": "https://openai.com"
}
],
// highlight-next-line
"id": "gpt-3.5-turbo-16k",
"object": "model",
@ -55,8 +60,7 @@ Navigate to the `~/jan/models` folder. Create a folder named `gpt-3.5-turbo-16k`
"author": "OpenAI",
"tags": ["General", "Big Context Length"]
},
"engine": "openai",
"state": "ready"
"engine": "openai"
// highlight-end
}
```
@ -118,7 +122,12 @@ Navigate to the `~/jan/models` folder. Create a folder named `mistral-ins-7b-q4`
```json title="~/jan/models/mistral-ins-7b-q4/model.json"
{
"source_url": "https://jan.ai",
"sources": [
{
"filename": "janai",
"url": "https://jan.ai"
}
],
// highlight-next-line
"id": "mistral-ins-7b-q4",
"object": "model",
@ -134,8 +143,7 @@ Navigate to the `~/jan/models` folder. Create a folder named `mistral-ins-7b-q4`
"tags": ["remote", "awesome"]
},
// highlight-start
"engine": "openai",
"state": "ready"
"engine": "openai"
// highlight-end
}
```

View File

@ -49,7 +49,12 @@ Navigate to the `~/jan/models` folder. Create a folder named `<openrouter-modeln
```json title="~/jan/models/openrouter-dolphin-mixtral-8x7b/model.json"
{
"source_url": "https://openrouter.ai/",
"sources": [
{
"filename": "openrouter",
"url": "https://openrouter.ai/"
}
],
"id": "cognitivecomputations/dolphin-mixtral-8x7b",
"object": "model",
"name": "Dolphin 2.6 Mixtral 8x7B",
@ -63,8 +68,7 @@ Navigate to the `~/jan/models` folder. Create a folder named `<openrouter-modeln
"tags": ["General", "Big Context Length"]
},
// highlight-start
"engine": "openai",
"state": "ready"
"engine": "openai"
// highlight-end
}
```

View File

@ -59,7 +59,12 @@ Navigate to the `~/jan/models` folder. Create a folder named `<your-deployment-n
```json title="~/jan/models/gpt-35-hieu-jan/model.json"
{
"source_url": "https://hieujan.openai.azure.com",
"sources": [
{
"filename": "azure_openai",
"url": "https://hieujan.openai.azure.com"
}
],
// highlight-next-line
"id": "gpt-35-hieu-jan",
"object": "model",
@ -75,8 +80,7 @@ Navigate to the `~/jan/models` folder. Create a folder named `<your-deployment-n
"tags": ["General", "Big Context Length"]
},
// highlight-start
"engine": "openai",
"state": "ready"
"engine": "openai"
// highlight-end
}
```

View File

@ -59,18 +59,26 @@ components:
type: string
description: State format of the model, distinct from the engine.
example: ggufv3
source_url:
type: string
format: uri
description: URL to the source of the model.
example: https://huggingface.co/TheBloke/zephyr-7B-beta-GGUF/blob/main/zephyr-7b-beta.Q4_K_M.gguf
source:
type: array
items:
type: object
properties:
url:
format: uri
description: URL to the source of the model.
example: https://huggingface.co/TheBloke/zephyr-7B-beta-GGUF/blob/main/zephyr-7b-beta.Q4_K_M.gguf
filename:
type: string
description: Filename of the model.
example: zephyr-7b-beta.Q4_K_M.gguf
settings:
type: object
properties:
ctx_len:
type: string
description: Context length.
example: "2048"
example: "4096"
ngl:
type: string
description: Number of layers.
@ -94,7 +102,7 @@ components:
token_limit:
type: string
description: Token limit for the model.
example: "2048"
example: "4096"
top_k:
type: string
description: Top-k setting for the model.
@ -117,7 +125,7 @@ components:
type: string
description: List of assets related to the model.
required:
- source_url
- source
ModelObject:
type: object
properties:
@ -169,11 +177,19 @@ components:
- running
- stopped
description: The current state of the model.
source_url:
type: string
format: uri
description: URL to the source of the model.
example: https://huggingface.co/TheBloke/zephyr-7B-beta-GGUF/blob/main/zephyr-7b-beta.Q4_K_M.gguf
source:
type: array
items:
type: object
properties:
url:
format: uri
description: URL to the source of the model.
example: https://huggingface.co/TheBloke/zephyr-7B-beta-GGUF/blob/main/zephyr-7b-beta.Q4_K_M.gguf
filename:
type: string
description: Filename of the model.
example: zephyr-7b-beta.Q4_K_M.gguf
engine_parameters:
type: object
properties:
@ -208,8 +224,8 @@ components:
model.
minimum: 128
maximum: 4096
default: 2048
example: 2048
default: 4096
example: 4096
n_parallel:
type: integer
description: Number of parallel operations, relevant when continuous batching is
@ -241,8 +257,8 @@ components:
description: Maximum context length the model can handle.
minimum: 0
maximum: 4096
default: 2048
example: 2048
default: 4096
example: 4096
ngl:
type: integer
description: Number of layers in the neural network.
@ -276,8 +292,8 @@ components:
response.
minimum: 1
maximum: 4096
default: 2048
example: 2048
default: 4096
example: 4096
top_k:
type: integer
description: Limits the model to consider only the top k most likely next tokens
@ -318,7 +334,7 @@ components:
- created
- owned_by
- state
- source_url
- source
- parameters
- metadata
DeleteModelResponse:

View File

@ -1,5 +1,5 @@
import { app, ipcMain, dialog, shell } from 'electron'
import { join, basename } from 'path'
import { join, basename, relative as getRelative, isAbsolute } from 'path'
import { WindowManager } from './../managers/window'
import { getResourcePath } from './../utils/path'
import { AppRoute, AppConfiguration } from '@janhq/core'
@ -50,6 +50,27 @@ export function handleAppIPCs() {
join(...paths)
)
/**
* Checks if the given path is a subdirectory of the given directory.
*
* @param _event - The IPC event object.
* @param from - The path to check.
* @param to - The directory to check against.
*
* @returns {Promise<boolean>} - A promise that resolves with the result.
*/
ipcMain.handle(
AppRoute.isSubdirectory,
async (_event, from: string, to: string) => {
const relative = getRelative(from, to)
const isSubdir =
relative && !relative.startsWith('..') && !isAbsolute(relative)
if (isSubdir === '') return false
else return isSubdir
}
)
/**
* Retrieve basename from given path, respect to the current OS.
*/

View File

@ -1,4 +1,4 @@
import { ipcMain } from 'electron'
import { ipcMain, app } from 'electron'
// @ts-ignore
import reflect from '@alumna/reflect'
@ -38,6 +38,10 @@ export function handleFileMangerIPCs() {
getResourcePath()
)
ipcMain.handle(FileManagerRoute.getUserHomePath, async (_event) =>
app.getPath('home')
)
// handle fs is directory here
ipcMain.handle(
FileManagerRoute.fileStat,
@ -59,4 +63,20 @@ export function handleFileMangerIPCs() {
return fileStat
}
)
ipcMain.handle(
FileManagerRoute.writeBlob,
async (_event, path: string, data: string): Promise<void> => {
try {
const normalizedPath = normalizeFilePath(path)
const dataBuffer = Buffer.from(data, 'base64')
fs.writeFileSync(
join(getJanDataFolderPath(), normalizedPath),
dataBuffer
)
} catch (err) {
console.error(`writeBlob ${path} error: ${err}`)
}
}
)
}

View File

@ -1,9 +1,9 @@
import { ipcMain } from 'electron'
import { FileSystemRoute } from '@janhq/core'
import { join } from 'path'
import { getJanDataFolderPath, normalizeFilePath } from '@janhq/core/node'
import fs from 'fs'
import { FileManagerRoute, FileSystemRoute } from '@janhq/core'
import { join } from 'path'
/**
* Handles file system operations.
*/
@ -15,7 +15,7 @@ export function handleFsIPCs() {
mdl[route](
...args.map((arg) =>
typeof arg === 'string' &&
(arg.includes(`file:/`) || arg.includes(`file:\\`))
(arg.startsWith(`file:/`) || arg.startsWith(`file:\\`))
? join(getJanDataFolderPath(), normalizeFilePath(arg))
: arg
)

View File

@ -28,6 +28,22 @@ import { setupCore } from './utils/setup'
app
.whenReady()
.then(async () => {
if (!app.isPackaged) {
// Which means you're running from source code
const { default: installExtension, REACT_DEVELOPER_TOOLS } = await import(
'electron-devtools-installer'
) // Don't use import on top level, since the installer package is dev-only
try {
const name = await installExtension(REACT_DEVELOPER_TOOLS)
console.log(`Added Extension: ${name}`)
} catch (err) {
console.log('An error occurred while installing devtools:')
console.error(err)
// Only log the error and don't throw it because it's not critical
}
}
})
.then(setupCore)
.then(createUserSpace)
.then(migrateExtensions)

View File

@ -86,7 +86,7 @@
"request": "^2.88.2",
"request-progress": "^3.0.0",
"rimraf": "^5.0.5",
"typescript": "^5.3.3",
"typescript": "^5.2.2",
"ulid": "^2.3.0",
"use-debounce": "^9.0.4"
},
@ -99,6 +99,7 @@
"@typescript-eslint/parser": "^6.7.3",
"electron": "28.0.0",
"electron-builder": "^24.9.1",
"electron-devtools-installer": "^3.2.0",
"electron-playwright-helpers": "^1.6.0",
"eslint-plugin-react": "^7.33.2",
"run-script-os": "^1.1.6"

View File

@ -1,9 +1,9 @@
import { PlaywrightTestConfig } from "@playwright/test";
import { PlaywrightTestConfig } from '@playwright/test'
const config: PlaywrightTestConfig = {
testDir: "./tests",
testDir: './tests',
retries: 0,
timeout: 120000,
};
globalTimeout: 300000,
}
export default config;
export default config

View File

@ -9,6 +9,7 @@ import {
let electronApp: ElectronApplication
let page: Page
const TIMEOUT: number = parseInt(process.env.TEST_TIMEOUT || '300000')
test.beforeAll(async () => {
process.env.CI = 'e2e'
@ -26,7 +27,9 @@ test.beforeAll(async () => {
})
await stubDialog(electronApp, 'showMessageBox', { response: 1 })
page = await electronApp.firstWindow()
page = await electronApp.firstWindow({
timeout: TIMEOUT,
})
})
test.afterAll(async () => {
@ -34,8 +37,12 @@ test.afterAll(async () => {
await page.close()
})
test('explores models', async () => {
await page.getByTestId('Hub').first().click()
await page.getByTestId('testid-explore-models').isVisible()
// More test cases here...
test('explores hub', async () => {
test.setTimeout(TIMEOUT)
await page.getByTestId('Hub').first().click({
timeout: TIMEOUT,
})
await page.getByTestId('hub-container-test-id').isVisible({
timeout: TIMEOUT,
})
})

View File

@ -1,55 +0,0 @@
import { _electron as electron } from 'playwright'
import { ElectronApplication, Page, expect, test } from '@playwright/test'
import {
findLatestBuild,
parseElectronApp,
stubDialog,
} from 'electron-playwright-helpers'
let electronApp: ElectronApplication
let page: Page
test.beforeAll(async () => {
process.env.CI = 'e2e'
const latestBuild = findLatestBuild('dist')
expect(latestBuild).toBeTruthy()
// parse the packaged Electron app and find paths and other info
const appInfo = parseElectronApp(latestBuild)
expect(appInfo).toBeTruthy()
expect(appInfo.asar).toBe(true)
expect(appInfo.executable).toBeTruthy()
expect(appInfo.main).toBeTruthy()
expect(appInfo.name).toBe('jan')
expect(appInfo.packageJson).toBeTruthy()
expect(appInfo.packageJson.name).toBe('jan')
expect(appInfo.platform).toBeTruthy()
expect(appInfo.platform).toBe(process.platform)
expect(appInfo.resourcesDir).toBeTruthy()
electronApp = await electron.launch({
args: [appInfo.main], // main file from package.json
executablePath: appInfo.executable, // path to the Electron executable
})
await stubDialog(electronApp, 'showMessageBox', { response: 1 })
page = await electronApp.firstWindow()
})
test.afterAll(async () => {
await electronApp.close()
await page.close()
})
test('renders the home page', async () => {
expect(page).toBeDefined()
// Welcome text is available
const welcomeText = await page
.getByTestId('testid-welcome-title')
.first()
.isVisible()
expect(welcomeText).toBe(false)
})

View File

@ -9,6 +9,7 @@ import {
let electronApp: ElectronApplication
let page: Page
const TIMEOUT: number = parseInt(process.env.TEST_TIMEOUT || '300000')
test.beforeAll(async () => {
process.env.CI = 'e2e'
@ -26,7 +27,9 @@ test.beforeAll(async () => {
})
await stubDialog(electronApp, 'showMessageBox', { response: 1 })
page = await electronApp.firstWindow()
page = await electronApp.firstWindow({
timeout: TIMEOUT,
})
})
test.afterAll(async () => {
@ -35,20 +38,24 @@ test.afterAll(async () => {
})
test('renders left navigation panel', async () => {
// Chat section should be there
const chatSection = await page.getByTestId('Chat').first().isVisible()
expect(chatSection).toBe(false)
// Home actions
/* Disable unstable feature tests
** const botBtn = await page.getByTestId("Bot").first().isEnabled();
** Enable back when it is whitelisted
*/
test.setTimeout(TIMEOUT)
const systemMonitorBtn = await page
.getByTestId('System Monitor')
.first()
.isEnabled()
const settingsBtn = await page.getByTestId('Settings').first().isEnabled()
.isEnabled({
timeout: TIMEOUT,
})
const settingsBtn = await page
.getByTestId('Thread')
.first()
.isEnabled({ timeout: TIMEOUT })
expect([systemMonitorBtn, settingsBtn].filter((e) => !e).length).toBe(0)
// Chat section should be there
await page.getByTestId('Local API Server').first().click({
timeout: TIMEOUT,
})
const localServer = await page.getByTestId('local-server-testid').first()
await expect(localServer).toBeVisible({
timeout: TIMEOUT,
})
})

View File

@ -9,6 +9,7 @@ import {
let electronApp: ElectronApplication
let page: Page
const TIMEOUT: number = parseInt(process.env.TEST_TIMEOUT || '300000')
test.beforeAll(async () => {
process.env.CI = 'e2e'
@ -26,7 +27,9 @@ test.beforeAll(async () => {
})
await stubDialog(electronApp, 'showMessageBox', { response: 1 })
page = await electronApp.firstWindow()
page = await electronApp.firstWindow({
timeout: TIMEOUT,
})
})
test.afterAll(async () => {
@ -35,6 +38,8 @@ test.afterAll(async () => {
})
test('shows settings', async () => {
await page.getByTestId('Settings').first().click()
await page.getByTestId('testid-setting-description').isVisible()
test.setTimeout(TIMEOUT)
await page.getByTestId('Settings').first().click({ timeout: TIMEOUT })
const settingDescription = page.getByTestId('testid-setting-description')
await expect(settingDescription).toBeVisible({ timeout: TIMEOUT })
})

View File

@ -1,41 +0,0 @@
import { _electron as electron } from 'playwright'
import { ElectronApplication, Page, expect, test } from '@playwright/test'
import {
findLatestBuild,
parseElectronApp,
stubDialog,
} from 'electron-playwright-helpers'
let electronApp: ElectronApplication
let page: Page
test.beforeAll(async () => {
process.env.CI = 'e2e'
const latestBuild = findLatestBuild('dist')
expect(latestBuild).toBeTruthy()
// parse the packaged Electron app and find paths and other info
const appInfo = parseElectronApp(latestBuild)
expect(appInfo).toBeTruthy()
electronApp = await electron.launch({
args: [appInfo.main], // main file from package.json
executablePath: appInfo.executable, // path to the Electron executable
})
await stubDialog(electronApp, 'showMessageBox', { response: 1 })
page = await electronApp.firstWindow()
})
test.afterAll(async () => {
await electronApp.close()
await page.close()
})
test('shows system monitor', async () => {
await page.getByTestId('System Monitor').first().click()
await page.getByTestId('testid-system-monitor').isVisible()
// More test cases here...
})

View File

@ -3,26 +3,50 @@
"version": "1.0.0",
"description": "This extension enables assistants, including Jan, a default assistant that can call all downloaded models",
"main": "dist/index.js",
"module": "dist/module.js",
"node": "dist/node/index.js",
"author": "Jan <service@jan.ai>",
"license": "AGPL-3.0",
"scripts": {
"build": "tsc -b . && webpack --config webpack.config.js",
"build:publish": "rimraf *.tgz --glob && npm run build && npm pack && cpx *.tgz ../../electron/pre-install"
"build": "tsc --module commonjs && rollup -c rollup.config.ts",
"build:publish:linux": "rimraf *.tgz --glob && npm run build && npm pack && cpx *.tgz ../../electron/pre-install",
"build:publish:darwin": "rimraf *.tgz --glob && npm run build && ../../.github/scripts/auto-sign.sh && npm pack && cpx *.tgz ../../electron/pre-install",
"build:publish:win32": "rimraf *.tgz --glob && npm run build && npm pack && cpx *.tgz ../../electron/pre-install",
"build:publish": "run-script-os"
},
"devDependencies": {
"@rollup/plugin-commonjs": "^25.0.7",
"@rollup/plugin-json": "^6.1.0",
"@rollup/plugin-node-resolve": "^15.2.3",
"@rollup/plugin-replace": "^5.0.5",
"@types/pdf-parse": "^1.1.4",
"cpx": "^1.5.0",
"rimraf": "^3.0.2",
"webpack": "^5.88.2",
"webpack-cli": "^5.1.4"
"rollup": "^2.38.5",
"rollup-plugin-define": "^1.0.1",
"rollup-plugin-sourcemaps": "^0.6.3",
"rollup-plugin-typescript2": "^0.36.0",
"typescript": "^5.3.3",
"run-script-os": "^1.1.6"
},
"dependencies": {
"@janhq/core": "file:../../core",
"@langchain/community": "0.0.13",
"hnswlib-node": "^1.4.2",
"langchain": "^0.0.214",
"path-browserify": "^1.0.1",
"pdf-parse": "^1.1.1",
"ts-loader": "^9.5.0"
},
"files": [
"dist/*",
"package.json",
"README.md"
],
"bundleDependencies": [
"@janhq/core",
"@langchain/community",
"hnswlib-node",
"langchain",
"pdf-parse"
]
}

View File

@ -0,0 +1,81 @@
import resolve from "@rollup/plugin-node-resolve";
import commonjs from "@rollup/plugin-commonjs";
import sourceMaps from "rollup-plugin-sourcemaps";
import typescript from "rollup-plugin-typescript2";
import json from "@rollup/plugin-json";
import replace from "@rollup/plugin-replace";
const packageJson = require("./package.json");
export default [
{
input: `src/index.ts`,
output: [{ file: packageJson.main, format: "es", sourcemap: true }],
// Indicate here external modules you don't want to include in your bundle (i.e.: 'lodash')
external: [],
watch: {
include: "src/**",
},
plugins: [
replace({
NODE: JSON.stringify(`${packageJson.name}/${packageJson.node}`),
EXTENSION_NAME: JSON.stringify(packageJson.name),
VERSION: JSON.stringify(packageJson.version),
}),
// Allow json resolution
json(),
// Compile TypeScript files
typescript({ useTsconfigDeclarationDir: true }),
// Allow bundling cjs modules (unlike webpack, rollup doesn't understand cjs)
commonjs(),
// Allow node_modules resolution, so you can use 'external' to control
// which external modules to include in the bundle
// https://github.com/rollup/rollup-plugin-node-resolve#usage
resolve({
extensions: [".js", ".ts", ".svelte"],
}),
// Resolve source maps to the original source
sourceMaps(),
],
},
{
input: `src/node/index.ts`,
output: [{ dir: "dist/node", format: "cjs", sourcemap: false }],
// Indicate here external modules you don't want to include in your bundle (i.e.: 'lodash')
external: [
"@janhq/core/node",
"@langchain/community",
"langchain",
"langsmith",
"path",
"hnswlib-node",
],
watch: {
include: "src/node/**",
},
// inlineDynamicImports: true,
plugins: [
// Allow json resolution
json(),
// Compile TypeScript files
typescript({ useTsconfigDeclarationDir: true }),
// Allow bundling cjs modules (unlike webpack, rollup doesn't understand cjs)
commonjs({
ignoreDynamicRequires: true,
}),
// Allow node_modules resolution, so you can use 'external' to control
// which external modules to include in the bundle
// https://github.com/rollup/rollup-plugin-node-resolve#usage
resolve({
extensions: [".ts", ".js", ".json"],
}),
// Resolve source maps to the original source
// sourceMaps(),
],
},
];

View File

@ -1 +1,3 @@
declare const MODULE: string;
declare const NODE: string;
declare const EXTENSION_NAME: string;
declare const VERSION: string;
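These ambient declarations only satisfy the type checker; at build time rollup's replace() plugin (configured above) inlines each identifier as a string literal derived from package.json. A minimal sketch of that wiring, with illustrative package values:

import replace from "@rollup/plugin-replace";

// Illustrative values; the real ones come from require("./package.json")
const pkg = {
  name: "@janhq/assistant-extension",
  version: "1.0.0",
  node: "dist/node/index.js",
};

export const constants = replace({
  NODE: JSON.stringify(`${pkg.name}/${pkg.node}`), // consumed by executeOnMain(NODE, ...)
  EXTENSION_NAME: JSON.stringify(pkg.name), // localStorage migration key prefix
  VERSION: JSON.stringify(pkg.version), // migration version stamp
});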

View File

@ -1,16 +1,151 @@
import { fs, Assistant } from "@janhq/core";
import { AssistantExtension } from "@janhq/core";
import { join } from "path";
import {
fs,
Assistant,
MessageRequest,
events,
InferenceEngine,
MessageEvent,
InferenceEvent,
joinPath,
executeOnMain,
AssistantExtension,
} from "@janhq/core";
export default class JanAssistantExtension extends AssistantExtension {
private static readonly _homeDir = "file://assistants";
controller = new AbortController();
isCancelled = false;
retrievalThreadId: string | undefined = undefined;
async onLoad() {
// making the assistant directory
if (!(await fs.existsSync(JanAssistantExtension._homeDir)))
fs.mkdirSync(JanAssistantExtension._homeDir).then(() => {
this.createJanAssistant();
});
const assistantDirExist = await fs.existsSync(
JanAssistantExtension._homeDir,
);
if (
localStorage.getItem(`${EXTENSION_NAME}-version`) !== VERSION ||
!assistantDirExist
) {
if (!assistantDirExist)
await fs.mkdirSync(JanAssistantExtension._homeDir);
// Write assistant metadata
this.createJanAssistant();
// Finished migration
localStorage.setItem(`${EXTENSION_NAME}-version`, VERSION);
}
// Events subscription
events.on(MessageEvent.OnMessageSent, (data: MessageRequest) =>
JanAssistantExtension.handleMessageRequest(data, this),
);
events.on(InferenceEvent.OnInferenceStopped, () => {
JanAssistantExtension.handleInferenceStopped(this);
});
}
private static async handleInferenceStopped(instance: JanAssistantExtension) {
instance.isCancelled = true;
instance.controller?.abort();
}
private static async handleMessageRequest(
data: MessageRequest,
instance: JanAssistantExtension,
) {
instance.isCancelled = false;
instance.controller = new AbortController();
if (
data.model?.engine !== InferenceEngine.tool_retrieval_enabled ||
!data.messages ||
!data.thread?.assistants[0]?.tools
) {
return;
}
const latestMessage = data.messages[data.messages.length - 1];
// Ingest the document if needed
if (
latestMessage &&
latestMessage.content &&
typeof latestMessage.content !== "string"
) {
const docFile = latestMessage.content[1]?.doc_url?.url;
if (docFile) {
await executeOnMain(
NODE,
"toolRetrievalIngestNewDocument",
docFile,
data.model?.proxyEngine,
);
}
}
// Load the agent when the thread changes
if (instance.retrievalThreadId !== data.threadId) {
await executeOnMain(NODE, "toolRetrievalLoadThreadMemory", data.threadId);
instance.retrievalThreadId = data.threadId;
// Update the text splitter
await executeOnMain(
NODE,
"toolRetrievalUpdateTextSplitter",
data.thread.assistants[0].tools[0]?.settings?.chunk_size ?? 4000,
data.thread.assistants[0].tools[0]?.settings?.chunk_overlap ?? 200,
);
}
if (latestMessage.content) {
const prompt =
typeof latestMessage.content === "string"
? latestMessage.content
: latestMessage.content[0].text;
// Retrieve the result
console.debug("toolRetrievalQuery", latestMessage.content);
const retrievalResult = await executeOnMain(
NODE,
"toolRetrievalQueryResult",
prompt,
);
// Update the message content
// Using the retrieval template with the result and query
if (data.thread?.assistants[0].tools)
data.messages[data.messages.length - 1].content =
data.thread.assistants[0].tools[0].settings?.retrieval_template
?.replace("{CONTEXT}", retrievalResult)
.replace("{QUESTION}", prompt);
}
// Filter out all the messages that are not text
data.messages = data.messages.map((message) => {
if (
message.content &&
typeof message.content !== "string" &&
(message.content.length ?? 0) > 0
) {
return {
...message,
content: [message.content[0]],
};
}
return message;
});
// Reroute the result to the inference engine
const output = {
...data,
model: {
...data.model,
engine: data.model.proxyEngine,
},
};
events.emit(MessageEvent.OnMessageSent, output);
}
/**
@ -19,15 +154,21 @@ export default class JanAssistantExtension extends AssistantExtension {
onUnload(): void {}
async createAssistant(assistant: Assistant): Promise<void> {
const assistantDir = join(JanAssistantExtension._homeDir, assistant.id);
const assistantDir = await joinPath([
JanAssistantExtension._homeDir,
assistant.id,
]);
if (!(await fs.existsSync(assistantDir))) await fs.mkdirSync(assistantDir);
// store the assistant metadata json
const assistantMetadataPath = join(assistantDir, "assistant.json");
const assistantMetadataPath = await joinPath([
assistantDir,
"assistant.json",
]);
try {
await fs.writeFileSync(
assistantMetadataPath,
JSON.stringify(assistant, null, 2)
JSON.stringify(assistant, null, 2),
);
} catch (err) {
console.error(err);
@ -39,14 +180,17 @@ export default class JanAssistantExtension extends AssistantExtension {
// get all the assistant metadata json
const results: Assistant[] = [];
const allFileName: string[] = await fs.readdirSync(
JanAssistantExtension._homeDir
JanAssistantExtension._homeDir,
);
for (const fileName of allFileName) {
const filePath = join(JanAssistantExtension._homeDir, fileName);
const filePath = await joinPath([
JanAssistantExtension._homeDir,
fileName,
]);
if (filePath.includes(".DS_Store")) continue;
const jsonFiles: string[] = (await fs.readdirSync(filePath)).filter(
(file: string) => file === "assistant.json"
(file: string) => file === "assistant.json",
);
if (jsonFiles.length !== 1) {
@ -55,8 +199,8 @@ export default class JanAssistantExtension extends AssistantExtension {
}
const content = await fs.readFileSync(
join(filePath, jsonFiles[0]),
"utf-8"
await joinPath([filePath, jsonFiles[0]]),
"utf-8",
);
const assistant: Assistant =
typeof content === "object" ? content : JSON.parse(content);
@ -73,7 +217,10 @@ export default class JanAssistantExtension extends AssistantExtension {
}
// remove the directory
const assistantDir = join(JanAssistantExtension._homeDir, assistant.id);
const assistantDir = await joinPath([
JanAssistantExtension._homeDir,
assistant.id,
]);
await fs.rmdirSync(assistantDir);
return Promise.resolve();
}
@ -89,7 +236,24 @@ export default class JanAssistantExtension extends AssistantExtension {
description: "A default assistant that can use all downloaded models",
model: "*",
instructions: "",
tools: undefined,
tools: [
{
type: "retrieval",
enabled: false,
settings: {
top_k: 2,
chunk_size: 1024,
chunk_overlap: 64,
retrieval_template: `Use the following pieces of context to answer the question at the end. If you don't know the answer, just say that you don't know, don't try to make up an answer.
----------------
CONTEXT: {CONTEXT}
----------------
QUESTION: {QUESTION}
----------------
Helpful Answer:`,
},
},
],
file_ids: [],
metadata: undefined,
};
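For reference, the {CONTEXT}/{QUESTION} placeholders in the retrieval_template above are plain string substitutions performed in handleMessageRequest before the message is re-emitted. A minimal sketch (template trimmed, values illustrative):

function fillRetrievalTemplate(
  template: string,
  retrievalResult: string,
  prompt: string,
): string {
  // same two replacements handleMessageRequest applies
  return template
    .replace("{CONTEXT}", retrievalResult)
    .replace("{QUESTION}", prompt);
}

// fillRetrievalTemplate(
//   "CONTEXT: {CONTEXT}\nQUESTION: {QUESTION}",
//   "...top-k chunks from the ingested PDF...",
//   "What is the warranty period?",
// )
// -> "CONTEXT: ...top-k chunks...\nQUESTION: What is the warranty period?"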

View File

@ -0,0 +1,13 @@
import fs from "fs";
import path from "path";
import { getJanDataFolderPath } from "@janhq/core/node";
// Security: do not send engine settings over requests;
// read them from disk manually instead
export const readEmbeddingEngine = (engineName: string) => {
const engineSettings = fs.readFileSync(
path.join(getJanDataFolderPath(), "engines", `${engineName}.json`),
"utf-8",
);
return JSON.parse(engineSettings);
};
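A usage sketch of the helper, assuming the standard layout of <jan data folder>/engines/openai.json with an api_key field (as consumed by updateEmbeddingEngine below):

import { readEmbeddingEngine } from "./engine";

// Reads <jan data folder>/engines/openai.json from disk rather than
// accepting the settings over a request
const settings = readEmbeddingEngine("openai");
if (typeof settings.api_key !== "string") {
  throw new Error("openai.json is missing an api_key");
}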

View File

@ -0,0 +1,39 @@
import { getJanDataFolderPath, normalizeFilePath } from "@janhq/core/node";
import { Retrieval } from "./tools/retrieval";
import path from "path";
const retrieval = new Retrieval();
export async function toolRetrievalUpdateTextSplitter(
chunkSize: number,
chunkOverlap: number,
) {
retrieval.updateTextSplitter(chunkSize, chunkOverlap);
return Promise.resolve();
}
export async function toolRetrievalIngestNewDocument(
file: string,
engine: string,
) {
const filePath = path.join(getJanDataFolderPath(), normalizeFilePath(file));
const threadPath = path.dirname(filePath.replace("files", ""));
retrieval.updateEmbeddingEngine(engine);
await retrieval.ingestAgentKnowledge(filePath, `${threadPath}/memory`);
return Promise.resolve();
}
export async function toolRetrievalLoadThreadMemory(threadId: string) {
try {
await retrieval.loadRetrievalAgent(
path.join(getJanDataFolderPath(), "threads", threadId, "memory"),
);
return Promise.resolve();
} catch (err) {
console.debug(err);
}
}
export async function toolRetrievalQueryResult(query: string) {
const res = await retrieval.generateResult(query);
return Promise.resolve(res);
}

View File

@ -0,0 +1,79 @@
import { RecursiveCharacterTextSplitter } from "langchain/text_splitter";
import { formatDocumentsAsString } from "langchain/util/document";
import { PDFLoader } from "langchain/document_loaders/fs/pdf";
import { HNSWLib } from "langchain/vectorstores/hnswlib";
import { OpenAIEmbeddings } from "langchain/embeddings/openai";
import { readEmbeddingEngine } from "../../engine";
export class Retrieval {
public chunkSize: number = 100;
public chunkOverlap?: number = 0;
private retriever: any;
private embeddingModel?: OpenAIEmbeddings = undefined;
private textSplitter?: RecursiveCharacterTextSplitter;
constructor(chunkSize: number = 4000, chunkOverlap: number = 200) {
this.updateTextSplitter(chunkSize, chunkOverlap);
}
public updateTextSplitter(chunkSize: number, chunkOverlap: number): void {
this.chunkSize = chunkSize;
this.chunkOverlap = chunkOverlap;
this.textSplitter = new RecursiveCharacterTextSplitter({
chunkSize: chunkSize,
chunkOverlap: chunkOverlap,
});
}
public updateEmbeddingEngine(engine: string): void {
// Engine settings are not compatible with the current embedding model params,
// so branch per engine manually for now
const settings = readEmbeddingEngine(engine);
if (engine === "nitro") {
this.embeddingModel = new OpenAIEmbeddings(
{ openAIApiKey: "nitro-embedding" },
{ basePath: "http://127.0.0.1:3928/v1" }
);
} else {
// Fallback to OpenAI Settings
this.embeddingModel = new OpenAIEmbeddings({
configuration: {
apiKey: settings.api_key,
},
});
}
}
public ingestAgentKnowledge = async (
filePath: string,
memoryPath: string
): Promise<any> => {
const loader = new PDFLoader(filePath, {
splitPages: true,
});
if (!this.embeddingModel) return Promise.reject();
const doc = await loader.load();
const docs = await this.textSplitter!.splitDocuments(doc);
const vectorStore = await HNSWLib.fromDocuments(docs, this.embeddingModel);
return vectorStore.save(memoryPath);
};
public loadRetrievalAgent = async (memoryPath: string): Promise<void> => {
if (!this.embeddingModel) return Promise.reject();
const vectorStore = await HNSWLib.load(memoryPath, this.embeddingModel);
this.retriever = vectorStore.asRetriever(2);
return Promise.resolve();
};
public generateResult = async (query: string): Promise<string> => {
if (!this.retriever) {
return Promise.resolve(" ");
}
const relevantDocs = await this.retriever.getRelevantDocuments(query);
const serializedDoc = formatDocumentsAsString(relevantDocs);
return Promise.resolve(serializedDoc);
};
}
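Taken together, the node entry point above drives this class in a fixed order: pick an embedding engine, ingest a document into a per-thread memory folder, load that memory, then query it. A minimal end-to-end sketch (paths illustrative; the real ones are derived from getJanDataFolderPath(), and the nitro branch assumes engines/nitro.json exists in the Jan data folder):

import { Retrieval } from "./tools/retrieval";

async function demo(): Promise<void> {
  const retrieval = new Retrieval(1024, 64); // chunk_size / chunk_overlap, as in the default tool settings
  retrieval.updateEmbeddingEngine("nitro"); // embeds locally via http://127.0.0.1:3928/v1
  await retrieval.ingestAgentKnowledge("/tmp/manual.pdf", "/tmp/thread-1/memory");
  await retrieval.loadRetrievalAgent("/tmp/thread-1/memory");
  const context = await retrieval.generateResult("What does chapter 2 cover?");
  console.log(context); // serialized top-2 chunks, or " " if nothing was loaded
}

demo().catch(console.error);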

View File

@ -1,14 +1,20 @@
{
"compilerOptions": {
"target": "es2016",
"module": "ES6",
"moduleResolution": "node",
"outDir": "./dist",
"esModuleInterop": true,
"forceConsistentCasingInFileNames": true,
"strict": false,
"target": "es5",
"module": "ES2020",
"lib": ["es2015", "es2016", "es2017", "dom"],
"strict": true,
"sourceMap": true,
"declaration": true,
"allowSyntheticDefaultImports": true,
"experimentalDecorators": true,
"emitDecoratorMetadata": true,
"declarationDir": "dist/types",
"outDir": "dist",
"importHelpers": true,
"typeRoots": ["node_modules/@types"],
"skipLibCheck": true,
"rootDir": "./src"
},
"include": ["./src"]
"include": ["src"],
}

View File

@ -1,38 +0,0 @@
const path = require("path");
const webpack = require("webpack");
const packageJson = require("./package.json");
module.exports = {
experiments: { outputModule: true },
entry: "./src/index.ts", // Adjust the entry point to match your project's main file
mode: "production",
module: {
rules: [
{
test: /\.tsx?$/,
use: "ts-loader",
exclude: /node_modules/,
},
],
},
output: {
filename: "index.js", // Adjust the output file name as needed
path: path.resolve(__dirname, "dist"),
library: { type: "module" }, // Specify ESM output format
},
plugins: [
new webpack.DefinePlugin({
MODULE: JSON.stringify(`${packageJson.name}/${packageJson.module}`),
}),
],
resolve: {
extensions: [".ts", ".js"],
fallback: {
path: require.resolve("path-browserify"),
},
},
optimization: {
minimize: false,
},
// Add loaders and other configuration as needed for your project
};

View File

@ -4,15 +4,14 @@ import {
ConversationalExtension,
Thread,
ThreadMessage,
events,
} from '@janhq/core'
/**
* JSONConversationalExtension is a ConversationalExtension implementation that provides
* functionality for managing threads.
*/
export default class JSONConversationalExtension
extends ConversationalExtension
{
export default class JSONConversationalExtension extends ConversationalExtension {
private static readonly _homeDir = 'file://threads'
private static readonly _threadInfoFileName = 'thread.json'
private static readonly _threadMessagesFileName = 'messages.jsonl'
@ -119,6 +118,33 @@ export default class JSONConversationalExtension
])
if (!(await fs.existsSync(threadDirPath)))
await fs.mkdirSync(threadDirPath)
if (message.content[0]?.type === 'image') {
const filesPath = await joinPath([threadDirPath, 'files'])
if (!(await fs.existsSync(filesPath))) await fs.mkdirSync(filesPath)
const imagePath = await joinPath([filesPath, `${message.id}.png`])
const base64 = message.content[0].text.annotations[0]
await this.storeImage(base64, imagePath)
if ((await fs.existsSync(imagePath)) && message.content?.length) {
// Use file path instead of blob
message.content[0].text.annotations[0] = `threads/${message.thread_id}/files/${message.id}.png`
}
}
if (message.content[0]?.type === 'pdf') {
const filesPath = await joinPath([threadDirPath, 'files'])
if (!(await fs.existsSync(filesPath))) await fs.mkdirSync(filesPath)
const filePath = await joinPath([filesPath, `${message.id}.pdf`])
const blob = message.content[0].text.annotations[0]
await this.storeFile(blob, filePath)
if ((await fs.existsSync(filePath)) && message.content?.length) {
// Use file path instead of blob
message.content[0].text.annotations[0] = `threads/${message.thread_id}/files/${message.id}.pdf`
}
}
await fs.appendFileSync(threadMessagePath, JSON.stringify(message) + '\n')
Promise.resolve()
} catch (err) {
@ -126,6 +152,25 @@ export default class JSONConversationalExtension
}
}
async storeImage(base64: string, filePath: string): Promise<void> {
const base64Data = base64.replace(/^data:image\/\w+;base64,/, '')
try {
await fs.writeBlob(filePath, base64Data)
} catch (err) {
console.error(err)
}
}
async storeFile(base64: string, filePath: string): Promise<void> {
const base64Data = base64.replace(/^data:application\/pdf;base64,/, '')
try {
await fs.writeBlob(filePath, base64Data)
} catch (err) {
console.error(err)
}
}
async writeMessages(
threadId: string,
messages: ThreadMessage[]
@ -229,7 +274,11 @@ export default class JSONConversationalExtension
const messages: ThreadMessage[] = []
result.forEach((line: string) => {
messages.push(JSON.parse(line) as ThreadMessage)
try {
messages.push(JSON.parse(line) as ThreadMessage)
} catch (err) {
console.error(err)
}
})
return messages
} catch (err) {

View File

@ -1 +1 @@
0.2.12
0.3.5

View File

@ -35,11 +35,12 @@
"rollup-plugin-sourcemaps": "^0.6.3",
"rollup-plugin-typescript2": "^0.36.0",
"run-script-os": "^1.1.6",
"typescript": "^5.3.3"
"typescript": "^5.2.2"
},
"dependencies": {
"@janhq/core": "file:../../core",
"@rollup/plugin-replace": "^5.0.5",
"@types/os-utils": "^0.0.4",
"fetch-retry": "^5.0.6",
"path-browserify": "^1.0.1",
"rxjs": "^7.8.1",

View File

@ -2,22 +2,6 @@ declare const NODE: string;
declare const INFERENCE_URL: string;
declare const TROUBLESHOOTING_URL: string;
/**
* The parameters for the initModel function.
* @property settings - The settings for the machine learning model.
* @property settings.ctx_len - The context length.
* @property settings.ngl - The number of generated tokens.
* @property settings.cont_batching - Whether to use continuous batching.
* @property settings.embedding - Whether to use embedding.
*/
interface EngineSettings {
ctx_len: number;
ngl: number;
cpu_threads: number;
cont_batching: boolean;
embedding: boolean;
}
/**
* The response from the initModel function.
* @property error - An error message if the model fails to load.
@ -26,8 +10,3 @@ interface ModelOperationResponse {
error?: any;
modelFile?: string;
}
interface ResourcesInfo {
numCpuPhysicalCore: number;
memAvailable: number;
}

View File

@ -24,6 +24,7 @@ import {
MessageEvent,
ModelEvent,
InferenceEvent,
ModelSettingParams,
} from "@janhq/core";
import { requestInference } from "./helpers/sse";
import { ulid } from "ulid";
@ -45,12 +46,12 @@ export default class JanInferenceNitroExtension extends InferenceExtension {
private _currentModel: Model | undefined;
private _engineSettings: EngineSettings = {
private _engineSettings: ModelSettingParams = {
ctx_len: 2048,
ngl: 100,
cpu_threads: 1,
cont_batching: false,
embedding: false,
embedding: true,
};
controller = new AbortController();
@ -83,19 +84,19 @@ export default class JanInferenceNitroExtension extends InferenceExtension {
// Events subscription
events.on(MessageEvent.OnMessageSent, (data: MessageRequest) =>
this.onMessageRequest(data)
this.onMessageRequest(data),
);
events.on(ModelEvent.OnModelInit, (model: Model) =>
this.onModelInit(model)
this.onModelInit(model),
);
events.on(ModelEvent.OnModelStop, (model: Model) =>
this.onModelStop(model)
this.onModelStop(model),
);
events.on(InferenceEvent.OnInferenceStopped, () =>
this.onInferenceStopped()
this.onInferenceStopped(),
);
// Attempt to fetch nvidia info
@ -120,7 +121,7 @@ export default class JanInferenceNitroExtension extends InferenceExtension {
} else {
await fs.writeFileSync(
engineFile,
JSON.stringify(this._engineSettings, null, 2)
JSON.stringify(this._engineSettings, null, 2),
);
}
} catch (err) {
@ -133,6 +134,7 @@ export default class JanInferenceNitroExtension extends InferenceExtension {
const modelFullPath = await joinPath(["models", model.id]);
this._currentModel = model;
const nitroInitResult = await executeOnMain(NODE, "runModel", {
modelFullPath,
model,
@ -143,12 +145,11 @@ export default class JanInferenceNitroExtension extends InferenceExtension {
return;
}
this._currentModel = model;
events.emit(ModelEvent.OnModelReady, model);
this.getNitroProcesHealthIntervalId = setInterval(
() => this.periodicallyGetNitroHealth(),
JanInferenceNitroExtension._intervalHealthCheck
JanInferenceNitroExtension._intervalHealthCheck,
);
}
@ -225,6 +226,9 @@ export default class JanInferenceNitroExtension extends InferenceExtension {
*/
private async onMessageRequest(data: MessageRequest) {
if (data.model?.engine !== InferenceEngine.nitro || !this._currentModel) {
console.log(
`Model is not nitro or no model loaded ${data.model?.engine} ${this._currentModel}`
);
return;
}

View File

@ -3,11 +3,19 @@ import path from "path";
import { ChildProcessWithoutNullStreams, spawn } from "child_process";
import tcpPortUsed from "tcp-port-used";
import fetchRT from "fetch-retry";
import { log, getJanDataFolderPath } from "@janhq/core/node";
import {
log,
getJanDataFolderPath,
getSystemResourceInfo,
} from "@janhq/core/node";
import { getNitroProcessInfo, updateNvidiaInfo } from "./nvidia";
import { Model, InferenceEngine, ModelSettingParams } from "@janhq/core";
import {
Model,
InferenceEngine,
ModelSettingParams,
PromptTemplate,
} from "@janhq/core";
import { executableNitroFile } from "./execute";
import { physicalCpuCount } from "./utils";
// Polyfill fetch with retry
const fetchRetry = fetchRT(fetch);
@ -19,25 +27,6 @@ interface ModelInitOptions {
modelFullPath: string;
model: Model;
}
/**
* The response object of Prompt Template parsing.
*/
interface PromptTemplate {
system_prompt?: string;
ai_prompt?: string;
user_prompt?: string;
error?: string;
}
/**
* Model setting args for Nitro model load.
*/
interface ModelSettingArgs extends ModelSettingParams {
llama_model_path: string;
cpu_threads: number;
}
// The PORT to use for the Nitro subprocess
const PORT = 3928;
// The HOST address to use for the Nitro subprocess
@ -60,7 +49,7 @@ let subprocess: ChildProcessWithoutNullStreams | undefined = undefined;
// The current model file url
let currentModelFile: string = "";
// The current model settings
let currentSettings: ModelSettingArgs | undefined = undefined;
let currentSettings: ModelSettingParams | undefined = undefined;
/**
* Stops a Nitro subprocess.
@ -78,7 +67,7 @@ function stopModel(): Promise<void> {
* TODO: Should pass the absolute path of the model file instead of just the name, so we can modularize module.ts into an npm package
*/
async function runModel(
wrapper: ModelInitOptions
wrapper: ModelInitOptions,
): Promise<ModelOperationResponse | void> {
if (wrapper.model.engine !== InferenceEngine.nitro) {
// Not a nitro model
@ -96,7 +85,7 @@ async function runModel(
const ggufBinFile = files.find(
(file) =>
file === path.basename(currentModelFile) ||
file.toLowerCase().includes(SUPPORTED_MODEL_FORMAT)
file.toLowerCase().includes(SUPPORTED_MODEL_FORMAT),
);
if (!ggufBinFile) return Promise.reject("No GGUF model file found");
@ -106,7 +95,7 @@ async function runModel(
if (wrapper.model.engine !== InferenceEngine.nitro) {
return Promise.reject("Not a nitro model");
} else {
const nitroResourceProbe = await getResourcesInfo();
const nitroResourceProbe = await getSystemResourceInfo();
// Convert settings.prompt_template to system_prompt, user_prompt, ai_prompt
if (wrapper.model.settings.prompt_template) {
const promptTemplate = wrapper.model.settings.prompt_template;
@ -119,13 +108,20 @@ async function runModel(
wrapper.model.settings.ai_prompt = prompt.ai_prompt;
}
const modelFolderPath = path.join(janRoot, "models", wrapper.model.id);
const modelPath = wrapper.model.settings.llama_model_path
? path.join(modelFolderPath, wrapper.model.settings.llama_model_path)
: currentModelFile;
currentSettings = {
llama_model_path: currentModelFile,
...wrapper.model.settings,
llama_model_path: modelPath,
// This is critical and requires the real physical CPU core count (or performance cores)
cpu_threads: Math.max(1, nitroResourceProbe.numCpuPhysicalCore),
...(wrapper.model.settings.mmproj && {
mmproj: path.join(modelFolderPath, wrapper.model.settings.mmproj),
}),
};
console.log(currentSettings);
return runNitroAndLoadModel();
}
}
@ -184,10 +180,10 @@ function promptTemplateConverter(promptTemplate: string): PromptTemplate {
const system_prompt = promptTemplate.substring(0, systemIndex);
const user_prompt = promptTemplate.substring(
systemIndex + systemMarker.length,
promptIndex
promptIndex,
);
const ai_prompt = promptTemplate.substring(
promptIndex + promptMarker.length
promptIndex + promptMarker.length,
);
// Return the split parts
@ -197,7 +193,7 @@ function promptTemplateConverter(promptTemplate: string): PromptTemplate {
const promptIndex = promptTemplate.indexOf(promptMarker);
const user_prompt = promptTemplate.substring(0, promptIndex);
const ai_prompt = promptTemplate.substring(
promptIndex + promptMarker.length
promptIndex + promptMarker.length,
);
// Return the split parts
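A worked example of the split promptTemplateConverter performs, using the ChatML template from the model files below; the three substrings are taken around the {system_message} and {prompt} markers:

const template =
  "<|im_start|>system\n{system_message}<|im_end|>\n<|im_start|>user\n{prompt}<|im_end|>\n<|im_start|>assistant";
const systemIndex = template.indexOf("{system_message}");
const promptIndex = template.indexOf("{prompt}");
console.log({
  system_prompt: template.slice(0, systemIndex), // "<|im_start|>system\n"
  user_prompt: template.slice(systemIndex + "{system_message}".length, promptIndex), // "<|im_end|>\n<|im_start|>user\n"
  ai_prompt: template.slice(promptIndex + "{prompt}".length), // "<|im_end|>\n<|im_start|>assistant"
});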
@ -213,6 +209,9 @@ function promptTemplateConverter(promptTemplate: string): PromptTemplate {
* @returns A Promise that resolves when the model is loaded successfully, or rejects with an error message if the model is not found or fails to load.
*/
function loadLLMModel(settings: any): Promise<Response> {
if (!settings?.ngl) {
settings.ngl = 100;
}
log(`[NITRO]::Debug: Loading model with params ${JSON.stringify(settings)}`);
return fetchRetry(NITRO_HTTP_LOAD_MODEL_URL, {
method: "POST",
@ -226,14 +225,14 @@ function loadLLMModel(settings: any): Promise<Response> {
.then((res) => {
log(
`[NITRO]::Debug: Load model success with response ${JSON.stringify(
res
)}`
res,
)}`,
);
return Promise.resolve(res);
})
.catch((err) => {
log(`[NITRO]::Error: Load model failed with error ${err}`);
return Promise.reject();
return Promise.reject(err);
});
}
@ -255,8 +254,8 @@ async function validateModelStatus(): Promise<void> {
retryDelay: 500,
}).then(async (res: Response) => {
log(
`[NITRO]::Debug: Validate model state success with response ${JSON.stringify(
res
`[NITRO]::Debug: Validate model state with response ${JSON.stringify(
res.status
)}`
);
// If the response is OK, check model_loaded status.
@ -265,9 +264,19 @@ async function validateModelStatus(): Promise<void> {
// If the model is loaded, return an empty object.
// Otherwise, return an object with an error message.
if (body.model_loaded) {
log(
`[NITRO]::Debug: Validate model state success with response ${JSON.stringify(
body
)}`
);
return Promise.resolve();
}
}
log(
`[NITRO]::Debug: Validate model state failed with response ${JSON.stringify(
res.statusText
)}`
);
return Promise.reject("Validate model status failed");
});
}
@ -308,7 +317,7 @@ function spawnNitroProcess(): Promise<any> {
const args: string[] = ["1", LOCAL_HOST, PORT.toString()];
// Execute the binary
log(
`[NITRO]::Debug: Spawn nitro at path: ${executableOptions.executablePath}, and args: ${args}`
`[NITRO]::Debug: Spawn nitro at path: ${executableOptions.executablePath}, and args: ${args}`,
);
subprocess = spawn(
executableOptions.executablePath,
@ -319,7 +328,7 @@ function spawnNitroProcess(): Promise<any> {
...process.env,
CUDA_VISIBLE_DEVICES: executableOptions.cudaVisibleDevices,
},
}
},
);
// Handle subprocess output
@ -344,22 +353,6 @@ function spawnNitroProcess(): Promise<any> {
});
}
/**
* Get the system resources information
* TODO: Move to Core so that it can be reused
*/
function getResourcesInfo(): Promise<ResourcesInfo> {
return new Promise(async (resolve) => {
const cpu = await physicalCpuCount();
log(`[NITRO]::CPU informations - ${cpu}`);
const response: ResourcesInfo = {
numCpuPhysicalCore: cpu,
memAvailable: 0,
};
resolve(response);
});
}
/**
* Every module should have a dispose function
* This will be called when the extension is unloaded and should clean up any resources

View File

@ -1,56 +0,0 @@
import os from "os";
import childProcess from "child_process";
function exec(command: string): Promise<string> {
return new Promise((resolve, reject) => {
childProcess.exec(command, { encoding: "utf8" }, (error, stdout) => {
if (error) {
reject(error);
} else {
resolve(stdout);
}
});
});
}
let amount: number;
const platform = os.platform();
export async function physicalCpuCount(): Promise<number> {
return new Promise((resolve, reject) => {
if (platform === "linux") {
exec('lscpu -p | egrep -v "^#" | sort -u -t, -k 2,4 | wc -l')
.then((output) => {
amount = parseInt(output.trim(), 10);
resolve(amount);
})
.catch(reject);
} else if (platform === "darwin") {
exec("sysctl -n hw.physicalcpu_max")
.then((output) => {
amount = parseInt(output.trim(), 10);
resolve(amount);
})
.catch(reject);
} else if (platform === "win32") {
exec("WMIC CPU Get NumberOfCores")
.then((output) => {
amount = output
.split(os.EOL)
.map((line: string) => parseInt(line))
.filter((value: number) => !isNaN(value))
.reduce((sum: number, number: number) => sum + number, 1);
resolve(amount);
})
.catch(reject);
} else {
const cores = os.cpus().filter((cpu: any, index: number) => {
const hasHyperthreading = cpu.model.includes("Intel");
const isOdd = index % 2 === 1;
return !hasHyperthreading || isOdd;
});
amount = cores.length;
resolve(amount);
}
});
}
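This removed helper is superseded by the shared getSystemResourceInfo from @janhq/core/node, which runModel above now awaits. A minimal sketch of the replacement call (field name as used in runModel):

import { getSystemResourceInfo } from "@janhq/core/node";

async function cpuThreads(): Promise<number> {
  const info = await getSystemResourceInfo();
  // same clamp runModel applies when building currentSettings
  return Math.max(1, info.numCpuPhysicalCore);
}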

View File

@ -15,6 +15,7 @@ import {
ThreadMessage,
events,
fs,
InferenceEngine,
BaseExtension,
MessageEvent,
ModelEvent,
@ -114,7 +115,7 @@ export default class JanInferenceOpenAIExtension extends BaseExtension {
}
}
private static async handleModelInit(model: OpenAIModel) {
if (model.engine !== "openai") {
if (model.engine !== InferenceEngine.openai) {
return;
} else {
JanInferenceOpenAIExtension._currentModel = model;

View File

@ -3,13 +3,12 @@
"target": "es2016",
"module": "ES6",
"moduleResolution": "node",
"outDir": "./dist",
"esModuleInterop": true,
"forceConsistentCasingInFileNames": true,
"strict": false,
"skipLibCheck": true,
"rootDir": "./src"
"rootDir": "./src",
},
"include": ["./src"]
"include": ["./src"],
}

View File

@ -3,13 +3,12 @@
"target": "es2016",
"module": "ES6",
"moduleResolution": "node",
"outDir": "./dist",
"esModuleInterop": true,
"forceConsistentCasingInFileNames": true,
"strict": false,
"skipLibCheck": true,
"rootDir": "./src"
"rootDir": "./src",
},
"include": ["./src"]
"include": ["./src"],
}

View File

@ -1,6 +1,6 @@
{
"name": "@janhq/model-extension",
"version": "1.0.22",
"version": "1.0.23",
"description": "Model Management Extension provides model exploration and seamless downloads",
"main": "dist/index.js",
"module": "dist/module.js",

View File

@ -80,16 +80,34 @@ export default class JanModelExtension extends ModelExtension {
const modelDirPath = await joinPath([JanModelExtension._homeDir, model.id])
if (!(await fs.existsSync(modelDirPath))) await fs.mkdirSync(modelDirPath)
// try to retrieve the download file name from the source url
// if it fails, use the model ID as the file name
const extractedFileName = await model.source_url.split('/').pop()
if (model.sources.length > 1) {
// path to model binaries
for (const source of model.sources) {
let path = await joinPath([modelDirPath, this.extractFileName(source.url)])
if (source.filename) {
path = await joinPath([modelDirPath, source.filename])
}
downloadFile(source.url, path, network)
}
} else {
const fileName = this.extractFileName(model.sources[0]?.url)
const path = await joinPath([modelDirPath, fileName])
downloadFile(model.sources[0]?.url, path, network)
}
}
/**
* Try to derive the download file name from the source URL, appending the supported model extension when it is missing.
*/
private extractFileName(url: string): string {
const extractedFileName = url.split('/').pop()
const fileName = extractedFileName
.toLowerCase()
.endsWith(JanModelExtension._supportedModelFormat)
? extractedFileName
: model.id
const path = await joinPath([modelDirPath, fileName])
downloadFile(model.source_url, path, network)
: extractedFileName + JanModelExtension._supportedModelFormat
return fileName
}
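A standalone sketch of the helper's behaviour, assuming _supportedModelFormat is the ".gguf" extension implied by the surrounding checks:

const SUPPORTED_MODEL_FORMAT = ".gguf"; // assumed value of JanModelExtension._supportedModelFormat

function extractFileName(url: string): string {
  const name = url.split("/").pop() ?? ""; // guard added for the sketch
  return name.toLowerCase().endsWith(SUPPORTED_MODEL_FORMAT)
    ? name
    : name + SUPPORTED_MODEL_FORMAT;
}

// extractFileName(".../resolve/main/mistral-7b-instruct-v0.2.Q4_K_M.gguf")
//   -> "mistral-7b-instruct-v0.2.Q4_K_M.gguf"  (extension already present)
// extractFileName(".../resolve/main/model-q5") -> "model-q5.gguf"  (extension appended)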
/**
@ -98,6 +116,7 @@ export default class JanModelExtension extends ModelExtension {
* @returns {Promise<void>} A promise that resolves when the download has been cancelled.
*/
async cancelModelDownload(modelId: string): Promise<void> {
const model = await this.getConfiguredModels()
return abortDownload(
await joinPath([JanModelExtension._homeDir, modelId, modelId])
).then(async () => {
@ -163,15 +182,16 @@ export default class JanModelExtension extends ModelExtension {
.then((files: string[]) => {
// or model binary exists in the directory
// model binary name can match model ID or be a .gguf file and not be an incompleted model file
// TODO: Check diff between urls, filenames
return (
files.includes(modelDir) ||
files.some(
files.filter(
(file) =>
file
.toLowerCase()
.includes(JanModelExtension._supportedModelFormat) &&
!file.endsWith(JanModelExtension._incompletedModelFileName)
)
)?.length >= model.sources.length
)
})
}
@ -198,7 +218,6 @@ export default class JanModelExtension extends ModelExtension {
const readJsonPromises = allDirectories.map(async (dirName) => {
// filter out directories that don't match the selector
// read model.json
const jsonPath = await joinPath([
JanModelExtension._homeDir,
@ -226,7 +245,21 @@ export default class JanModelExtension extends ModelExtension {
const modelData = results.map((result) => {
if (result.status === 'fulfilled') {
try {
return result.value as Model
// This is to ensure backward compatibility with legacy `model.json` files that use `source_url`
const tmpModel =
typeof result.value === 'object'
? result.value
: JSON.parse(result.value)
if (tmpModel['source_url'] != null) {
tmpModel['sources'] = [
{
filename: tmpModel.id,
url: tmpModel['source_url'],
},
]
}
return tmpModel as Model
} catch {
console.debug(`Unable to parse model metadata: ${result.value}`)
return undefined

View File

@ -1,5 +1,4 @@
import { MonitoringExtension } from "@janhq/core";
import { executeOnMain } from "@janhq/core";
import { MonitoringExtension, executeOnMain } from "@janhq/core";
/**
* JanMonitoringExtension is an extension that provides system monitoring functionality.

View File

@ -0,0 +1,33 @@
{
"sources": [
{
"filename": "ggml-model-q5_k.gguf",
"url": "https://huggingface.co/mys/ggml_bakllava-1/resolve/main/ggml-model-q5_k.gguf"
},
{
"filename": "mmproj-model-f16.gguf",
"url": "https://huggingface.co/mys/ggml_bakllava-1/resolve/main/mmproj-model-f16.gguf"
}
],
"id": "bakllava-1",
"object": "model",
"name": "BakLlava 1",
"version": "1.0",
"description": "BakLlava 1 can bring vision understanding to Jan",
"format": "gguf",
"settings": {
"ctx_len": 4096,
"prompt_template": "\n### Instruction:\n{prompt}\n### Response:\n",
"llama_model_path": "ggml-model-q5_k.gguf",
"mmproj": "mmproj-model-f16.gguf"
},
"parameters": {
"max_tokens": 4096
},
"metadata": {
"author": "Mys",
"tags": ["Vision"],
"size": 5750000000
},
"engine": "nitro"
}
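The two-entry sources array above is what the multi-file download path in JanModelExtension iterates over; settings.llama_model_path and settings.mmproj then name the main weights and the vision projector inside the model folder. A minimal sketch of the resolution step runModel performs (folder layout assumed):

import path from "path";

// Illustrative resolution of the bakllava-1 settings above
const modelFolderPath = path.join("models", "bakllava-1");
const settings = {
  llama_model_path: "ggml-model-q5_k.gguf",
  mmproj: "mmproj-model-f16.gguf",
};
const modelPath = path.join(modelFolderPath, settings.llama_model_path);
const mmprojPath = path.join(modelFolderPath, settings.mmproj);
// -> models/bakllava-1/ggml-model-q5_k.gguf and models/bakllava-1/mmproj-model-f16.gguf,
//    matching what runModel builds into currentSettings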

View File

@ -1,29 +1,34 @@
{
"source_url": "https://huggingface.co/TheBloke/Nous-Capybara-34B-GGUF/resolve/main/nous-capybara-34b.Q5_K_M.gguf",
"id": "capybara-34b",
"object": "model",
"name": "Capybara 200k 34B Q5",
"version": "1.0",
"description": "Nous Capybara 34B is a long context length model that supports 200K tokens.",
"format": "gguf",
"settings": {
"ctx_len": 4096,
"prompt_template": "USER:\n{prompt}\nASSISTANT:"
},
"parameters": {
"temperature": 0.7,
"top_p": 0.95,
"stream": true,
"max_tokens": 4096,
"stop": [],
"frequency_penalty": 0,
"presence_penalty": 0
},
"metadata": {
"author": "NousResearch, The Bloke",
"tags": ["34B", "Finetuned"],
"size": 24320000000
},
"engine": "nitro"
}
"sources": [
{
"filename": "nous-capybara-34b.Q5_K_M.gguf",
"url": "https://huggingface.co/TheBloke/Nous-Capybara-34B-GGUF/resolve/main/nous-capybara-34b.Q5_K_M.gguf"
}
],
"id": "capybara-34b",
"object": "model",
"name": "Capybara 200k 34B Q5",
"version": "1.0",
"description": "Nous Capybara 34B is a long context length model that supports 200K tokens.",
"format": "gguf",
"settings": {
"ctx_len": 4096,
"prompt_template": "USER:\n{prompt}\nASSISTANT:",
"llama_model_path": "nous-capybara-34b.Q5_K_M.gguf"
},
"parameters": {
"temperature": 0.7,
"top_p": 0.95,
"stream": true,
"max_tokens": 4096,
"stop": [],
"frequency_penalty": 0,
"presence_penalty": 0
},
"metadata": {
"author": "NousResearch, The Bloke",
"tags": ["34B", "Finetuned"],
"size": 24320000000
},
"engine": "nitro"
}

View File

@ -1,29 +1,33 @@
{
"source_url": "https://huggingface.co/beowolx/CodeNinja-1.0-OpenChat-7B-GGUF/resolve/main/codeninja-1.0-openchat-7b.Q4_K_M.gguf",
"id": "codeninja-1.0-7b",
"object": "model",
"name": "CodeNinja 7B Q4",
"version": "1.0",
"description": "CodeNinja is good for coding tasks and can handle various languages including Python, C, C++, Rust, Java, JavaScript, and more.",
"format": "gguf",
"settings": {
"ctx_len": 4096,
"prompt_template": "GPT4 Correct User: {prompt}<|end_of_turn|>GPT4 Correct Assistant:"
},
"parameters": {
"temperature": 0.7,
"top_p": 0.95,
"stream": true,
"max_tokens": 4096,
"stop": ["<|end_of_turn|>"],
"frequency_penalty": 0,
"presence_penalty": 0
},
"metadata": {
"author": "Beowolx",
"tags": ["7B", "Finetuned"],
"size": 4370000000
},
"engine": "nitro"
}
"sources": [
{
"filename": "codeninja-1.0-openchat-7b.Q4_K_M.gguf",
"url": "https://huggingface.co/beowolx/CodeNinja-1.0-OpenChat-7B-GGUF/resolve/main/codeninja-1.0-openchat-7b.Q4_K_M.gguf"
}
],
"id": "codeninja-1.0-7b",
"object": "model",
"name": "CodeNinja 7B Q4",
"version": "1.0",
"description": "CodeNinja is good for coding tasks and can handle various languages including Python, C, C++, Rust, Java, JavaScript, and more.",
"format": "gguf",
"settings": {
"ctx_len": 4096,
"prompt_template": "GPT4 Correct User: {prompt}<|end_of_turn|>GPT4 Correct Assistant:",
"llama_model_path": "codeninja-1.0-openchat-7b.Q4_K_M.gguf"
},
"parameters": {
"temperature": 0.7,
"top_p": 0.95,
"stream": true,
"max_tokens": 4096,
"frequency_penalty": 0,
"presence_penalty": 0
},
"metadata": {
"author": "Beowolx",
"tags": ["7B", "Finetuned"],
"size": 4370000000
},
"engine": "nitro"
}

View File

@ -2,7 +2,12 @@
"object": "model",
"version": 1,
"format": "gguf",
"source_url": "N/A",
"sources": [
{
"url": "N/A",
"filename": "N/A"
}
],
"id": "N/A",
"name": "N/A",
"created": 0,
@ -10,7 +15,8 @@
"settings": {
"ctx_len": 4096,
"embedding": false,
"prompt_template": "{system_message}\n### Instruction: {prompt}\n### Response:"
"prompt_template": "{system_message}\n### Instruction: {prompt}\n### Response:",
"llama_model_path": "N/A"
},
"parameters": {
"temperature": 0.7,

View File

@ -1,29 +1,34 @@
{
"source_url": "https://huggingface.co/TheBloke/deepseek-coder-1.3b-instruct-GGUF/resolve/main/deepseek-coder-1.3b-instruct.Q8_0.gguf",
"id": "deepseek-coder-1.3b",
"object": "model",
"name": "Deepseek Coder 1.3B Q8",
"version": "1.0",
"description": "Deepseek Coder excelled in project-level code completion with advanced capabilities across multiple programming languages.",
"format": "gguf",
"settings": {
"ctx_len": 4096,
"prompt_template": "### Instruction:\n{prompt}\n### Response:"
},
"parameters": {
"temperature": 0.7,
"top_p": 0.95,
"stream": true,
"max_tokens": 4096,
"stop": [],
"frequency_penalty": 0,
"presence_penalty": 0
},
"metadata": {
"author": "Deepseek, The Bloke",
"tags": ["Tiny", "Foundational Model"],
"size": 1430000000
},
"engine": "nitro"
}
"sources": [
{
"filename": "deepseek-coder-1.3b-instruct.Q8_0.gguf",
"url": "https://huggingface.co/TheBloke/deepseek-coder-1.3b-instruct-GGUF/resolve/main/deepseek-coder-1.3b-instruct.Q8_0.gguf"
}
],
"id": "deepseek-coder-1.3b",
"object": "model",
"name": "Deepseek Coder 1.3B Q8",
"version": "1.0",
"description": "Deepseek Coder excelled in project-level code completion with advanced capabilities across multiple programming languages.",
"format": "gguf",
"settings": {
"ctx_len": 4096,
"prompt_template": "### Instruction:\n{prompt}\n### Response:",
"llama_model_path": "deepseek-coder-1.3b-instruct.Q8_0.gguf"
},
"parameters": {
"temperature": 0.7,
"top_p": 0.95,
"stream": true,
"max_tokens": 4096,
"stop": [],
"frequency_penalty": 0,
"presence_penalty": 0
},
"metadata": {
"author": "Deepseek, The Bloke",
"tags": ["Tiny", "Foundational Model"],
"size": 1430000000
},
"engine": "nitro"
}

View File

@ -1,29 +1,34 @@
{
"source_url": "https://huggingface.co/TheBloke/deepseek-coder-33B-instruct-GGUF/resolve/main/deepseek-coder-33b-instruct.Q5_K_M.gguf",
"id": "deepseek-coder-34b",
"object": "model",
"name": "Deepseek Coder 33B Q5",
"version": "1.0",
"description": "Deepseek Coder excelled in project-level code completion with advanced capabilities across multiple programming languages.",
"format": "gguf",
"settings": {
"ctx_len": 4096,
"prompt_template": "### Instruction:\n{prompt}\n### Response:"
},
"parameters": {
"temperature": 0.7,
"top_p": 0.95,
"stream": true,
"max_tokens": 4096,
"stop": [],
"frequency_penalty": 0,
"presence_penalty": 0
},
"metadata": {
"author": "Deepseek, The Bloke",
"tags": ["34B", "Foundational Model"],
"size": 19940000000
},
"engine": "nitro"
}
"sources": [
{
"filename": "deepseek-coder-33b-instruct.Q5_K_M.gguf",
"url": "https://huggingface.co/TheBloke/deepseek-coder-33B-instruct-GGUF/resolve/main/deepseek-coder-33b-instruct.Q5_K_M.gguf"
}
],
"id": "deepseek-coder-34b",
"object": "model",
"name": "Deepseek Coder 33B Q5",
"version": "1.0",
"description": "Deepseek Coder excelled in project-level code completion with advanced capabilities across multiple programming languages.",
"format": "gguf",
"settings": {
"ctx_len": 4096,
"prompt_template": "### Instruction:\n{prompt}\n### Response:",
"llama_model_path": "deepseek-coder-33b-instruct.Q5_K_M.gguf"
},
"parameters": {
"temperature": 0.7,
"top_p": 0.95,
"stream": true,
"max_tokens": 4096,
"stop": [],
"frequency_penalty": 0,
"presence_penalty": 0
},
"metadata": {
"author": "Deepseek, The Bloke",
"tags": ["34B", "Foundational Model"],
"size": 19940000000
},
"engine": "nitro"
}

View File

@ -1,28 +1,34 @@
{
"source_url": "https://huggingface.co/TheBloke/dolphin-2.7-mixtral-8x7b-GGUF/resolve/main/dolphin-2.7-mixtral-8x7b.Q4_K_M.gguf",
"id": "dolphin-2.7-mixtral-8x7b",
"object": "model",
"name": "Dolphin 8x7B Q4",
"version": "1.0",
"description": "Dolphin is an uncensored model built on Mixtral-8x7b. It is good at programming tasks.",
"format": "gguf",
"settings": {
"ctx_len": 4096,
"prompt_template": "<|im_start|>system\n{system_message}<|im_end|>\n<|im_start|>user\n{prompt}<|im_end|>\n<|im_start|>assistant"
},
"parameters": {
"temperature": 0.7,
"top_p": 0.95,
"stream": true,
"max_tokens": 4096,
"stop": [],
"frequency_penalty": 0,
"presence_penalty": 0
},
"metadata": {
"author": "Cognitive Computations, TheBloke",
"tags": ["70B", "Finetuned"],
"size": 26440000000
},
"engine": "nitro"
}
"sources": [
{
"filename": "dolphin-2.7-mixtral-8x7b.Q4_K_M.gguf",
"url": "https://huggingface.co/TheBloke/dolphin-2.7-mixtral-8x7b-GGUF/resolve/main/dolphin-2.7-mixtral-8x7b.Q4_K_M.gguf"
}
],
"id": "dolphin-2.7-mixtral-8x7b",
"object": "model",
"name": "Dolphin 8x7B Q4",
"version": "1.0",
"description": "Dolphin is an uncensored model built on Mixtral-8x7b. It is good at programming tasks.",
"format": "gguf",
"settings": {
"ctx_len": 4096,
"prompt_template": "<|im_start|>system\n{system_message}<|im_end|>\n<|im_start|>user\n{prompt}<|im_end|>\n<|im_start|>assistant",
"llama_model_path": "dolphin-2.7-mixtral-8x7b.Q4_K_M.gguf"
},
"parameters": {
"temperature": 0.7,
"top_p": 0.95,
"stream": true,
"max_tokens": 4096,
"stop": [],
"frequency_penalty": 0,
"presence_penalty": 0
},
"metadata": {
"author": "Cognitive Computations, TheBloke",
"tags": ["70B", "Finetuned"],
"size": 26440000000
},
"engine": "nitro"
}

View File

@ -1,18 +1,20 @@
{
"source_url": "https://openai.com",
"id": "gpt-3.5-turbo-16k-0613",
"object": "model",
"name": "OpenAI GPT 3.5 Turbo 16k 0613",
"version": "1.0",
"description": "OpenAI GPT 3.5 Turbo 16k 0613 model is extremely good",
"format": "api",
"settings": {},
"parameters": {},
"metadata": {
"author": "OpenAI",
"tags": ["General", "Big Context Length"]
},
"engine": "openai",
"state": "ready"
"sources": [
{
"url": "https://openai.com"
}
],
"id": "gpt-3.5-turbo-16k-0613",
"object": "model",
"name": "OpenAI GPT 3.5 Turbo 16k 0613",
"version": "1.0",
"description": "OpenAI GPT 3.5 Turbo 16k 0613 model is extremely good",
"format": "api",
"settings": {},
"parameters": {},
"metadata": {
"author": "OpenAI",
"tags": ["General", "Big Context Length"]
},
"engine": "openai"
}

View File

@ -1,18 +1,20 @@
{
"source_url": "https://openai.com",
"id": "gpt-3.5-turbo",
"object": "model",
"name": "OpenAI GPT 3.5 Turbo",
"version": "1.0",
"description": "OpenAI GPT 3.5 Turbo model is extremely good",
"format": "api",
"settings": {},
"parameters": {},
"metadata": {
"author": "OpenAI",
"tags": ["General", "Big Context Length"]
},
"engine": "openai",
"state": "ready"
"sources": [
{
"url": "https://openai.com"
}
],
"id": "gpt-3.5-turbo",
"object": "model",
"name": "OpenAI GPT 3.5 Turbo",
"version": "1.0",
"description": "OpenAI GPT 3.5 Turbo model is extremely good",
"format": "api",
"settings": {},
"parameters": {},
"metadata": {
"author": "OpenAI",
"tags": ["General", "Big Context Length"]
},
"engine": "openai"
}

View File

@ -1,18 +1,20 @@
{
"source_url": "https://openai.com",
"id": "gpt-4",
"object": "model",
"name": "OpenAI GPT 4",
"version": "1.0",
"description": "OpenAI GPT 4 model is extremely good",
"format": "api",
"settings": {},
"parameters": {},
"metadata": {
"author": "OpenAI",
"tags": ["General", "Big Context Length"]
},
"engine": "openai",
"state": "ready"
"sources": [
{
"url": "https://openai.com"
}
],
"id": "gpt-4",
"object": "model",
"name": "OpenAI GPT 4",
"version": "1.0",
"description": "OpenAI GPT 4 model is extremely good",
"format": "api",
"settings": {},
"parameters": {},
"metadata": {
"author": "OpenAI",
"tags": ["General", "Big Context Length"]
},
"engine": "openai"
}

View File

@ -1,29 +1,34 @@
{
"source_url": "https://huggingface.co/TheBloke/Llama-2-70B-Chat-GGUF/resolve/main/llama-2-70b-chat.Q4_K_M.gguf",
"id": "llama2-chat-70b-q4",
"object": "model",
"name": "Llama 2 Chat 70B Q4",
"version": "1.0",
"description": "Llama 2 Chat 7b model, specifically designed for a comprehensive understanding through training on extensive internet data.",
"format": "gguf",
"settings": {
"ctx_len": 4096,
"prompt_template": "[INST] <<SYS>>\n{system_message}<</SYS>>\n{prompt}[/INST]"
},
"parameters": {
"temperature": 0.7,
"top_p": 0.95,
"stream": true,
"max_tokens": 4096,
"stop": [],
"frequency_penalty": 0,
"presence_penalty": 0
},
"metadata": {
"author": "MetaAI, The Bloke",
"tags": ["70B", "Foundational Model"],
"size": 43920000000
},
"engine": "nitro"
}
"sources": [
{
"filename": "llama-2-70b-chat.Q4_K_M.gguf",
"url": "https://huggingface.co/TheBloke/Llama-2-70B-Chat-GGUF/resolve/main/llama-2-70b-chat.Q4_K_M.gguf"
}
],
"id": "llama2-chat-70b-q4",
"object": "model",
"name": "Llama 2 Chat 70B Q4",
"version": "1.0",
"description": "Llama 2 Chat 7b model, specifically designed for a comprehensive understanding through training on extensive internet data.",
"format": "gguf",
"settings": {
"ctx_len": 4096,
"prompt_template": "[INST] <<SYS>>\n{system_message}<</SYS>>\n{prompt}[/INST]",
"llama_model_path": "llama-2-70b-chat.Q4_K_M.gguf"
},
"parameters": {
"temperature": 0.7,
"top_p": 0.95,
"stream": true,
"max_tokens": 4096,
"stop": [],
"frequency_penalty": 0,
"presence_penalty": 0
},
"metadata": {
"author": "MetaAI, The Bloke",
"tags": ["70B", "Foundational Model"],
"size": 43920000000
},
"engine": "nitro"
}

View File

@ -1,29 +1,34 @@
{
"source_url": "https://huggingface.co/TheBloke/Llama-2-7B-Chat-GGUF/resolve/main/llama-2-7b-chat.Q4_K_M.gguf",
"id": "llama2-chat-7b-q4",
"object": "model",
"name": "Llama 2 Chat 7B Q4",
"version": "1.0",
"description": "Llama 2 Chat 7b model, specifically designed for a comprehensive understanding through training on extensive internet data.",
"format": "gguf",
"settings": {
"ctx_len": 4096,
"prompt_template": "[INST] <<SYS>>\n{system_message}<</SYS>>\n{prompt}[/INST]"
},
"parameters": {
"temperature": 0.7,
"top_p": 0.95,
"stream": true,
"max_tokens": 4096,
"stop": [],
"frequency_penalty": 0,
"presence_penalty": 0
},
"metadata": {
"author": "MetaAI, The Bloke",
"tags": ["7B", "Foundational Model"],
"size": 4080000000
},
"engine": "nitro"
}
"sources": [
{
"filename": "llama-2-7b-chat.Q4_K_M.gguf",
"url": "https://huggingface.co/TheBloke/Llama-2-7B-Chat-GGUF/resolve/main/llama-2-7b-chat.Q4_K_M.gguf"
}
],
"id": "llama2-chat-7b-q4",
"object": "model",
"name": "Llama 2 Chat 7B Q4",
"version": "1.0",
"description": "Llama 2 Chat 7b model, specifically designed for a comprehensive understanding through training on extensive internet data.",
"format": "gguf",
"settings": {
"ctx_len": 4096,
"prompt_template": "[INST] <<SYS>>\n{system_message}<</SYS>>\n{prompt}[/INST]",
"llama_model_path": "llama-2-7b-chat.Q4_K_M.gguf"
},
"parameters": {
"temperature": 0.7,
"top_p": 0.95,
"stream": true,
"max_tokens": 4096,
"stop": [],
"frequency_penalty": 0,
"presence_penalty": 0
},
"metadata": {
"author": "MetaAI, The Bloke",
"tags": ["7B", "Foundational Model"],
"size": 4080000000
},
"engine": "nitro"
}

View File

@ -0,0 +1,33 @@
{
"sources": [
{
"filename": "ggml-model-q5_k.gguf",
"url": "https://huggingface.co/mys/ggml_llava-v1.5-13b/resolve/main/ggml-model-q5_k.gguf"
},
{
"filename": "mmproj-model-f16.gguf",
"url": "https://huggingface.co/mys/ggml_llava-v1.5-13b/resolve/main/mmproj-model-f16.gguf"
}
],
"id": "llava-1.5-13b-q5",
"object": "model",
"name": "LlaVa 1.5 13B Q5 K",
"version": "1.0",
"description": "LlaVa 1.5 can bring vision understanding to Jan",
"format": "gguf",
"settings": {
"ctx_len": 4096,
"prompt_template": "\n### Instruction:\n{prompt}\n### Response:\n",
"llama_model_path": "ggml-model-q5_k.gguf",
"mmproj": "mmproj-model-f16.gguf"
},
"parameters": {
"max_tokens": 4096
},
"metadata": {
"author": "Mys",
"tags": ["Vision"],
"size": 9850000000
},
"engine": "nitro"
}

View File

@ -0,0 +1,33 @@
{
"sources": [
{
"filename": "ggml-model-q5_k.gguf",
"url": "https://huggingface.co/mys/ggml_llava-v1.5-7b/resolve/main/ggml-model-q5_k.gguf"
},
{
"filename": "mmproj-model-f16.gguf",
"url": "https://huggingface.co/mys/ggml_llava-v1.5-7b/resolve/main/mmproj-model-f16.gguf"
}
],
"id": "llava-1.5-7b-q5",
"object": "model",
"name": "LlaVa 1.5 7B Q5 K",
"version": "1.0",
"description": "LlaVa 1.5 can bring vision understanding to Jan",
"format": "gguf",
"settings": {
"ctx_len": 4096,
"prompt_template": "\n### Instruction:\n{prompt}\n### Response:\n",
"llama_model_path": "ggml-model-q5_k.gguf",
"mmproj": "mmproj-model-f16.gguf"
},
"parameters": {
"max_tokens": 4096
},
"metadata": {
"author": "Mys",
"tags": ["Vision"],
"size": 5400000000
},
"engine": "nitro"
}

View File

@ -1,30 +1,35 @@
{
"source_url": "https://huggingface.co/TheBloke/Mistral-7B-Instruct-v0.2-GGUF/resolve/main/mistral-7b-instruct-v0.2.Q4_K_M.gguf",
"id": "mistral-ins-7b-q4",
"object": "model",
"name": "Mistral Instruct 7B Q4",
"version": "1.0",
"description": "Mistral Instruct 7b model, specifically designed for a comprehensive understanding through training on extensive internet data.",
"format": "gguf",
"settings": {
"ctx_len": 4096,
"prompt_template": "[INST] {prompt} [/INST]"
},
"parameters": {
"temperature": 0.7,
"top_p": 0.95,
"stream": true,
"max_tokens": 4096,
"stop": [],
"frequency_penalty": 0,
"presence_penalty": 0
},
"metadata": {
"author": "MistralAI, The Bloke",
"tags": ["Featured", "7B", "Foundational Model"],
"size": 4370000000,
"cover": "https://raw.githubusercontent.com/janhq/jan/main/models/mistral-ins-7b-q4/cover.png"
},
"engine": "nitro"
}
"sources": [
{
"filename": "mistral-7b-instruct-v0.2.Q4_K_M.gguf",
"url": "https://huggingface.co/TheBloke/Mistral-7B-Instruct-v0.2-GGUF/resolve/main/mistral-7b-instruct-v0.2.Q4_K_M.gguf"
}
],
"id": "mistral-ins-7b-q4",
"object": "model",
"name": "Mistral Instruct 7B Q4",
"version": "1.0",
"description": "Mistral Instruct 7b model, specifically designed for a comprehensive understanding through training on extensive internet data.",
"format": "gguf",
"settings": {
"ctx_len": 4096,
"prompt_template": "[INST] {prompt} [/INST]",
"llama_model_path": "mistral-7b-instruct-v0.2.Q4_K_M.gguf"
},
"parameters": {
"temperature": 0.7,
"top_p": 0.95,
"stream": true,
"max_tokens": 4096,
"stop": [],
"frequency_penalty": 0,
"presence_penalty": 0
},
"metadata": {
"author": "MistralAI, The Bloke",
"tags": ["Featured", "7B", "Foundational Model"],
"size": 4370000000,
"cover": "https://raw.githubusercontent.com/janhq/jan/main/models/mistral-ins-7b-q4/cover.png"
},
"engine": "nitro"
}

View File

@ -1,28 +1,33 @@
{
"source_url": "https://huggingface.co/TheBloke/Mixtral-8x7B-Instruct-v0.1-GGUF/resolve/main/mixtral-8x7b-instruct-v0.1.Q4_K_M.gguf",
"id": "mixtral-8x7b-instruct",
"object": "model",
"name": "Mixtral 8x7B Instruct Q4",
"version": "1.0",
"description": "The Mixtral-8x7B is a pretrained generative Sparse Mixture of Experts. The Mixtral-8x7B outperforms 70B models on most benchmarks.",
"format": "gguf",
"settings": {
"ctx_len": 4096,
"prompt_template": "[INST] {prompt} [/INST]"
},
"parameters": {
"temperature": 0.7,
"top_p": 0.95,
"stream": true,
"max_tokens": 4096,
"stop": [],
"frequency_penalty": 0,
"presence_penalty": 0
},
"metadata": {
"author": "MistralAI, TheBloke",
"tags": ["70B", "Foundational Model"],
"size": 26440000000
},
"engine": "nitro"
}
"sources": [
{
"filename": "mixtral-8x7b-instruct-v0.1.Q4_K_M.gguf",
"url": "https://huggingface.co/TheBloke/Mixtral-8x7B-Instruct-v0.1-GGUF/resolve/main/mixtral-8x7b-instruct-v0.1.Q4_K_M.gguf"
}
],
"id": "mixtral-8x7b-instruct",
"object": "model",
"name": "Mixtral 8x7B Instruct Q4",
"version": "1.0",
"description": "The Mixtral-8x7B is a pretrained generative Sparse Mixture of Experts. The Mixtral-8x7B outperforms 70B models on most benchmarks.",
"format": "gguf",
"settings": {
"ctx_len": 4096,
"prompt_template": "[INST] {prompt} [/INST]",
"llama_model_path": "mixtral-8x7b-instruct-v0.1.Q4_K_M.gguf"
},
"parameters": {
"temperature": 0.7,
"top_p": 0.95,
"stream": true,
"max_tokens": 4096,
"frequency_penalty": 0,
"presence_penalty": 0
},
"metadata": {
"author": "MistralAI, TheBloke",
"tags": ["70B", "Foundational Model"],
"size": 26440000000
},
"engine": "nitro"
}

View File

@ -1,29 +1,34 @@
{
"source_url": "https://huggingface.co/NeverSleep/Noromaid-7b-v0.1.1-GGUF/resolve/main/Noromaid-7b-v0.1.1.q5_k_m.gguf",
"id": "noromaid-7b",
"object": "model",
"name": "Noromaid 7B Q5",
"version": "1.0",
"description": "The Noromaid 7b model is designed for role-playing with human-like behavior.",
"format": "gguf",
"settings": {
"ctx_len": 4096,
"prompt_template": "### Instruction:{prompt}\n### Response:"
},
"parameters": {
"temperature": 0.7,
"top_p": 0.95,
"stream": true,
"max_tokens": 4096,
"stop": [],
"frequency_penalty": 0,
"presence_penalty": 0
},
"metadata": {
"author": "NeverSleep",
"tags": ["7B", "Merged"],
"size": 4370000000
},
"engine": "nitro"
}
"sources": [
{
"filename": "Noromaid-7b-v0.1.1.q5_k_m.gguf",
"url": "https://huggingface.co/NeverSleep/Noromaid-7b-v0.1.1-GGUF/resolve/main/Noromaid-7b-v0.1.1.q5_k_m.gguf"
}
],
"id": "noromaid-7b",
"object": "model",
"name": "Noromaid 7B Q5",
"version": "1.0",
"description": "The Noromaid 7b model is designed for role-playing with human-like behavior.",
"format": "gguf",
"settings": {
"ctx_len": 4096,
"prompt_template": "### Instruction:{prompt}\n### Response:",
"llama_model_path": "Noromaid-7b-v0.1.1.q5_k_m.gguf"
},
"parameters": {
"temperature": 0.7,
"top_p": 0.95,
"stream": true,
"max_tokens": 4096,
"stop": [],
"frequency_penalty": 0,
"presence_penalty": 0
},
"metadata": {
"author": "NeverSleep",
"tags": ["7B", "Merged"],
"size": 4370000000
},
"engine": "nitro"
}

View File

@ -1,28 +1,34 @@
{
"source_url": "https://huggingface.co/TheBloke/openchat-3.5-1210-GGUF/resolve/main/openchat-3.5-1210.Q4_K_M.gguf",
"id": "openchat-3.5-7b",
"object": "model",
"name": "Openchat-3.5 7B Q4",
"version": "1.0",
"description": "The performance of this open-source model surpasses that of ChatGPT-3.5 and Grok-1 across various benchmarks.",
"format": "gguf",
"settings": {
"ctx_len": 4096,
"prompt_template": "GPT4 Correct User: {prompt}<|end_of_turn|>GPT4 Correct Assistant:"
},
"parameters": {
"temperature": 0.7,
"top_p": 0.95,
"stream": true,
"max_tokens": 4096,
"stop": ["<|end_of_turn|>"],
"frequency_penalty": 0,
"presence_penalty": 0
},
"metadata": {
"author": "Openchat",
"tags": ["Recommended", "7B", "Finetuned"],
"size": 4370000000
},
"engine": "nitro"
}
"sources": [
{
"filename": "openchat-3.5-1210.Q4_K_M.gguf",
"url": "https://huggingface.co/TheBloke/openchat-3.5-1210-GGUF/resolve/main/openchat-3.5-1210.Q4_K_M.gguf"
}
],
"id": "openchat-3.5-7b",
"object": "model",
"name": "Openchat-3.5 7B Q4",
"version": "1.0",
"description": "The performance of this open-source model surpasses that of ChatGPT-3.5 and Grok-1 across various benchmarks.",
"format": "gguf",
"settings": {
"ctx_len": 4096,
"prompt_template": "GPT4 Correct User: {prompt}<|end_of_turn|>GPT4 Correct Assistant:",
"llama_model_path": "openchat-3.5-1210.Q4_K_M.gguf"
},
"parameters": {
"temperature": 0.7,
"top_p": 0.95,
"stream": true,
"max_tokens": 4096,
"stop": ["<|end_of_turn|>"],
"frequency_penalty": 0,
"presence_penalty": 0
},
"metadata": {
"author": "Openchat",
"tags": ["Recommended", "7B", "Finetuned"],
"size": 4370000000
},
"engine": "nitro"
}

View File

@ -1,29 +1,34 @@
{
"source_url": "https://huggingface.co/janhq/openhermes-2.5-neural-chat-v3-3-slerp-GGUF/resolve/main/openhermes-2.5-neural-chat-v3-3-slerp.Q4_K_M.gguf",
"id": "openhermes-neural-7b",
"object": "model",
"name": "OpenHermes Neural 7B Q4",
"version": "1.0",
"description": "OpenHermes Neural is a merged model using the TIES method. It performs well in various benchmarks.",
"format": "gguf",
"settings": {
"ctx_len": 4096,
"prompt_template": "<|im_start|>system\n{system_message}<|im_end|>\n<|im_start|>user\n{prompt}<|im_end|>\n<|im_start|>assistant"
},
"parameters": {
"temperature": 0.7,
"top_p": 0.95,
"stream": true,
"max_tokens": 4096,
"stop": [],
"frequency_penalty": 0,
"presence_penalty": 0
},
"metadata": {
"author": "Intel, Jan",
"tags": ["7B", "Merged", "Featured"],
"size": 4370000000,
"cover": "https://raw.githubusercontent.com/janhq/jan/main/models/openhermes-neural-7b/cover.png"
},
"engine": "nitro"
}
"sources": [
{
"filename": "openhermes-2.5-neural-chat-v3-3-slerp.Q4_K_M.gguf",
"url": "https://huggingface.co/janhq/openhermes-2.5-neural-chat-v3-3-slerp-GGUF/resolve/main/openhermes-2.5-neural-chat-v3-3-slerp.Q4_K_M.gguf"
}
],
"id": "openhermes-neural-7b",
"object": "model",
"name": "OpenHermes Neural 7B Q4",
"version": "1.0",
"description": "OpenHermes Neural is a merged model using the TIES method. It performs well in various benchmarks.",
"format": "gguf",
"settings": {
"ctx_len": 4096,
"prompt_template": "<|im_start|>system\n{system_message}<|im_end|>\n<|im_start|>user\n{prompt}<|im_end|>\n<|im_start|>assistant",
"llama_model_path": "openhermes-2.5-neural-chat-v3-3-slerp.Q4_K_M.gguf"
},
"parameters": {
"temperature": 0.7,
"top_p": 0.95,
"stream": true,
"max_tokens": 4096,
"frequency_penalty": 0,
"presence_penalty": 0
},
"metadata": {
"author": "Intel, Jan",
"tags": ["7B", "Merged", "Featured"],
"size": 4370000000,
"cover": "https://raw.githubusercontent.com/janhq/jan/main/models/openhermes-neural-7b/cover.png"
},
"engine": "nitro"
}

View File

@ -1,29 +1,34 @@
{
"source_url": "https://huggingface.co/TheBloke/phi-2-GGUF/resolve/main/phi-2.Q8_0.gguf",
"id": "phi-2-3b",
"object": "model",
"name": "Phi-2 3B Q8",
"version": "1.0",
"description": "Phi-2 is a 2.7B model, excelling in common sense and logical reasoning benchmarks, trained with synthetic texts and filtered websites.",
"format": "gguf",
"settings": {
"ctx_len": 4096,
"prompt_template": "Intruct:\n{prompt}\nOutput:"
},
"parameters": {
"temperature": 0.7,
"top_p": 0.95,
"stream": true,
"max_tokens": 4096,
"stop": [],
"frequency_penalty": 0,
"presence_penalty": 0
},
"metadata": {
"author": "Microsoft",
"tags": ["3B","Foundational Model"],
"size": 2960000000
},
"engine": "nitro"
}
"sources": [
{
"filename": "phi-2.Q8_0.gguf",
"url": "https://huggingface.co/TheBloke/phi-2-GGUF/resolve/main/phi-2.Q8_0.gguf"
}
],
"id": "phi-2-3b",
"object": "model",
"name": "Phi-2 3B Q8",
"version": "1.0",
"description": "Phi-2 is a 2.7B model, excelling in common sense and logical reasoning benchmarks, trained with synthetic texts and filtered websites.",
"format": "gguf",
"settings": {
"ctx_len": 4096,
"prompt_template": "Intruct:\n{prompt}\nOutput:",
"llama_model_path": "phi-2.Q8_0.gguf"
},
"parameters": {
"temperature": 0.7,
"top_p": 0.95,
"stream": true,
"max_tokens": 4096,
"stop": [],
"frequency_penalty": 0,
"presence_penalty": 0
},
"metadata": {
"author": "Microsoft",
"tags": ["3B", "Foundational Model"],
"size": 2960000000
},
"engine": "nitro"
}

View File

@ -1,29 +1,34 @@
{
"source_url": "https://huggingface.co/TheBloke/Phind-CodeLlama-34B-v2-GGUF/resolve/main/phind-codellama-34b-v2.Q5_K_M.gguf",
"id": "phind-34b",
"object": "model",
"name": "Phind 34B Q5",
"version": "1.0",
"description": "Phind 34B is fine-tuned on 1.5B tokens of high-quality programming data. This multi-lingual model excels in various programming languages and is designed to be steerable and user-friendly.",
"format": "gguf",
"settings": {
"ctx_len": 4096,
"prompt_template": "### System Prompt\n{system_message}\n### User Message\n{prompt}\n### Assistant"
},
"parameters": {
"temperature": 0.7,
"top_p": 0.95,
"stream": true,
"max_tokens": 4096,
"stop": [],
"frequency_penalty": 0,
"presence_penalty": 0
},
"metadata": {
"author": "Phind, The Bloke",
"tags": ["34B", "Finetuned"],
"size": 20220000000
},
"engine": "nitro"
}
"sources": [
{
"filename": "phind-codellama-34b-v2.Q5_K_M.gguf",
"url": "https://huggingface.co/TheBloke/Phind-CodeLlama-34B-v2-GGUF/resolve/main/phind-codellama-34b-v2.Q5_K_M.gguf"
}
],
"id": "phind-34b",
"object": "model",
"name": "Phind 34B Q5",
"version": "1.0",
"description": "Phind 34B is fine-tuned on 1.5B tokens of high-quality programming data. This multi-lingual model excels in various programming languages and is designed to be steerable and user-friendly.",
"format": "gguf",
"settings": {
"ctx_len": 4096,
"prompt_template": "### System Prompt\n{system_message}\n### User Message\n{prompt}\n### Assistant",
"llama_model_path": "phind-codellama-34b-v2.Q5_K_M.gguf"
},
"parameters": {
"temperature": 0.7,
"top_p": 0.95,
"stream": true,
"max_tokens": 4096,
"stop": [],
"frequency_penalty": 0,
"presence_penalty": 0
},
"metadata": {
"author": "Phind, The Bloke",
"tags": ["34B", "Finetuned"],
"size": 20220000000
},
"engine": "nitro"
}

View File

@ -1,29 +1,33 @@
{
"source_url": "https://huggingface.co/janhq/Solar-10.7B-SLERP-GGUF/resolve/main/solar-10.7b-slerp.Q4_K_M.gguf",
"id": "solar-10.7b-slerp",
"object": "model",
"name": "Solar Slerp 10.7B Q4",
"version": "1.0",
"description": "This model uses the Slerp merge method from SOLAR Instruct and Pandora-v1",
"format": "gguf",
"settings": {
"ctx_len": 4096,
"prompt_template": "### User: {prompt}\n### Assistant:"
},
"parameters": {
"temperature": 0.7,
"top_p": 0.95,
"stream": true,
"max_tokens": 4096,
"stop": [],
"frequency_penalty": 0,
"presence_penalty": 0
},
"metadata": {
"author": "Jan",
"tags": ["13B","Finetuned"],
"size": 6360000000
},
"engine": "nitro"
}
"sources": [
{
"filename": "solar-10.7b-slerp.Q4_K_M.gguf",
"url": "https://huggingface.co/janhq/Solar-10.7B-SLERP-GGUF/resolve/main/solar-10.7b-slerp.Q4_K_M.gguf"
}
],
"id": "solar-10.7b-slerp",
"object": "model",
"name": "Solar Slerp 10.7B Q4",
"version": "1.0",
"description": "This model uses the Slerp merge method from SOLAR Instruct and Pandora-v1",
"format": "gguf",
"settings": {
"ctx_len": 4096,
"prompt_template": "### User: {prompt}\n### Assistant:",
"llama_model_path": "solar-10.7b-slerp.Q4_K_M.gguf"
},
"parameters": {
"temperature": 0.7,
"top_p": 0.95,
"stream": true,
"max_tokens": 4096,
"frequency_penalty": 0,
"presence_penalty": 0
},
"metadata": {
"author": "Jan",
"tags": ["13B", "Finetuned"],
"size": 6360000000
},
"engine": "nitro"
}

View File

@ -1,29 +1,34 @@
{
"source_url": "https://huggingface.co/TheBloke/Starling-LM-7B-alpha-GGUF/resolve/main/starling-lm-7b-alpha.Q4_K_M.gguf",
"id": "starling-7b",
"object": "model",
"name": "Starling alpha 7B Q4",
"version": "1.0",
"description": "Starling 7B, an upgrade of Openchat 3.5 using RLAIF, is really good at various benchmarks, especially with GPT-4 judging its performance.",
"format": "gguf",
"settings": {
"ctx_len": 4096,
"prompt_template": "GPT4 User: {prompt}<|end_of_turn|>GPT4 Assistant:"
},
"parameters": {
"temperature": 0.7,
"top_p": 0.95,
"stream": true,
"max_tokens": 4096,
"stop": ["<|end_of_turn|>"],
"frequency_penalty": 0,
"presence_penalty": 0
},
"metadata": {
"author": "Berkeley-nest, The Bloke",
"tags": ["7B","Finetuned"],
"size": 4370000000
},
"engine": "nitro"
}
"sources": [
{
"filename": "starling-lm-7b-alpha.Q4_K_M.gguf",
"url": "https://huggingface.co/TheBloke/Starling-LM-7B-alpha-GGUF/resolve/main/starling-lm-7b-alpha.Q4_K_M.gguf"
}
],
"id": "starling-7b",
"object": "model",
"name": "Starling alpha 7B Q4",
"version": "1.0",
"description": "Starling 7B, an upgrade of Openchat 3.5 using RLAIF, is really good at various benchmarks, especially with GPT-4 judging its performance.",
"format": "gguf",
"settings": {
"ctx_len": 4096,
"prompt_template": "GPT4 User: {prompt}<|end_of_turn|>GPT4 Assistant:",
"llama_model_path": "starling-lm-7b-alpha.Q4_K_M.gguf"
},
"parameters": {
"temperature": 0.7,
"top_p": 0.95,
"stream": true,
"max_tokens": 4096,
"stop": ["<|end_of_turn|>"],
"frequency_penalty": 0,
"presence_penalty": 0
},
"metadata": {
"author": "Berkeley-nest, The Bloke",
"tags": ["7B", "Finetuned"],
"size": 4370000000
},
"engine": "nitro"
}

View File

@ -1,32 +1,33 @@
{
"source_url": "https://huggingface.co/janhq/stealth-v1.3-GGUF/resolve/main/stealth-v1.3.Q4_K_M.gguf",
"id": "stealth-v1.2-7b",
"object": "model",
"name": "Stealth 7B Q4",
"version": "1.0",
"description": "This is a new experimental family designed to enhance Mathematical and Logical abilities.",
"format": "gguf",
"settings": {
"ctx_len": 4096,
"prompt_template": "<|im_start|>system\n{system_message}<|im_end|>\n<|im_start|>user\n{prompt}<|im_end|>\n<|im_start|>assistant"
},
"parameters": {
"temperature": 0.7,
"top_p": 0.95,
"stream": true,
"max_tokens": 4096,
"stop": [],
"frequency_penalty": 0,
"presence_penalty": 0
},
"metadata": {
"author": "Jan",
"tags": [
"7B",
"Finetuned",
"Featured"
],
"size": 4370000000
},
"engine": "nitro"
}
"sources": [
{
"filename": "stealth-v1.3.Q4_K_M.gguf",
"url": "https://huggingface.co/janhq/stealth-v1.3-GGUF/resolve/main/stealth-v1.3.Q4_K_M.gguf"
}
],
"id": "stealth-v1.2-7b",
"object": "model",
"name": "Stealth 7B Q4",
"version": "1.0",
"description": "This is a new experimental family designed to enhance Mathematical and Logical abilities.",
"format": "gguf",
"settings": {
"ctx_len": 4096,
"prompt_template": "<|im_start|>system\n{system_message}<|im_end|>\n<|im_start|>user\n{prompt}<|im_end|>\n<|im_start|>assistant",
"llama_model_path": "stealth-v1.3.Q4_K_M.gguf"
},
"parameters": {
"temperature": 0.7,
"top_p": 0.95,
"stream": true,
"max_tokens": 4096,
"frequency_penalty": 0,
"presence_penalty": 0
},
"metadata": {
"author": "Jan",
"tags": ["7B", "Finetuned", "Featured"],
"size": 4370000000
},
"engine": "nitro"
}

View File

@ -1,5 +1,10 @@
{
"source_url": "https://huggingface.co/TheBloke/TinyLlama-1.1B-Chat-v1.0-GGUF/resolve/main/tinyllama-1.1b-chat-v1.0.Q4_K_M.gguf",
"sources": [
{
"filename": "tinyllama-1.1b-chat-v1.0.Q4_K_M.gguf",
"url": "https://huggingface.co/TheBloke/TinyLlama-1.1B-Chat-v1.0-GGUF/resolve/main/tinyllama-1.1b-chat-v1.0.Q4_K_M.gguf"
}
],
"id": "tinyllama-1.1b",
"object": "model",
"name": "TinyLlama Chat 1.1B Q4",
@ -7,8 +12,9 @@
"description": "TinyLlama is a tiny model with only 1.1B. It's a good model for less powerful computers.",
"format": "gguf",
"settings": {
"ctx_len": 2048,
"prompt_template": "<|system|>\n{system_message}<|user|>\n{prompt}<|assistant|>"
"ctx_len": 4096,
"prompt_template": "<|system|>\n{system_message}<|user|>\n{prompt}<|assistant|>",
"llama_model_path": "tinyllama-1.1b-chat-v1.0.Q4_K_M.gguf"
},
"parameters": {
"temperature": 0.7,
@ -20,9 +26,9 @@
"presence_penalty": 0
},
"metadata": {
"author": "TinyLlama",
"tags": ["Tiny", "Foundation Model"],
"size": 669000000
"author": "TinyLlama",
"tags": ["Tiny", "Foundation Model"],
"size": 669000000
},
"engine": "nitro"
}

View File

@ -1,29 +1,34 @@
{
"source_url": "https://huggingface.co/janhq/trinity-v1.2-GGUF/resolve/main/trinity-v1.2.Q4_K_M.gguf",
"id": "trinity-v1.2-7b",
"object": "model",
"name": "Trinity-v1.2 7B Q4",
"version": "1.0",
"description": "Trinity is an experimental model merge using the Slerp method. Recommended for daily assistance purposes.",
"format": "gguf",
"settings": {
"ctx_len": 4096,
"prompt_template": "<|im_start|>system\n{system_message}<|im_end|>\n<|im_start|>user\n{prompt}<|im_end|>\n<|im_start|>assistant"
},
"parameters": {
"temperature": 0.7,
"top_p": 0.95,
"stream": true,
"max_tokens": 4096,
"stop": [],
"frequency_penalty": 0,
"presence_penalty": 0
},
"metadata": {
"author": "Jan",
"tags": ["7B", "Merged", "Featured"],
"size": 4370000000,
"cover": "https://raw.githubusercontent.com/janhq/jan/main/models/trinity-v1.2-7b/cover.png"
},
"engine": "nitro"
}
"sources": [
{
"filename": "trinity-v1.2.Q4_K_M.gguf",
"url": "https://huggingface.co/janhq/trinity-v1.2-GGUF/resolve/main/trinity-v1.2.Q4_K_M.gguf"
}
],
"id": "trinity-v1.2-7b",
"object": "model",
"name": "Trinity-v1.2 7B Q4",
"version": "1.0",
"description": "Trinity is an experimental model merge using the Slerp method. Recommended for daily assistance purposes.",
"format": "gguf",
"settings": {
"ctx_len": 4096,
"prompt_template": "<|im_start|>system\n{system_message}<|im_end|>\n<|im_start|>user\n{prompt}<|im_end|>\n<|im_start|>assistant",
"llama_model_path": "trinity-v1.2.Q4_K_M.gguf"
},
"parameters": {
"temperature": 0.7,
"top_p": 0.95,
"stream": true,
"max_tokens": 4096,
"frequency_penalty": 0,
"presence_penalty": 0
},
"metadata": {
"author": "Jan",
"tags": ["7B", "Merged", "Featured"],
"size": 4370000000,
"cover": "https://raw.githubusercontent.com/janhq/jan/main/models/trinity-v1.2-7b/cover.png"
},
"engine": "nitro"
}

View File

@ -1,28 +1,33 @@
{
"source_url": "https://huggingface.co/TheBloke/tulu-2-dpo-70B-GGUF/resolve/main/tulu-2-dpo-70b.Q4_K_M.gguf",
"id": "tulu-2-70b",
"object": "model",
"name": "Tulu 2 70B Q4",
"version": "1.0",
"description": "Tulu 70B is a strong alternative to Llama 2 70b Chat to act as helpful assistants.",
"format": "gguf",
"settings": {
"ctx_len": 4096,
"prompt_template": "<|user|>\n{prompt}\n<|assistant|>"
},
"parameters": {
"temperature": 0.7,
"top_p": 0.95,
"stream": true,
"max_tokens": 4096,
"stop": [],
"frequency_penalty": 0,
"presence_penalty": 0
},
"metadata": {
"author": "Lizpreciatior, The Bloke",
"tags": ["70B", "Finetuned"],
"size": 41400000000
},
"engine": "nitro"
}
"sources": [
{
"filename": "tulu-2-dpo-70b.Q4_K_M.gguf",
"url": "https://huggingface.co/TheBloke/tulu-2-dpo-70B-GGUF/resolve/main/tulu-2-dpo-70b.Q4_K_M.gguf"
}
],
"id": "tulu-2-70b",
"object": "model",
"name": "Tulu 2 70B Q4",
"version": "1.0",
"description": "Tulu 70B is a strong alternative to Llama 2 70b Chat to act as helpful assistants.",
"format": "gguf",
"settings": {
"ctx_len": 4096,
"prompt_template": "<|user|>\n{prompt}\n<|assistant|>",
"llama_model_path": "tulu-2-dpo-70b.Q4_K_M.gguf"
},
"parameters": {
"temperature": 0.7,
"top_p": 0.95,
"stream": true,
"max_tokens": 4096,
"frequency_penalty": 0,
"presence_penalty": 0
},
"metadata": {
"author": "Lizpreciatior, The Bloke",
"tags": ["70B", "Finetuned"],
"size": 41400000000
},
"engine": "nitro"
}

View File

@ -1,29 +1,34 @@
{
"source_url": "https://huggingface.co/TheBloke/WizardCoder-Python-13B-V1.0-GGUF/resolve/main/wizardcoder-python-13b-v1.0.Q5_K_M.gguf",
"id": "wizardcoder-13b",
"object": "model",
"name": "Wizard Coder Python 13B Q5",
"version": "1.0",
"description": "WizardCoder 13B is a Python coding model. This model demonstrate high proficiency in specific domains like coding and mathematics.",
"format": "gguf",
"settings": {
"ctx_len": 4096,
"prompt_template": "### Instruction:\n{prompt}\n### Response:"
},
"parameters": {
"temperature": 0.7,
"top_p": 0.95,
"stream": true,
"max_tokens": 4096,
"stop": [],
"frequency_penalty": 0,
"presence_penalty": 0
},
"metadata": {
"author": "WizardLM, The Bloke",
"tags": ["Recommended", "13B", "Finetuned"],
"size": 7870000000
},
"engine": "nitro"
}
"sources": [
{
"filename": "wizardcoder-python-13b-v1.0.Q5_K_M.gguf",
"url": "https://huggingface.co/TheBloke/WizardCoder-Python-13B-V1.0-GGUF/resolve/main/wizardcoder-python-13b-v1.0.Q5_K_M.gguf"
}
],
"id": "wizardcoder-13b",
"object": "model",
"name": "Wizard Coder Python 13B Q5",
"version": "1.0",
"description": "WizardCoder 13B is a Python coding model. This model demonstrate high proficiency in specific domains like coding and mathematics.",
"format": "gguf",
"settings": {
"ctx_len": 4096,
"prompt_template": "### Instruction:\n{prompt}\n### Response:",
"llama_model_path": "wizardcoder-python-13b-v1.0.Q5_K_M.gguf"
},
"parameters": {
"temperature": 0.7,
"top_p": 0.95,
"stream": true,
"max_tokens": 4096,
"stop": [],
"frequency_penalty": 0,
"presence_penalty": 0
},
"metadata": {
"author": "WizardLM, The Bloke",
"tags": ["Recommended", "13B", "Finetuned"],
"size": 7870000000
},
"engine": "nitro"
}

View File

@ -1,29 +1,31 @@
{
"source_url": "https://huggingface.co/TheBloke/Yarn-Mistral-7B-128k-GGUF/resolve/main/yarn-mistral-7b-128k.Q4_K_M.gguf",
"id": "yarn-mistral-7b",
"object": "model",
"name": "Yarn Mistral 7B Q4",
"version": "1.0",
"description": "Yarn Mistral 7B is a language model for long context and supports a 128k token context window.",
"format": "gguf",
"settings": {
"ctx_len": 4096,
"prompt_template": "{prompt}"
},
"parameters": {
"temperature": 0.7,
"top_p": 0.95,
"stream": true,
"max_tokens": 4096,
"stop": [],
"frequency_penalty": 0,
"presence_penalty": 0
},
"metadata": {
"author": "NousResearch, The Bloke",
"tags": ["7B","Finetuned"],
"size": 4370000000
},
"engine": "nitro"
}
"sources": [
{
"url": "https://huggingface.co/TheBloke/Yarn-Mistral-7B-128k-GGUF/resolve/main/yarn-mistral-7b-128k.Q4_K_M.gguf"
}
],
"id": "yarn-mistral-7b",
"object": "model",
"name": "Yarn Mistral 7B Q4",
"version": "1.0",
"description": "Yarn Mistral 7B is a language model for long context and supports a 128k token context window.",
"format": "gguf",
"settings": {
"ctx_len": 4096,
"prompt_template": "{prompt}"
},
"parameters": {
"temperature": 0.7,
"top_p": 0.95,
"stream": true,
"max_tokens": 4096,
"frequency_penalty": 0,
"presence_penalty": 0
},
"metadata": {
"author": "NousResearch, The Bloke",
"tags": ["7B", "Finetuned"],
"size": 4370000000
},
"engine": "nitro"
}
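
Unlike every other entry in this release, the yarn-mistral source object above omits the "filename" field. A consumer that needs a local filename could plausibly fall back to the last path segment of the URL; a sketch under that assumption ("resolveFilename" is hypothetical, not an existing API):

// Prefer the declared filename; otherwise derive one from the URL path.
function resolveFilename(source: { filename?: string; url: string }): string {
  return source.filename ?? (new URL(source.url).pathname.split('/').pop() || 'model.gguf')
}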

View File

@ -1,29 +1,34 @@
{
"source_url": "https://huggingface.co/TheBloke/Yi-34B-Chat-GGUF/resolve/main/yi-34b-chat.Q5_K_M.gguf",
"id": "yi-34b",
"object": "model",
"name": "Yi 34B Q5",
"version": "1.0",
"description": "Yi-34B, a specialized chat model, is known for its diverse and creative responses and excels across various NLP tasks and benchmarks.",
"format": "gguf",
"settings": {
"ctx_len": 4096,
"prompt_template": "<|im_start|>system\n{system_message}<|im_end|>\n<|im_start|>user\n{prompt}<|im_end|>\n<|im_start|>assistant"
},
"parameters": {
"temperature": 0.7,
"top_p": 0.95,
"stream": true,
"max_tokens": 4096,
"stop": [],
"frequency_penalty": 0,
"presence_penalty": 0
},
"metadata": {
"author": "01-ai, The Bloke",
"tags": ["34B", "Foundational Model"],
"size": 20660000000
},
"engine": "nitro"
}
"sources": [
{
"filename": "yi-34b-chat.Q5_K_M.gguf",
"url": "https://huggingface.co/TheBloke/Yi-34B-Chat-GGUF/resolve/main/yi-34b-chat.Q5_K_M.gguf"
}
],
"id": "yi-34b",
"object": "model",
"name": "Yi 34B Q5",
"version": "1.0",
"description": "Yi-34B, a specialized chat model, is known for its diverse and creative responses and excels across various NLP tasks and benchmarks.",
"format": "gguf",
"settings": {
"ctx_len": 4096,
"prompt_template": "<|im_start|>system\n{system_message}<|im_end|>\n<|im_start|>user\n{prompt}<|im_end|>\n<|im_start|>assistant",
"llama_model_path": "yi-34b-chat.Q5_K_M.gguf"
},
"parameters": {
"temperature": 0.7,
"top_p": 0.95,
"stream": true,
"max_tokens": 4096,
"stop": [],
"frequency_penalty": 0,
"presence_penalty": 0
},
"metadata": {
"author": "01-ai, The Bloke",
"tags": ["34B", "Foundational Model"],
"size": 20660000000
},
"engine": "nitro"
}
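
Because the same hand edit is repeated across more than a dozen model.json files, a small consistency check that every "llama_model_path" matches one of the declared source filenames would catch copy-paste slips. A sketch assuming the repository's models/<id>/model.json layout ("checkModels" and its fallback behavior are assumptions, not existing tooling):

import { readdirSync, readFileSync } from 'node:fs'
import { join } from 'node:path'

interface Source {
  filename?: string
  url: string
}

// Report model.json files whose llama_model_path matches none of their sources.
function checkModels(modelsDir: string): string[] {
  const problems: string[] = []
  for (const dir of readdirSync(modelsDir)) {
    let model: { settings?: { llama_model_path?: string }; sources?: Source[] }
    try {
      model = JSON.parse(readFileSync(join(modelsDir, dir, 'model.json'), 'utf8'))
    } catch {
      continue // skip entries without a parseable model.json
    }
    const expected = model.settings?.llama_model_path
    if (!expected) continue // not every model declares a local file
    const names = (model.sources ?? []).map(
      (s) => s.filename ?? s.url.split('/').pop()
    )
    if (!names.includes(expected)) {
      problems.push(`${dir}: llama_model_path "${expected}" not found in sources`)
    }
  }
  return problems
}

console.log(checkModels('models').join('\n'))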

Some files were not shown because too many files have changed in this diff.