From 8bd4a3389fa6fbef3547daa37433c11d344f4e50 Mon Sep 17 00:00:00 2001
From: Louis
Date: Mon, 23 Jun 2025 13:06:29 +0700
Subject: [PATCH] refactor: frontend uses new engine extension

# Conflicts:
#	extensions/model-extension/resources/default.json
#	web-app/src/containers/dialogs/DeleteProvider.tsx
#	web-app/src/routes/hub.tsx
---
 .../browser/extensions/engines/AIEngine.ts | 104 +-
 .../extensions/enginesManagement.test.ts | 566 --
 .../browser/extensions/enginesManagement.ts | 115 -
 .../extensions/hardwareManagement.test.ts | 146 -
 .../browser/extensions/hardwareManagement.ts | 26 -
 core/src/browser/extensions/index.ts | 17 -
 core/src/browser/extensions/model.test.ts | 286 -
 core/src/browser/extensions/model.ts | 48 -
 .../engine-management-extension/engines.mjs | 47 -
 .../jest.config.js | 5 -
 .../models/anthropic.json | 58 -
 .../models/cohere.json | 44 -
 .../models/deepseek.json | 28 -
 .../models/google_gemini.json | 93 -
 .../models/groq.json | 87 -
 .../models/martian.json | 19 -
 .../models/mistral.json | 47 -
 .../models/nvidia.json | 21 -
 .../models/openai.json | 143 -
 .../models/openrouter.json | 92 -
 .../engine-management-extension/package.json | 47 -
 .../resources/anthropic.json | 23 -
 .../resources/cohere.json | 23 -
 .../resources/deepseek.json | 23 -
 .../resources/google_gemini.json | 23 -
 .../resources/groq.json | 23 -
 .../resources/martian.json | 23 -
 .../resources/mistral.json | 23 -
 .../resources/nvidia.json | 23 -
 .../resources/openai.json | 23 -
 .../resources/openrouter.json | 23 -
 .../rolldown.config.mjs | 44 -
 .../src/@types/global.d.ts | 23 -
 .../src/api.test.ts | 199 -
 .../src/error.test.ts | 19 -
 .../engine-management-extension/src/error.ts | 10 -
 .../src/index.test.ts | 449 --
 .../engine-management-extension/src/index.ts | 412 -
 .../src/node/index.ts | 69 -
 .../src/populateRemoteModels.test.ts | 139 -
 .../src/utils.test.ts | 90 -
 .../engine-management-extension/src/utils.ts | 105 -
 .../engine-management-extension/tsconfig.json | 16 -
 .../jest.config.js | 5 -
 .../package.json | 46 -
 .../rolldown.config.mjs | 16 -
 .../src/@types/global.d.ts | 11 -
 .../src/index.ts | 65 -
 .../tsconfig.json | 16 -
 extensions/llamacpp-extension/src/index.ts | 35 +-
 extensions/model-extension/README.md | 75 -
 extensions/model-extension/package.json | 37 -
 .../model-extension/resources/default.json | 6635 -----------------
 .../model-extension/resources/settings.json | 14 -
 .../model-extension/rolldown.config.mjs | 17 -
 .../model-extension/src/@types/global.d.ts | 13 -
 extensions/model-extension/src/index.test.ts | 88 -
 extensions/model-extension/src/index.ts | 436 --
 .../model-extension/src/legacy/delete.ts | 13 -
 .../src/legacy/model-json.test.ts | 89 -
 .../model-extension/src/legacy/model-json.ts | 141 -
 .../model-extension/src/migration.test.ts | 160 -
 extensions/model-extension/tsconfig.json | 15 -
 extensions/model-extension/vite.config.ts | 8 -
 mise.toml | 5 -
 web-app/src/containers/ChatInput.tsx | 2 +-
 .../src/containers/DropdownModelProvider.tsx | 10 +-
 web-app/src/containers/SettingsMenu.tsx | 4 +
 .../src/containers/dialogs/DeleteProvider.tsx | 3 +-
 web-app/src/hooks/useChat.ts | 49 +-
 web-app/src/hooks/useModelProvider.ts | 2 +-
 web-app/src/lib/completion.ts | 77 +-
 web-app/src/lib/model.spec.ts | 6 +-
 web-app/src/lib/models.ts | 9 -
 web-app/src/lib/utils.ts | 10 +-
 web-app/src/providers/DataProvider.tsx | 8 +-
 web-app/src/routes/hub.tsx | 10 +-
 web-app/src/routes/index.tsx | 2 +-
 .../settings/providers/$providerName.tsx | 101 +-
 web-app/src/routes/system-monitor.tsx | 17 +-
 web-app/src/services/models.ts | 296 +-
 web-app/src/services/providers.ts | 55 +-
 web-app/src/services/threads.ts | 6 +-
 web-app/src/types/models.ts | 10 -
 84 files changed, 291 insertions(+), 12170 deletions(-)
 delete mode 100644 core/src/browser/extensions/enginesManagement.test.ts
 delete mode 100644 core/src/browser/extensions/enginesManagement.ts
 delete mode 100644 core/src/browser/extensions/hardwareManagement.test.ts
 delete mode 100644 core/src/browser/extensions/hardwareManagement.ts
 delete mode 100644 core/src/browser/extensions/model.test.ts
 delete mode 100644 core/src/browser/extensions/model.ts
 delete mode 100644 extensions/engine-management-extension/engines.mjs
 delete mode 100644 extensions/engine-management-extension/jest.config.js
 delete mode 100644 extensions/engine-management-extension/models/anthropic.json
 delete mode 100644 extensions/engine-management-extension/models/cohere.json
 delete mode 100644 extensions/engine-management-extension/models/deepseek.json
 delete mode 100644 extensions/engine-management-extension/models/google_gemini.json
 delete mode 100644 extensions/engine-management-extension/models/groq.json
 delete mode 100644 extensions/engine-management-extension/models/martian.json
 delete mode 100644 extensions/engine-management-extension/models/mistral.json
 delete mode 100644 extensions/engine-management-extension/models/nvidia.json
 delete mode 100644 extensions/engine-management-extension/models/openai.json
 delete mode 100644 extensions/engine-management-extension/models/openrouter.json
 delete mode 100644 extensions/engine-management-extension/package.json
 delete mode 100644 extensions/engine-management-extension/resources/anthropic.json
 delete mode 100644 extensions/engine-management-extension/resources/cohere.json
 delete mode 100644 extensions/engine-management-extension/resources/deepseek.json
 delete mode 100644 extensions/engine-management-extension/resources/google_gemini.json
 delete mode 100644 extensions/engine-management-extension/resources/groq.json
 delete mode 100644 extensions/engine-management-extension/resources/martian.json
 delete mode 100644 extensions/engine-management-extension/resources/mistral.json
 delete mode 100644 extensions/engine-management-extension/resources/nvidia.json
 delete mode 100644 extensions/engine-management-extension/resources/openai.json
 delete mode 100644 extensions/engine-management-extension/resources/openrouter.json
 delete mode 100644 extensions/engine-management-extension/rolldown.config.mjs
 delete mode 100644 extensions/engine-management-extension/src/@types/global.d.ts
 delete mode 100644 extensions/engine-management-extension/src/api.test.ts
 delete mode 100644 extensions/engine-management-extension/src/error.test.ts
 delete mode 100644 extensions/engine-management-extension/src/error.ts
 delete mode 100644 extensions/engine-management-extension/src/index.test.ts
 delete mode 100644 extensions/engine-management-extension/src/index.ts
 delete mode 100644 extensions/engine-management-extension/src/node/index.ts
 delete mode 100644 extensions/engine-management-extension/src/populateRemoteModels.test.ts
 delete mode 100644 extensions/engine-management-extension/src/utils.test.ts
 delete mode 100644 extensions/engine-management-extension/src/utils.ts
 delete mode 100644 extensions/engine-management-extension/tsconfig.json
 delete mode 100644 extensions/hardware-management-extension/jest.config.js
 delete mode 100644 extensions/hardware-management-extension/package.json
 delete mode 100644 extensions/hardware-management-extension/rolldown.config.mjs
 delete mode 100644 extensions/hardware-management-extension/src/@types/global.d.ts
 delete mode 100644 extensions/hardware-management-extension/src/index.ts
 delete mode 100644 extensions/hardware-management-extension/tsconfig.json
 delete mode 100644 extensions/model-extension/README.md
 delete mode 100644 extensions/model-extension/package.json
 delete mode 100644 extensions/model-extension/resources/default.json
 delete mode 100644 extensions/model-extension/resources/settings.json
 delete mode 100644 extensions/model-extension/rolldown.config.mjs
 delete mode 100644 extensions/model-extension/src/@types/global.d.ts
 delete mode 100644 extensions/model-extension/src/index.test.ts
 delete mode 100644 extensions/model-extension/src/index.ts
 delete mode 100644 extensions/model-extension/src/legacy/delete.ts
 delete mode 100644 extensions/model-extension/src/legacy/model-json.test.ts
 delete mode 100644 extensions/model-extension/src/legacy/model-json.ts
 delete mode 100644 extensions/model-extension/src/migration.test.ts
 delete mode 100644 extensions/model-extension/tsconfig.json
 delete mode 100644 extensions/model-extension/vite.config.ts
diff --git a/core/src/browser/extensions/engines/AIEngine.ts b/core/src/browser/extensions/engines/AIEngine.ts
index c9b9fa361..a0ce5669c 100644
--- a/core/src/browser/extensions/engines/AIEngine.ts
+++ b/core/src/browser/extensions/engines/AIEngine.ts
@@ -4,72 +4,72 @@ import { EngineManager } from './EngineManager'
 /* AIEngine class types */
 export interface chatCompletionRequestMessage {
-  role: 'system' | 'user' | 'assistant' | 'tool';
-  content: string | null | Content[]; // Content can be a string OR an array of content parts
-  name?: string;
-  tool_calls?: any[]; // Simplified
-  tool_call_id?: string
+  role: 'system' | 'user' | 'assistant' | 'tool'
+  content: string | null | Content[] // Content can be a string OR an array of content parts
+  name?: string
+  tool_calls?: any[] // Simplified
+  tool_call_id?: string
 }
 
 export interface Content {
-  type: 'text' | 'input_image' | 'input_audio';
-  text?: string;
-  image_url?: string;
-  input_audio?: InputAudio;
+  type: 'text' | 'input_image' | 'input_audio'
+  text?: string
+  image_url?: string
+  input_audio?: InputAudio
 }
 
 export interface InputAudio {
-  data: string; // Base64 encoded audio data
-  format: 'mp3' | 'wav' | 'ogg' | 'flac'; // Add more formats as needed/llama-server seems to support mp3
+  data: string // Base64 encoded audio data
+  format: 'mp3' | 'wav' | 'ogg' | 'flac' // Add more formats as needed/llama-server seems to support mp3
 }
 
 export interface chatCompletionRequest {
-  model: string; // Model ID, though for local it might be implicit via sessionInfo
-  messages: chatCompletionRequestMessage[];
+  model: string // Model ID, though for local it might be implicit via sessionInfo
+  messages: chatCompletionRequestMessage[]
 
   // Core sampling parameters
-  temperature?: number | null;
-  dynatemp_range?: number | null;
-  dynatemp_exponent?: number | null;
-  top_k?: number | null;
-  top_p?: number | null;
-  min_p?: number | null;
-  typical_p?: number | null;
-  repeat_penalty?: number | null;
-  repeat_last_n?: number | null;
-  presence_penalty?: number | null;
-  frequency_penalty?: number | null;
-  dry_multiplier?: number | null;
-  dry_base?: number | null;
-  dry_allowed_length?: number | null;
-  dry_penalty_last_n?: number | null;
-  dry_sequence_breakers?: string[] | null;
-  xtc_probability?: number | null;
-  xtc_threshold?: number | null;
-  mirostat?: number | null; // 0 = disabled, 1 = Mirostat, 2 = Mirostat 2.0
-  mirostat_tau?: number | null;
-  mirostat_eta?: number | null;
+  temperature?: number | null
+  dynatemp_range?: number | null
+  dynatemp_exponent?: number | null
+  top_k?: number | null
+  top_p?: number | null
+  min_p?: number | null
+  typical_p?: number | null
+  repeat_penalty?: number | null
+  repeat_last_n?: number | null
+  presence_penalty?: number | null
+  frequency_penalty?: number | null
+  dry_multiplier?: number | null
+  dry_base?: number | null
+  dry_allowed_length?: number | null
+  dry_penalty_last_n?: number | null
+  dry_sequence_breakers?: string[] | null
+  xtc_probability?: number | null
+  xtc_threshold?: number | null
+  mirostat?: number | null // 0 = disabled, 1 = Mirostat, 2 = Mirostat 2.0
+  mirostat_tau?: number | null
+  mirostat_eta?: number | null
 
-  n_predict?: number | null;
-  n_indent?: number | null;
-  n_keep?: number | null;
-  stream?: boolean | null;
-  stop?: string | string[] | null;
-  seed?: number | null; // RNG seed
+  n_predict?: number | null
+  n_indent?: number | null
+  n_keep?: number | null
+  stream?: boolean | null
+  stop?: string | string[] | null
+  seed?: number | null // RNG seed
 
   // Advanced sampling
-  logit_bias?: { [key: string]: number } | null;
-  n_probs?: number | null;
-  min_keep?: number | null;
-  t_max_predict_ms?: number | null;
-  image_data?: Array<{ data: string; id: number }> | null;
+  logit_bias?: { [key: string]: number } | null
+  n_probs?: number | null
+  min_keep?: number | null
+  t_max_predict_ms?: number | null
+  image_data?: Array<{ data: string; id: number }> | null
 
   // Internal/optimization parameters
-  id_slot?: number | null;
-  cache_prompt?: boolean | null;
-  return_tokens?: boolean | null;
-  samplers?: string[] | null;
-  timings_per_token?: boolean | null;
-  post_sampling_probs?: boolean | null;
+  id_slot?: number | null
+  cache_prompt?: boolean | null
+  return_tokens?: boolean | null
+  samplers?: string[] | null
+  timings_per_token?: boolean | null
+  post_sampling_probs?: boolean | null
 }
 
 export interface chatCompletionChunkChoiceDelta {
@@ -208,7 +208,9 @@ export abstract class AIEngine extends BaseExtension {
   /**
    * Sends a chat request to the model
    */
-  abstract chat(opts: chatCompletionRequest): Promise>
+  abstract chat(
+    opts: chatCompletionRequest
+  ): Promise>
 
   /**
    * Deletes a model
diff --git a/core/src/browser/extensions/enginesManagement.test.ts b/core/src/browser/extensions/enginesManagement.test.ts
deleted file mode 100644
index 2a7880992..000000000
--- a/core/src/browser/extensions/enginesManagement.test.ts
+++ /dev/null
@@ -1,566 +0,0 @@
-import { EngineManagementExtension } from './enginesManagement'
-import { ExtensionTypeEnum } from '../extension'
-import {
-  EngineConfig,
-  EngineReleased,
-  EngineVariant,
-  Engines,
-  InferenceEngine,
-  DefaultEngineVariant,
-  Model
-} from '../../types'
-
-// Mock implementation of EngineManagementExtension
-class MockEngineManagementExtension extends EngineManagementExtension {
-  private mockEngines: Engines = {
-    llama: {
-      name: 'llama',
-      variants: [
-        {
-          variant: 'cpu',
-          version: '1.0.0',
-          path: '/engines/llama/cpu/1.0.0',
-          installed: true
-        },
-        {
-          variant: 'cuda',
-          version: '1.0.0',
-          path: '/engines/llama/cuda/1.0.0',
-          installed: false
-        }
-      ],
-      default: {
-        variant: 'cpu',
-        version: '1.0.0'
-      }
-    },
-    gpt4all: {
-      name: 'gpt4all',
-      variants: [
-        {
-          variant: 'cpu',
-          version: '2.0.0',
-          path: '/engines/gpt4all/cpu/2.0.0',
-          installed: true
-        }
-      ],
-      default: {
-        variant: 'cpu',
-        version:
'2.0.0' - } - } - } - - private mockReleases: { [key: string]: EngineReleased[] } = { - 'llama-1.0.0': [ - { - variant: 'cpu', - version: '1.0.0', - os: ['macos', 'linux', 'windows'], - url: 'https://example.com/llama/1.0.0/cpu' - }, - { - variant: 'cuda', - version: '1.0.0', - os: ['linux', 'windows'], - url: 'https://example.com/llama/1.0.0/cuda' - } - ], - 'llama-1.1.0': [ - { - variant: 'cpu', - version: '1.1.0', - os: ['macos', 'linux', 'windows'], - url: 'https://example.com/llama/1.1.0/cpu' - }, - { - variant: 'cuda', - version: '1.1.0', - os: ['linux', 'windows'], - url: 'https://example.com/llama/1.1.0/cuda' - } - ], - 'gpt4all-2.0.0': [ - { - variant: 'cpu', - version: '2.0.0', - os: ['macos', 'linux', 'windows'], - url: 'https://example.com/gpt4all/2.0.0/cpu' - } - ] - } - - private remoteModels: { [engine: string]: Model[] } = { - 'llama': [], - 'gpt4all': [] - } - - constructor() { - super('http://mock-url.com', 'mock-engine-extension', 'Mock Engine Extension', true, 'A mock engine extension', '1.0.0') - } - - onLoad(): void { - // Mock implementation - } - - onUnload(): void { - // Mock implementation - } - - async getEngines(): Promise { - return JSON.parse(JSON.stringify(this.mockEngines)) - } - - async getInstalledEngines(name: InferenceEngine): Promise { - if (!this.mockEngines[name]) { - return [] - } - - return this.mockEngines[name].variants.filter(variant => variant.installed) - } - - async getReleasedEnginesByVersion( - name: InferenceEngine, - version: string, - platform?: string - ): Promise { - const key = `${name}-${version}` - let releases = this.mockReleases[key] || [] - - if (platform) { - releases = releases.filter(release => release.os.includes(platform)) - } - - return releases - } - - async getLatestReleasedEngine( - name: InferenceEngine, - platform?: string - ): Promise { - // For mock, let's assume latest versions are 1.1.0 for llama and 2.0.0 for gpt4all - const latestVersions = { - 'llama': '1.1.0', - 'gpt4all': '2.0.0' - } - - if (!latestVersions[name]) { - return [] - } - - return this.getReleasedEnginesByVersion(name, latestVersions[name], platform) - } - - async installEngine( - name: string, - engineConfig: EngineConfig - ): Promise<{ messages: string }> { - if (!this.mockEngines[name]) { - this.mockEngines[name] = { - name, - variants: [], - default: { - variant: engineConfig.variant, - version: engineConfig.version - } - } - } - - // Check if variant already exists - const existingVariantIndex = this.mockEngines[name].variants.findIndex( - v => v.variant === engineConfig.variant && v.version === engineConfig.version - ) - - if (existingVariantIndex >= 0) { - this.mockEngines[name].variants[existingVariantIndex].installed = true - } else { - this.mockEngines[name].variants.push({ - variant: engineConfig.variant, - version: engineConfig.version, - path: `/engines/${name}/${engineConfig.variant}/${engineConfig.version}`, - installed: true - }) - } - - return { messages: `Successfully installed ${name} ${engineConfig.variant} ${engineConfig.version}` } - } - - async addRemoteEngine( - engineConfig: EngineConfig - ): Promise<{ messages: string }> { - const name = engineConfig.name || 'remote-engine' - - if (!this.mockEngines[name]) { - this.mockEngines[name] = { - name, - variants: [], - default: { - variant: engineConfig.variant, - version: engineConfig.version - } - } - } - - this.mockEngines[name].variants.push({ - variant: engineConfig.variant, - version: engineConfig.version, - path: engineConfig.path || 
`/engines/${name}/${engineConfig.variant}/${engineConfig.version}`, - installed: true, - url: engineConfig.url - }) - - return { messages: `Successfully added remote engine ${name}` } - } - - async uninstallEngine( - name: InferenceEngine, - engineConfig: EngineConfig - ): Promise<{ messages: string }> { - if (!this.mockEngines[name]) { - return { messages: `Engine ${name} not found` } - } - - const variantIndex = this.mockEngines[name].variants.findIndex( - v => v.variant === engineConfig.variant && v.version === engineConfig.version - ) - - if (variantIndex >= 0) { - this.mockEngines[name].variants[variantIndex].installed = false - - // If this was the default variant, reset default - if ( - this.mockEngines[name].default.variant === engineConfig.variant && - this.mockEngines[name].default.version === engineConfig.version - ) { - // Find another installed variant to set as default - const installedVariant = this.mockEngines[name].variants.find(v => v.installed) - if (installedVariant) { - this.mockEngines[name].default = { - variant: installedVariant.variant, - version: installedVariant.version - } - } else { - // No installed variants remain, clear default - this.mockEngines[name].default = { variant: '', version: '' } - } - } - - return { messages: `Successfully uninstalled ${name} ${engineConfig.variant} ${engineConfig.version}` } - } else { - return { messages: `Variant ${engineConfig.variant} ${engineConfig.version} not found for engine ${name}` } - } - } - - async getDefaultEngineVariant( - name: InferenceEngine - ): Promise { - if (!this.mockEngines[name]) { - return { variant: '', version: '' } - } - - return this.mockEngines[name].default - } - - async setDefaultEngineVariant( - name: InferenceEngine, - engineConfig: EngineConfig - ): Promise<{ messages: string }> { - if (!this.mockEngines[name]) { - return { messages: `Engine ${name} not found` } - } - - const variantExists = this.mockEngines[name].variants.some( - v => v.variant === engineConfig.variant && v.version === engineConfig.version && v.installed - ) - - if (!variantExists) { - return { messages: `Variant ${engineConfig.variant} ${engineConfig.version} not found or not installed` } - } - - this.mockEngines[name].default = { - variant: engineConfig.variant, - version: engineConfig.version - } - - return { messages: `Successfully set ${engineConfig.variant} ${engineConfig.version} as default for ${name}` } - } - - async updateEngine( - name: InferenceEngine, - engineConfig?: EngineConfig - ): Promise<{ messages: string }> { - if (!this.mockEngines[name]) { - return { messages: `Engine ${name} not found` } - } - - if (!engineConfig) { - // Assume we're updating to the latest version - return { messages: `Successfully updated ${name} to the latest version` } - } - - const variantIndex = this.mockEngines[name].variants.findIndex( - v => v.variant === engineConfig.variant && v.installed - ) - - if (variantIndex >= 0) { - // Update the version - this.mockEngines[name].variants[variantIndex].version = engineConfig.version - - // If this was the default variant, update default version too - if (this.mockEngines[name].default.variant === engineConfig.variant) { - this.mockEngines[name].default.version = engineConfig.version - } - - return { messages: `Successfully updated ${name} ${engineConfig.variant} to version ${engineConfig.version}` } - } else { - return { messages: `Installed variant ${engineConfig.variant} not found for engine ${name}` } - } - } - - async addRemoteModel(model: Model): Promise { - const engine = 
model.engine as string - - if (!this.remoteModels[engine]) { - this.remoteModels[engine] = [] - } - - this.remoteModels[engine].push(model) - } - - async getRemoteModels(name: InferenceEngine | string): Promise { - return this.remoteModels[name] || [] - } -} - -describe('EngineManagementExtension', () => { - let extension: MockEngineManagementExtension - - beforeEach(() => { - extension = new MockEngineManagementExtension() - }) - - test('should return the correct extension type', () => { - expect(extension.type()).toBe(ExtensionTypeEnum.Engine) - }) - - test('should get all engines', async () => { - const engines = await extension.getEngines() - - expect(engines).toBeDefined() - expect(engines.llama).toBeDefined() - expect(engines.gpt4all).toBeDefined() - expect(engines.llama.variants).toHaveLength(2) - expect(engines.gpt4all.variants).toHaveLength(1) - }) - - test('should get installed engines', async () => { - const llamaEngines = await extension.getInstalledEngines('llama') - - expect(llamaEngines).toHaveLength(1) - expect(llamaEngines[0].variant).toBe('cpu') - expect(llamaEngines[0].installed).toBe(true) - - const gpt4allEngines = await extension.getInstalledEngines('gpt4all') - - expect(gpt4allEngines).toHaveLength(1) - expect(gpt4allEngines[0].variant).toBe('cpu') - expect(gpt4allEngines[0].installed).toBe(true) - - // Test non-existent engine - const nonExistentEngines = await extension.getInstalledEngines('non-existent' as InferenceEngine) - expect(nonExistentEngines).toHaveLength(0) - }) - - test('should get released engines by version', async () => { - const llamaReleases = await extension.getReleasedEnginesByVersion('llama', '1.0.0') - - expect(llamaReleases).toHaveLength(2) - expect(llamaReleases[0].variant).toBe('cpu') - expect(llamaReleases[1].variant).toBe('cuda') - - // Test with platform filter - const llamaLinuxReleases = await extension.getReleasedEnginesByVersion('llama', '1.0.0', 'linux') - - expect(llamaLinuxReleases).toHaveLength(2) - - const llamaMacReleases = await extension.getReleasedEnginesByVersion('llama', '1.0.0', 'macos') - - expect(llamaMacReleases).toHaveLength(1) - expect(llamaMacReleases[0].variant).toBe('cpu') - - // Test non-existent version - const nonExistentReleases = await extension.getReleasedEnginesByVersion('llama', '9.9.9') - expect(nonExistentReleases).toHaveLength(0) - }) - - test('should get latest released engines', async () => { - const latestLlamaReleases = await extension.getLatestReleasedEngine('llama') - - expect(latestLlamaReleases).toHaveLength(2) - expect(latestLlamaReleases[0].version).toBe('1.1.0') - - // Test with platform filter - const latestLlamaMacReleases = await extension.getLatestReleasedEngine('llama', 'macos') - - expect(latestLlamaMacReleases).toHaveLength(1) - expect(latestLlamaMacReleases[0].variant).toBe('cpu') - expect(latestLlamaMacReleases[0].version).toBe('1.1.0') - - // Test non-existent engine - const nonExistentReleases = await extension.getLatestReleasedEngine('non-existent' as InferenceEngine) - expect(nonExistentReleases).toHaveLength(0) - }) - - test('should install engine', async () => { - // Install existing engine variant that is not installed - const result = await extension.installEngine('llama', { variant: 'cuda', version: '1.0.0' }) - - expect(result.messages).toContain('Successfully installed') - - const installedEngines = await extension.getInstalledEngines('llama') - expect(installedEngines).toHaveLength(2) - expect(installedEngines.some(e => e.variant === 'cuda')).toBe(true) - - // Install 
non-existent engine - const newEngineResult = await extension.installEngine('new-engine', { variant: 'cpu', version: '1.0.0' }) - - expect(newEngineResult.messages).toContain('Successfully installed') - - const engines = await extension.getEngines() - expect(engines['new-engine']).toBeDefined() - expect(engines['new-engine'].variants).toHaveLength(1) - expect(engines['new-engine'].variants[0].installed).toBe(true) - }) - - test('should add remote engine', async () => { - const result = await extension.addRemoteEngine({ - name: 'remote-llm', - variant: 'remote', - version: '1.0.0', - url: 'https://example.com/remote-llm-api' - }) - - expect(result.messages).toContain('Successfully added remote engine') - - const engines = await extension.getEngines() - expect(engines['remote-llm']).toBeDefined() - expect(engines['remote-llm'].variants).toHaveLength(1) - expect(engines['remote-llm'].variants[0].url).toBe('https://example.com/remote-llm-api') - }) - - test('should uninstall engine', async () => { - const result = await extension.uninstallEngine('llama', { variant: 'cpu', version: '1.0.0' }) - - expect(result.messages).toContain('Successfully uninstalled') - - const installedEngines = await extension.getInstalledEngines('llama') - expect(installedEngines).toHaveLength(0) - - // Test uninstalling non-existent variant - const nonExistentResult = await extension.uninstallEngine('llama', { variant: 'non-existent', version: '1.0.0' }) - - expect(nonExistentResult.messages).toContain('not found') - }) - - test('should handle default variant when uninstalling', async () => { - // First install cuda variant - await extension.installEngine('llama', { variant: 'cuda', version: '1.0.0' }) - - // Set cuda as default - await extension.setDefaultEngineVariant('llama', { variant: 'cuda', version: '1.0.0' }) - - // Check that cuda is now default - let defaultVariant = await extension.getDefaultEngineVariant('llama') - expect(defaultVariant.variant).toBe('cuda') - - // Uninstall cuda - await extension.uninstallEngine('llama', { variant: 'cuda', version: '1.0.0' }) - - // Check that default has changed to another installed variant - defaultVariant = await extension.getDefaultEngineVariant('llama') - expect(defaultVariant.variant).toBe('cpu') - - // Uninstall all variants - await extension.uninstallEngine('llama', { variant: 'cpu', version: '1.0.0' }) - - // Check that default is now empty - defaultVariant = await extension.getDefaultEngineVariant('llama') - expect(defaultVariant.variant).toBe('') - expect(defaultVariant.version).toBe('') - }) - - test('should get default engine variant', async () => { - const llamaDefault = await extension.getDefaultEngineVariant('llama') - - expect(llamaDefault.variant).toBe('cpu') - expect(llamaDefault.version).toBe('1.0.0') - - // Test non-existent engine - const nonExistentDefault = await extension.getDefaultEngineVariant('non-existent' as InferenceEngine) - expect(nonExistentDefault.variant).toBe('') - expect(nonExistentDefault.version).toBe('') - }) - - test('should set default engine variant', async () => { - // Install cuda variant - await extension.installEngine('llama', { variant: 'cuda', version: '1.0.0' }) - - const result = await extension.setDefaultEngineVariant('llama', { variant: 'cuda', version: '1.0.0' }) - - expect(result.messages).toContain('Successfully set') - - const defaultVariant = await extension.getDefaultEngineVariant('llama') - expect(defaultVariant.variant).toBe('cuda') - expect(defaultVariant.version).toBe('1.0.0') - - // Test setting non-existent 
variant as default - const nonExistentResult = await extension.setDefaultEngineVariant('llama', { variant: 'non-existent', version: '1.0.0' }) - - expect(nonExistentResult.messages).toContain('not found') - }) - - test('should update engine', async () => { - const result = await extension.updateEngine('llama', { variant: 'cpu', version: '1.1.0' }) - - expect(result.messages).toContain('Successfully updated') - - const engines = await extension.getEngines() - const cpuVariant = engines.llama.variants.find(v => v.variant === 'cpu') - expect(cpuVariant).toBeDefined() - expect(cpuVariant?.version).toBe('1.1.0') - - // Default should also be updated since cpu was default - expect(engines.llama.default.version).toBe('1.1.0') - - // Test updating non-existent variant - const nonExistentResult = await extension.updateEngine('llama', { variant: 'non-existent', version: '1.1.0' }) - - expect(nonExistentResult.messages).toContain('not found') - }) - - test('should add and get remote models', async () => { - const model: Model = { - id: 'remote-model-1', - name: 'Remote Model 1', - path: '/path/to/remote-model', - engine: 'llama', - format: 'gguf', - modelFormat: 'gguf', - source: 'remote', - status: 'ready', - contextLength: 4096, - sizeInGB: 4, - created: new Date().toISOString() - } - - await extension.addRemoteModel(model) - - const llamaModels = await extension.getRemoteModels('llama') - expect(llamaModels).toHaveLength(1) - expect(llamaModels[0].id).toBe('remote-model-1') - - // Test non-existent engine - const nonExistentModels = await extension.getRemoteModels('non-existent') - expect(nonExistentModels).toHaveLength(0) - }) -}) \ No newline at end of file diff --git a/core/src/browser/extensions/enginesManagement.ts b/core/src/browser/extensions/enginesManagement.ts deleted file mode 100644 index 0dbb418f4..000000000 --- a/core/src/browser/extensions/enginesManagement.ts +++ /dev/null @@ -1,115 +0,0 @@ -import { - Engines, - EngineVariant, - EngineReleased, - EngineConfig, - DefaultEngineVariant, - Model, -} from '../../types' -import { BaseExtension, ExtensionTypeEnum } from '../extension' - -/** - * Engine management extension. Persists and retrieves engine management. - * @abstract - * @extends BaseExtension - */ -export abstract class EngineManagementExtension extends BaseExtension { - type(): ExtensionTypeEnum | undefined { - return ExtensionTypeEnum.Engine - } - - /** - * @returns A Promise that resolves to an object of list engines. - */ - abstract getEngines(): Promise - - /** - * @param name - Inference engine name. - * @returns A Promise that resolves to an array of installed engine. - */ - abstract getInstalledEngines(name: string): Promise - - /** - * @param name - Inference engine name. - * @param version - Version of the engine. - * @param platform - Optional to sort by operating system. macOS, linux, windows. - * @returns A Promise that resolves to an array of latest released engine by version. - */ - abstract getReleasedEnginesByVersion( - name: string, - version: string, - platform?: string - ): Promise - - /** - * @param name - Inference engine name. - * @param platform - Optional to sort by operating system. macOS, linux, windows. - * @returns A Promise that resolves to an array of latest released engine. - */ - abstract getLatestReleasedEngine( - name: string, - platform?: string - ): Promise - - /** - * @param name - Inference engine name. - * @returns A Promise that resolves to intall of engine. 
- */ - abstract installEngine( - name: string, - engineConfig: EngineConfig - ): Promise<{ messages: string }> - - /** - * Add a new remote engine - * @returns A Promise that resolves to intall of engine. - */ - abstract addRemoteEngine( - engineConfig: EngineConfig - ): Promise<{ messages: string }> - - /** - * @param name - Inference engine name. - * @returns A Promise that resolves to unintall of engine. - */ - abstract uninstallEngine( - name: string, - engineConfig: EngineConfig - ): Promise<{ messages: string }> - - /** - * @param name - Inference engine name. - * @returns A Promise that resolves to an object of default engine. - */ - abstract getDefaultEngineVariant( - name: string - ): Promise - - /** - * @body variant - string - * @body version - string - * @returns A Promise that resolves to set default engine. - */ - abstract setDefaultEngineVariant( - name: string, - engineConfig: EngineConfig - ): Promise<{ messages: string }> - - /** - * @returns A Promise that resolves to update engine. - */ - abstract updateEngine( - name: string, - engineConfig?: EngineConfig - ): Promise<{ messages: string }> - - /** - * Add a new remote model for a specific engine - */ - abstract addRemoteModel(model: Model): Promise - - /** - * @returns A Promise that resolves to an object of remote models list . - */ - abstract getRemoteModels(name: string): Promise -} diff --git a/core/src/browser/extensions/hardwareManagement.test.ts b/core/src/browser/extensions/hardwareManagement.test.ts deleted file mode 100644 index 6ada06862..000000000 --- a/core/src/browser/extensions/hardwareManagement.test.ts +++ /dev/null @@ -1,146 +0,0 @@ -import { HardwareManagementExtension } from './hardwareManagement' -import { ExtensionTypeEnum } from '../extension' -import { HardwareInformation } from '../../types' - -// Mock implementation of HardwareManagementExtension -class MockHardwareManagementExtension extends HardwareManagementExtension { - private activeGpus: number[] = [0] - private mockHardwareInfo: HardwareInformation = { - cpu: { - manufacturer: 'Mock CPU Manufacturer', - brand: 'Mock CPU', - cores: 8, - physicalCores: 4, - speed: 3.5, - }, - memory: { - total: 16 * 1024 * 1024 * 1024, // 16GB in bytes - free: 8 * 1024 * 1024 * 1024, // 8GB in bytes - }, - gpus: [ - { - id: 0, - vendor: 'Mock GPU Vendor', - model: 'Mock GPU Model 1', - memory: 8 * 1024 * 1024 * 1024, // 8GB in bytes - }, - { - id: 1, - vendor: 'Mock GPU Vendor', - model: 'Mock GPU Model 2', - memory: 4 * 1024 * 1024 * 1024, // 4GB in bytes - } - ], - active_gpus: [0], - } - - constructor() { - super('http://mock-url.com', 'mock-hardware-extension', 'Mock Hardware Extension', true, 'A mock hardware extension', '1.0.0') - } - - onLoad(): void { - // Mock implementation - } - - onUnload(): void { - // Mock implementation - } - - async getHardware(): Promise { - // Return a copy to prevent test side effects - return JSON.parse(JSON.stringify(this.mockHardwareInfo)) - } - - async setAvtiveGpu(data: { gpus: number[] }): Promise<{ - message: string - activated_gpus: number[] - }> { - // Validate GPUs exist - const validGpus = data.gpus.filter(gpuId => - this.mockHardwareInfo.gpus.some(gpu => gpu.id === gpuId) - ) - - if (validGpus.length === 0) { - throw new Error('No valid GPUs selected') - } - - // Update active GPUs - this.activeGpus = validGpus - this.mockHardwareInfo.active_gpus = validGpus - - return { - message: 'GPU activation successful', - activated_gpus: validGpus - } - } -} - -describe('HardwareManagementExtension', () => { - let 
extension: MockHardwareManagementExtension - - beforeEach(() => { - extension = new MockHardwareManagementExtension() - }) - - test('should return the correct extension type', () => { - expect(extension.type()).toBe(ExtensionTypeEnum.Hardware) - }) - - test('should get hardware information', async () => { - const hardwareInfo = await extension.getHardware() - - // Check CPU info - expect(hardwareInfo.cpu).toBeDefined() - expect(hardwareInfo.cpu.manufacturer).toBe('Mock CPU Manufacturer') - expect(hardwareInfo.cpu.cores).toBe(8) - - // Check memory info - expect(hardwareInfo.memory).toBeDefined() - expect(hardwareInfo.memory.total).toBe(16 * 1024 * 1024 * 1024) - - // Check GPU info - expect(hardwareInfo.gpus).toHaveLength(2) - expect(hardwareInfo.gpus[0].model).toBe('Mock GPU Model 1') - expect(hardwareInfo.gpus[1].model).toBe('Mock GPU Model 2') - - // Check active GPUs - expect(hardwareInfo.active_gpus).toEqual([0]) - }) - - test('should set active GPUs', async () => { - const result = await extension.setAvtiveGpu({ gpus: [1] }) - - expect(result.message).toBe('GPU activation successful') - expect(result.activated_gpus).toEqual([1]) - - // Verify the change in hardware info - const hardwareInfo = await extension.getHardware() - expect(hardwareInfo.active_gpus).toEqual([1]) - }) - - test('should set multiple active GPUs', async () => { - const result = await extension.setAvtiveGpu({ gpus: [0, 1] }) - - expect(result.message).toBe('GPU activation successful') - expect(result.activated_gpus).toEqual([0, 1]) - - // Verify the change in hardware info - const hardwareInfo = await extension.getHardware() - expect(hardwareInfo.active_gpus).toEqual([0, 1]) - }) - - test('should throw error for invalid GPU ids', async () => { - await expect(extension.setAvtiveGpu({ gpus: [999] })).rejects.toThrow('No valid GPUs selected') - }) - - test('should handle mix of valid and invalid GPU ids', async () => { - const result = await extension.setAvtiveGpu({ gpus: [0, 999] }) - - // Should only activate valid GPUs - expect(result.activated_gpus).toEqual([0]) - - // Verify the change in hardware info - const hardwareInfo = await extension.getHardware() - expect(hardwareInfo.active_gpus).toEqual([0]) - }) -}) \ No newline at end of file diff --git a/core/src/browser/extensions/hardwareManagement.ts b/core/src/browser/extensions/hardwareManagement.ts deleted file mode 100644 index 5de3c9257..000000000 --- a/core/src/browser/extensions/hardwareManagement.ts +++ /dev/null @@ -1,26 +0,0 @@ -import { HardwareInformation } from '../../types' -import { BaseExtension, ExtensionTypeEnum } from '../extension' - -/** - * Engine management extension. Persists and retrieves engine management. - * @abstract - * @extends BaseExtension - */ -export abstract class HardwareManagementExtension extends BaseExtension { - type(): ExtensionTypeEnum | undefined { - return ExtensionTypeEnum.Hardware - } - - /** - * @returns A Promise that resolves to an object of list hardware. - */ - abstract getHardware(): Promise - - /** - * @returns A Promise that resolves to an object of set active gpus. 
- */ - abstract setActiveGpu(data: { gpus: number[] }): Promise<{ - message: string - activated_gpus: number[] - }> -} diff --git a/core/src/browser/extensions/index.ts b/core/src/browser/extensions/index.ts index f11c7b09f..6ee3baff7 100644 --- a/core/src/browser/extensions/index.ts +++ b/core/src/browser/extensions/index.ts @@ -9,29 +9,12 @@ export { ConversationalExtension } from './conversational' */ export { InferenceExtension } from './inference' - - /** * Assistant extension for managing assistants. */ export { AssistantExtension } from './assistant' -/** - * Model extension for managing models. - */ -export { ModelExtension } from './model' - /** * Base AI Engines. */ export * from './engines' - -/** - * Engines Management - */ -export * from './enginesManagement' - -/** - * Hardware Management - */ -export * from './hardwareManagement' diff --git a/core/src/browser/extensions/model.test.ts b/core/src/browser/extensions/model.test.ts deleted file mode 100644 index bc045419d..000000000 --- a/core/src/browser/extensions/model.test.ts +++ /dev/null @@ -1,286 +0,0 @@ -import { ModelExtension } from './model' -import { ExtensionTypeEnum } from '../extension' -import { Model, OptionType, ModelSource } from '../../types' - -// Mock implementation of ModelExtension -class MockModelExtension extends ModelExtension { - private models: Model[] = [] - private sources: ModelSource[] = [] - private loadedModels: Set = new Set() - private modelsPulling: Set = new Set() - - constructor() { - super('http://mock-url.com', 'mock-model-extension', 'Mock Model Extension', true, 'A mock model extension', '1.0.0') - } - - onLoad(): void { - // Mock implementation - } - - onUnload(): void { - // Mock implementation - } - - async configurePullOptions(configs: { [key: string]: any }): Promise { - return configs - } - - async getModels(): Promise { - return this.models - } - - async pullModel(model: string, id?: string, name?: string): Promise { - const modelId = id || `model-${Date.now()}` - this.modelsPulling.add(modelId) - - // Simulate model pull by adding it to the model list - const newModel: Model = { - id: modelId, - path: `/models/${model}`, - name: name || model, - source: 'mock-source', - modelFormat: 'mock-format', - engine: 'mock-engine', - format: 'mock-format', - status: 'ready', - contextLength: 2048, - sizeInGB: 2, - created: new Date().toISOString(), - pullProgress: { - percent: 100, - transferred: 0, - total: 0 - } - } - - this.models.push(newModel) - this.loadedModels.add(modelId) - this.modelsPulling.delete(modelId) - } - - async cancelModelPull(modelId: string): Promise { - this.modelsPulling.delete(modelId) - // Remove the model if it's in the pulling state - this.models = this.models.filter(m => m.id !== modelId) - } - - async importModel( - model: string, - modelPath: string, - name?: string, - optionType?: OptionType - ): Promise { - const newModel: Model = { - id: `model-${Date.now()}`, - path: modelPath, - name: name || model, - source: 'local', - modelFormat: optionType?.format || 'mock-format', - engine: optionType?.engine || 'mock-engine', - format: optionType?.format || 'mock-format', - status: 'ready', - contextLength: optionType?.contextLength || 2048, - sizeInGB: 2, - created: new Date().toISOString(), - } - - this.models.push(newModel) - this.loadedModels.add(newModel.id) - } - - async updateModel(modelInfo: Partial): Promise { - if (!modelInfo.id) throw new Error('Model ID is required') - - const index = this.models.findIndex(m => m.id === modelInfo.id) - if (index === 
-1) throw new Error('Model not found') - - this.models[index] = { ...this.models[index], ...modelInfo } - return this.models[index] - } - - async deleteModel(modelId: string): Promise { - this.models = this.models.filter(m => m.id !== modelId) - this.loadedModels.delete(modelId) - } - - async isModelLoaded(modelId: string): Promise { - return this.loadedModels.has(modelId) - } - - async getSources(): Promise { - return this.sources - } - - async addSource(source: string): Promise { - const newSource: ModelSource = { - id: `source-${Date.now()}`, - url: source, - name: `Source ${this.sources.length + 1}`, - type: 'mock-type' - } - - this.sources.push(newSource) - } - - async deleteSource(sourceId: string): Promise { - this.sources = this.sources.filter(s => s.id !== sourceId) - } -} - -describe('ModelExtension', () => { - let extension: MockModelExtension - - beforeEach(() => { - extension = new MockModelExtension() - }) - - test('should return the correct extension type', () => { - expect(extension.type()).toBe(ExtensionTypeEnum.Model) - }) - - test('should configure pull options', async () => { - const configs = { apiKey: 'test-key', baseUrl: 'https://test-url.com' } - const result = await extension.configurePullOptions(configs) - expect(result).toEqual(configs) - }) - - test('should add and get models', async () => { - await extension.pullModel('test-model', 'test-id', 'Test Model') - - const models = await extension.getModels() - expect(models).toHaveLength(1) - expect(models[0].id).toBe('test-id') - expect(models[0].name).toBe('Test Model') - }) - - test('should pull model with default id and name', async () => { - await extension.pullModel('test-model') - - const models = await extension.getModels() - expect(models).toHaveLength(1) - expect(models[0].name).toBe('test-model') - }) - - test('should cancel model pull', async () => { - await extension.pullModel('test-model', 'test-id') - - // Verify model exists - let models = await extension.getModels() - expect(models).toHaveLength(1) - - // Cancel the pull - await extension.cancelModelPull('test-id') - - // Verify model was removed - models = await extension.getModels() - expect(models).toHaveLength(0) - }) - - test('should import model', async () => { - const optionType: OptionType = { - engine: 'test-engine', - format: 'test-format', - contextLength: 4096 - } - - await extension.importModel('test-model', '/path/to/model', 'Imported Model', optionType) - - const models = await extension.getModels() - expect(models).toHaveLength(1) - expect(models[0].name).toBe('Imported Model') - expect(models[0].engine).toBe('test-engine') - expect(models[0].format).toBe('test-format') - expect(models[0].contextLength).toBe(4096) - }) - - test('should import model with default values', async () => { - await extension.importModel('test-model', '/path/to/model') - - const models = await extension.getModels() - expect(models).toHaveLength(1) - expect(models[0].name).toBe('test-model') - expect(models[0].engine).toBe('mock-engine') - expect(models[0].format).toBe('mock-format') - }) - - test('should update model', async () => { - await extension.pullModel('test-model', 'test-id', 'Test Model') - - const updatedModel = await extension.updateModel({ - id: 'test-id', - name: 'Updated Model', - contextLength: 8192 - }) - - expect(updatedModel.name).toBe('Updated Model') - expect(updatedModel.contextLength).toBe(8192) - - // Verify changes persisted - const models = await extension.getModels() - expect(models[0].name).toBe('Updated Model') - 
expect(models[0].contextLength).toBe(8192) - }) - - test('should throw error when updating non-existent model', async () => { - await expect(extension.updateModel({ - id: 'non-existent', - name: 'Updated Model' - })).rejects.toThrow('Model not found') - }) - - test('should throw error when updating model without ID', async () => { - await expect(extension.updateModel({ - name: 'Updated Model' - })).rejects.toThrow('Model ID is required') - }) - - test('should delete model', async () => { - await extension.pullModel('test-model', 'test-id') - - // Verify model exists - let models = await extension.getModels() - expect(models).toHaveLength(1) - - // Delete the model - await extension.deleteModel('test-id') - - // Verify model was removed - models = await extension.getModels() - expect(models).toHaveLength(0) - }) - - test('should check if model is loaded', async () => { - await extension.pullModel('test-model', 'test-id') - - // Check if model is loaded - const isLoaded = await extension.isModelLoaded('test-id') - expect(isLoaded).toBe(true) - - // Check if non-existent model is loaded - const nonExistentLoaded = await extension.isModelLoaded('non-existent') - expect(nonExistentLoaded).toBe(false) - }) - - test('should add and get sources', async () => { - await extension.addSource('https://test-source.com') - - const sources = await extension.getSources() - expect(sources).toHaveLength(1) - expect(sources[0].url).toBe('https://test-source.com') - }) - - test('should delete source', async () => { - await extension.addSource('https://test-source.com') - - // Get the source ID - const sources = await extension.getSources() - const sourceId = sources[0].id - - // Delete the source - await extension.deleteSource(sourceId) - - // Verify source was removed - const updatedSources = await extension.getSources() - expect(updatedSources).toHaveLength(0) - }) -}) \ No newline at end of file diff --git a/core/src/browser/extensions/model.ts b/core/src/browser/extensions/model.ts deleted file mode 100644 index 238e5999f..000000000 --- a/core/src/browser/extensions/model.ts +++ /dev/null @@ -1,48 +0,0 @@ -import { BaseExtension, ExtensionTypeEnum } from '../extension' -import { Model, ModelInterface, ModelSource, OptionType } from '../../types' - -/** - * Model extension for managing models. - */ -export abstract class ModelExtension - extends BaseExtension - implements ModelInterface -{ - /** - * Model extension type. 
- */ - type(): ExtensionTypeEnum | undefined { - return ExtensionTypeEnum.Model - } - - abstract configurePullOptions(configs: { [key: string]: any }): Promise - abstract getModels(): Promise - abstract pullModel(model: string, id?: string, name?: string): Promise - abstract cancelModelPull(modelId: string): Promise - abstract importModel( - model: string, - modePath: string, - name?: string, - optionType?: OptionType - ): Promise - abstract updateModel(modelInfo: Partial): Promise - abstract deleteModel(model: string): Promise - abstract isModelLoaded(model: string): Promise - /** - * Get model sources - */ - abstract getSources(): Promise - /** - * Add a model source - */ - abstract addSource(source: string): Promise - /** - * Delete a model source - */ - abstract deleteSource(source: string): Promise - - /** - * Fetch models hub - */ - abstract fetchModelsHub(): Promise -} diff --git a/extensions/engine-management-extension/engines.mjs b/extensions/engine-management-extension/engines.mjs deleted file mode 100644 index eafe8a09c..000000000 --- a/extensions/engine-management-extension/engines.mjs +++ /dev/null @@ -1,47 +0,0 @@ -import anthropic from './resources/anthropic.json' with { type: 'json' } -import cohere from './resources/cohere.json' with { type: 'json' } -import openai from './resources/openai.json' with { type: 'json' } -import openrouter from './resources/openrouter.json' with { type: 'json' } -import groq from './resources/groq.json' with { type: 'json' } -import martian from './resources/martian.json' with { type: 'json' } -import mistral from './resources/mistral.json' with { type: 'json' } -import nvidia from './resources/nvidia.json' with { type: 'json' } -import deepseek from './resources/deepseek.json' with { type: 'json' } -import googleGemini from './resources/google_gemini.json' with { type: 'json' } - -import anthropicModels from './models/anthropic.json' with { type: 'json' } -import cohereModels from './models/cohere.json' with { type: 'json' } -import openaiModels from './models/openai.json' with { type: 'json' } -import openrouterModels from './models/openrouter.json' with { type: 'json' } -import groqModels from './models/groq.json' with { type: 'json' } -import martianModels from './models/martian.json' with { type: 'json' } -import mistralModels from './models/mistral.json' with { type: 'json' } -import nvidiaModels from './models/nvidia.json' with { type: 'json' } -import deepseekModels from './models/deepseek.json' with { type: 'json' } -import googleGeminiModels from './models/google_gemini.json' with { type: 'json' } - -const engines = [ - anthropic, - openai, - cohere, - openrouter, - groq, - mistral, - martian, - nvidia, - deepseek, - googleGemini, -] -const models = [ - ...anthropicModels, - ...openaiModels, - ...cohereModels, - ...openrouterModels, - ...groqModels, - ...mistralModels, - ...martianModels, - ...nvidiaModels, - ...deepseekModels, - ...googleGeminiModels, -] -export { engines, models } diff --git a/extensions/engine-management-extension/jest.config.js b/extensions/engine-management-extension/jest.config.js deleted file mode 100644 index 8bb37208d..000000000 --- a/extensions/engine-management-extension/jest.config.js +++ /dev/null @@ -1,5 +0,0 @@ -/** @type {import('ts-jest').JestConfigWithTsJest} */ -module.exports = { - preset: 'ts-jest', - testEnvironment: 'node', -} diff --git a/extensions/engine-management-extension/models/anthropic.json b/extensions/engine-management-extension/models/anthropic.json deleted file mode 100644 index 
2b3d7d683..000000000 --- a/extensions/engine-management-extension/models/anthropic.json +++ /dev/null @@ -1,58 +0,0 @@ -[ - { - "model": "claude-3-opus-latest", - "object": "model", - "name": "Claude 3 Opus Latest", - "version": "1.0", - "description": "Claude 3 Opus is a powerful model suitables for highly complex task.", - "inference_params": { - "max_tokens": 4096, - "temperature": 0.7, - "max_temperature": 1.0, - "stream": true - }, - "engine": "anthropic" - }, - { - "model": "claude-3-5-haiku-latest", - "object": "model", - "name": "Claude 3.5 Haiku Latest", - "version": "1.0", - "description": "Claude 3.5 Haiku is the fastest model provides near-instant responsiveness.", - "inference_params": { - "max_tokens": 8192, - "temperature": 0.7, - "max_temperature": 1.0, - "stream": true - }, - "engine": "anthropic" - }, - { - "model": "claude-3-5-sonnet-latest", - "object": "model", - "name": "Claude 3.5 Sonnet Latest", - "version": "1.0", - "description": "Claude 3.5 Sonnet raises the industry bar for intelligence, outperforming competitor models and Claude 3 Opus on a wide range of evaluations, with the speed and cost of our mid-tier model, Claude 3 Sonnet.", - "inference_params": { - "max_tokens": 8192, - "temperature": 0.7, - "max_temperature": 1.0, - "stream": true - }, - "engine": "anthropic" - }, - { - "model": "claude-3-7-sonnet-latest", - "object": "model", - "name": "Claude 3.7 Sonnet Latest", - "version": "1.0", - "description": "Claude 3.7 Sonnet is the first hybrid reasoning model on the market. It is the most intelligent model yet. It is faster, more cost effective, and more capable than any other model in its class.", - "inference_params": { - "max_tokens": 8192, - "temperature": 0.7, - "max_temperature": 1.0, - "stream": true - }, - "engine": "anthropic" - } -] diff --git a/extensions/engine-management-extension/models/cohere.json b/extensions/engine-management-extension/models/cohere.json deleted file mode 100644 index 3c03be04e..000000000 --- a/extensions/engine-management-extension/models/cohere.json +++ /dev/null @@ -1,44 +0,0 @@ -[ - { - "model": "command-r-plus", - "object": "model", - "name": "Command R+", - "version": "1.0", - "description": "Command R+ is an instruction-following conversational model that performs language tasks at a higher quality, more reliably, and with a longer context than previous models. It is best suited for complex RAG workflows and multi-step tool use.", - "inference_params": { - "max_tokens": 4096, - "temperature": 0.7, - "max_temperature": 1.0, - "stream": true - }, - "engine": "cohere" - }, - { - "model": "command-r", - "object": "model", - "name": "Command R", - "version": "1.0", - "description": "Command R is an instruction-following conversational model that performs language tasks at a higher quality, more reliably, and with a longer context than previous models. It can be used for complex workflows like code generation, retrieval augmented generation (RAG), tool use, and agents.", - "inference_params": { - "max_tokens": 4096, - "temperature": 0.7, - "max_temperature": 1.0, - "stream": true - }, - "engine": "cohere" - }, - { - "model": "command-a-03-2025", - "object": "model", - "name": "Command A", - "version": "1.0", - "description": "Command A is an instruction-following conversational model that performs language tasks at a higher quality, more reliably, and with a longer context than previous models. 
It is best suited for complex RAG workflows and multi-step tool use.", - "inference_params": { - "max_tokens": 4096, - "temperature": 0.7, - "max_temperature": 1.0, - "stream": true - }, - "engine": "cohere" - } -] diff --git a/extensions/engine-management-extension/models/deepseek.json b/extensions/engine-management-extension/models/deepseek.json deleted file mode 100644 index 0e9930445..000000000 --- a/extensions/engine-management-extension/models/deepseek.json +++ /dev/null @@ -1,28 +0,0 @@ -[ - { - "model": "deepseek-chat", - "object": "model", - "name": "DeepSeek V3", - "version": "1.0", - "description": "The deepseek-chat model has been upgraded to DeepSeek-V3. deepseek-reasoner points to the new model DeepSeek-R1", - "inference_params": { - "max_tokens": 8192, - "temperature": 0.6, - "stream": true - }, - "engine": "deepseek" - }, - { - "model": "deepseek-reasoner", - "object": "model", - "name": "DeepSeek R1", - "version": "1.0", - "description": "CoT (Chain of Thought) is the reasoning content deepseek-reasoner gives before output the final answer. For details, please refer to Reasoning Model.", - "inference_params": { - "max_tokens": 8192, - "temperature": 0.6, - "stream": true - }, - "engine": "deepseek" - } -] diff --git a/extensions/engine-management-extension/models/google_gemini.json b/extensions/engine-management-extension/models/google_gemini.json deleted file mode 100644 index 2c21df5ee..000000000 --- a/extensions/engine-management-extension/models/google_gemini.json +++ /dev/null @@ -1,93 +0,0 @@ -[ - { - "model": "gemini-1.5-flash", - "object": "model", - "name": "Gemini 1.5 Flash", - "version": "1.0", - "description": "Gemini 1.5 Flash is a fast and versatile multimodal model for scaling across diverse tasks.", - "inference_params": { - "max_tokens": 8192, - "temperature": 0.6, - "stream": true - }, - "engine": "google_gemini" - }, - { - "model": "gemini-1.5-flash-8b", - "object": "model", - "name": "Gemini 1.5 Flash-8B", - "version": "1.0", - "description": "Gemini 1.5 Flash-8B is a small model designed for lower intelligence tasks.", - "inference_params": { - "max_tokens": 8192, - "temperature": 0.6, - "stream": true - }, - "engine": "google_gemini" - }, - { - "model": "gemini-1.5-pro", - "object": "model", - "name": "Gemini 1.5 Pro", - "version": "1.0", - "description": "Gemini 1.5 Pro is a mid-size multimodal model that is optimized for a wide-range of reasoning tasks. 1.5 Pro can process large amounts of data at once, including 2 hours of video, 19 hours of audio, codebases with 60,000 lines of code, or 2,000 pages of text. ", - "inference_params": { - "max_tokens": 8192, - "temperature": 0.6, - "stream": true - }, - "engine": "google_gemini" - }, - { - "model": "gemini-2.5-pro-preview-05-06", - "object": "model", - "name": "Gemini 2.5 Pro Preview", - "version": "1.0", - "description": "Gemini 2.5 Pro is our state-of-the-art thinking model, capable of reasoning over complex problems in code, math, and STEM, as well as analyzing large datasets, codebases, and documents using long context. Gemini 2.5 Pro rate limits are more restricted since it is an experimental / preview model.", - "inference_params": { - "max_tokens": 65536, - "temperature": 0.6, - "stream": true - }, - "engine": "google_gemini" - }, - { - "model": "gemini-2.5-flash-preview-04-17", - "object": "model", - "name": "Our best model in terms of price-performance, offering well-rounded capabilities. 
Gemini 2.5 Flash rate limits are more restricted since it is an experimental / preview model.", - "version": "1.0", - "description": "Gemini 2.5 Flash preview", - "inference_params": { - "max_tokens": 8192, - "temperature": 0.6, - "stream": true - }, - "engine": "google_gemini" - }, - { - "model": "gemini-2.0-flash", - "object": "model", - "name": "Gemini 2.0 Flash", - "version": "1.0", - "description": "Gemini 2.0 Flash delivers next-gen features and improved capabilities, including superior speed, native tool use, multimodal generation, and a 1M token context window.", - "inference_params": { - "max_tokens": 8192, - "temperature": 0.6, - "stream": true - }, - "engine": "google_gemini" - }, - { - "model": "gemini-2.0-flash-lite", - "object": "model", - "name": "Gemini 2.0 Flash-Lite", - "version": "1.0", - "description": "A Gemini 2.0 Flash model optimized for cost efficiency and low latency.", - "inference_params": { - "max_tokens": 8192, - "temperature": 0.6, - "stream": true - }, - "engine": "google_gemini" - } -] \ No newline at end of file diff --git a/extensions/engine-management-extension/models/groq.json b/extensions/engine-management-extension/models/groq.json deleted file mode 100644 index 981bd563b..000000000 --- a/extensions/engine-management-extension/models/groq.json +++ /dev/null @@ -1,87 +0,0 @@ -[ - { - "model": "llama3-70b-8192", - "object": "model", - "name": "Groq Llama 3 70b", - "version": "1.1", - "description": "Groq Llama 3 70b with supercharged speed!", - "inference_params": { - "max_tokens": 8192, - "temperature": 0.7, - "top_p": 0.95, - "stream": true, - "stop": [], - "frequency_penalty": 0, - "presence_penalty": 0 - }, - "engine": "groq" - }, - { - "model": "llama3-8b-8192", - "object": "model", - "name": "Groq Llama 3 8b", - "version": "1.1", - "description": "Groq Llama 3 8b with supercharged speed!", - "inference_params": { - "max_tokens": 8192, - "temperature": 0.7, - "top_p": 0.95, - "stream": true, - "stop": [], - "frequency_penalty": 0, - "presence_penalty": 0 - }, - "engine": "groq" - }, - { - "model": "llama-3.1-8b-instant", - "object": "model", - "name": "Groq Llama 3.1 8b Instant", - "version": "1.1", - "description": "Groq Llama 3.1 8b with supercharged speed!", - "inference_params": { - "max_tokens": 8000, - "temperature": 0.7, - "top_p": 0.95, - "stream": true, - "stop": [], - "frequency_penalty": 0, - "presence_penalty": 0 - }, - "engine": "groq" - }, - { - "model": "gemma2-9b-it", - "object": "model", - "name": "Groq Gemma 9B Instruct", - "version": "1.2", - "description": "Groq Gemma 9b Instruct with supercharged speed!", - "parameters": { - "max_tokens": 8192, - "temperature": 0.7, - "top_p": 0.95, - "stream": true, - "stop": [], - "frequency_penalty": 0, - "presence_penalty": 0 - }, - "engine": "groq" - }, - { - "model": "llama-3.3-70b-versatile", - "object": "model", - "name": "Groq Llama 3.3 70b Versatile", - "version": "3.3", - "description": "Groq Llama 3.3 70b Versatile with supercharged speed!", - "parameters": { - "max_tokens": 32768, - "temperature": 0.7, - "top_p": 0.95, - "stream": true, - "stop": [], - "frequency_penalty": 0, - "presence_penalty": 0 - }, - "engine": "groq" - } -] diff --git a/extensions/engine-management-extension/models/martian.json b/extensions/engine-management-extension/models/martian.json deleted file mode 100644 index 9ce7b69ba..000000000 --- a/extensions/engine-management-extension/models/martian.json +++ /dev/null @@ -1,19 +0,0 @@ -[ - { - "model": "router", - "object": "model", - "name": "Martian Model 
Router", - "version": "1.0", - "description": "Martian Model Router dynamically routes requests to the best LLM in real-time", - "inference_params": { - "max_tokens": 4096, - "temperature": 0.7, - "top_p": 0.95, - "stream": true, - "stop": [], - "frequency_penalty": 0, - "presence_penalty": 0 - }, - "engine": "martian" - } -] diff --git a/extensions/engine-management-extension/models/mistral.json b/extensions/engine-management-extension/models/mistral.json deleted file mode 100644 index 47df5d506..000000000 --- a/extensions/engine-management-extension/models/mistral.json +++ /dev/null @@ -1,47 +0,0 @@ -[ - { - "model": "mistral-small-latest", - "object": "model", - "name": "Mistral Small", - "version": "1.1", - "description": "Mistral Small is the ideal choice for simple tasks (Classification, Customer Support, or Text Generation) at an affordable price.", - "inference_params": { - "max_tokens": 32000, - "temperature": 0.7, - "max_temperature": 1.0, - "top_p": 0.95, - "stream": true - }, - "engine": "mistral" - }, - { - "model": "mistral-large-latest", - "object": "model", - "name": "Mistral Large", - "version": "1.1", - "description": "Mistral Large is ideal for complex tasks (Synthetic Text Generation, Code Generation, RAG, or Agents).", - "inference_params": { - "max_tokens": 32000, - "temperature": 0.7, - "max_temperature": 1.0, - "top_p": 0.95, - "stream": true - }, - "engine": "mistral" - }, - { - "model": "open-mixtral-8x22b", - "object": "model", - "name": "Mixtral 8x22B", - "version": "1.1", - "description": "Mixtral 8x22B is a high-performance, cost-effective model designed for complex tasks.", - "inference_params": { - "max_tokens": 32000, - "temperature": 0.7, - "max_temperature": 1.0, - "top_p": 0.95, - "stream": true - }, - "engine": "mistral" - } -] diff --git a/extensions/engine-management-extension/models/nvidia.json b/extensions/engine-management-extension/models/nvidia.json deleted file mode 100644 index cb6f9dec1..000000000 --- a/extensions/engine-management-extension/models/nvidia.json +++ /dev/null @@ -1,21 +0,0 @@ -[ - { - "model": "mistralai/mistral-7b-instruct-v0.2", - "object": "model", - "name": "Mistral 7B", - "version": "1.1", - "description": "Mistral 7B with NVIDIA", - "inference_params": { - "max_tokens": 1024, - "temperature": 0.3, - "max_temperature": 1.0, - "top_p": 1, - "stream": false, - "frequency_penalty": 0, - "presence_penalty": 0, - "stop": null, - "seed": null - }, - "engine": "nvidia" - } -] diff --git a/extensions/engine-management-extension/models/openai.json b/extensions/engine-management-extension/models/openai.json deleted file mode 100644 index b2314ec0b..000000000 --- a/extensions/engine-management-extension/models/openai.json +++ /dev/null @@ -1,143 +0,0 @@ -[ - { - "model": "gpt-4.5-preview", - "object": "model", - "name": "OpenAI GPT 4.5 Preview", - "version": "1.2", - "description": "OpenAI GPT 4.5 Preview is a research preview of GPT-4.5, our largest and most capable GPT model yet", - "format": "api", - "inference_params": { - "max_tokens": 16384, - "temperature": 0.7, - "top_p": 0.95, - "stream": true, - "stop": [], - "frequency_penalty": 0, - "presence_penalty": 0 - }, - "engine": "openai" - }, - { - "model": "gpt-4-turbo", - "object": "model", - "name": "OpenAI GPT 4 Turbo", - "version": "1.2", - "description": "OpenAI GPT 4 Turbo model is extremely good", - "format": "api", - "inference_params": { - "max_tokens": 4096, - "temperature": 0.7, - "top_p": 0.95, - "stream": true, - "stop": [], - "frequency_penalty": 0, - 
"presence_penalty": 0 - }, - "engine": "openai" - }, - { - "model": "gpt-3.5-turbo", - "object": "model", - "name": "OpenAI GPT 3.5 Turbo", - "version": "1.1", - "description": "OpenAI GPT 3.5 Turbo model is extremely fast", - "format": "api", - "inference_params": { - "max_tokens": 4096, - "temperature": 0.7, - "top_p": 0.95, - "stream": true, - "stop": [], - "frequency_penalty": 0, - "presence_penalty": 0 - }, - "engine": "openai" - }, - { - "model": "gpt-4o", - "object": "model", - "name": "OpenAI GPT 4o", - "version": "1.1", - "description": "OpenAI GPT 4o is a new flagship model with fast speed and high quality", - "format": "api", - "inference_params": { - "max_tokens": 4096, - "temperature": 0.7, - "top_p": 0.95, - "stream": true, - "stop": [], - "frequency_penalty": 0, - "presence_penalty": 0 - }, - "engine": "openai" - }, - { - "model": "gpt-4o-mini", - "object": "model", - "name": "OpenAI GPT 4o-mini", - "version": "1.1", - "description": "GPT-4o mini (“o” for “omni”) is a fast, affordable small model for focused tasks.", - "format": "api", - "inference_params": { - "max_tokens": 16384, - "temperature": 0.7, - "top_p": 0.95, - "stream": true, - "stop": [], - "frequency_penalty": 0, - "presence_penalty": 0 - }, - "engine": "openai" - }, - { - "model": "o1", - "object": "model", - "name": "OpenAI o1", - "version": "1.0", - "description": "OpenAI o1 is a new model with complex reasoning", - "format": "api", - "inference_params": { - "max_tokens": 100000 - }, - "engine": "openai" - }, - { - "model": "o1-preview", - "object": "model", - "name": "OpenAI o1-preview", - "version": "1.0", - "description": "OpenAI o1-preview is a new model with complex reasoning", - "format": "api", - "inference_params": { - "max_tokens": 32768, - "stream": true - }, - "engine": "openai" - }, - { - "model": "o1-mini", - "object": "model", - "name": "OpenAI o1-mini", - "version": "1.0", - "description": "OpenAI o1-mini is a lightweight reasoning model", - "format": "api", - "inference_params": { - "max_tokens": 65536, - "stream": true - }, - "engine": "openai" - }, - { - "model": "o3-mini", - "object": "model", - "name": "OpenAI o3-mini", - "version": "1.0", - "description": "OpenAI most recent reasoning model, providing high intelligence at the same cost and latency targets of o1-mini.", - "format": "api", - "inference_params": { - "max_tokens": 100000, - "stream": true - }, - "engine": "openai" - } -] diff --git a/extensions/engine-management-extension/models/openrouter.json b/extensions/engine-management-extension/models/openrouter.json deleted file mode 100644 index bf132533c..000000000 --- a/extensions/engine-management-extension/models/openrouter.json +++ /dev/null @@ -1,92 +0,0 @@ -[ - { - "model": "deepseek/deepseek-r1:free", - "object": "model", - "name": "DeepSeek: R1", - "version": "1.0", - "description": "OpenRouter scouts for the lowest prices and best latencies/throughputs across dozens of providers, and lets you choose how to prioritize them.", - "inference_params": { - "temperature": 0.7, - "top_p": 0.95, - "frequency_penalty": 0, - "presence_penalty": 0, - "stream": true - }, - "engine": "openrouter" - }, - { - "model": "deepseek/deepseek-r1-distill-llama-70b:free", - "object": "model", - "name": "DeepSeek: R1 Distill Llama 70B", - "version": "1.0", - "description": " OpenRouter scouts for the lowest prices and best latencies/throughputs across dozens of providers, and lets you choose how to prioritize them.", - "inference_params": { - "temperature": 0.7, - "top_p": 0.95, - 
"frequency_penalty": 0, - "presence_penalty": 0, - "stream": true - }, - "engine": "openrouter" - }, - { - "model": "deepseek/deepseek-r1-distill-llama-70b:free", - "object": "model", - "name": "DeepSeek: R1 Distill Llama 70B", - "version": "1.0", - "description": "OpenRouter scouts for the lowest prices and best latencies/throughputs across dozens of providers, and lets you choose how to prioritize them.", - "inference_params": { - "temperature": 0.7, - "top_p": 0.95, - "frequency_penalty": 0, - "presence_penalty": 0, - "stream": true - }, - "engine": "openrouter" - }, - { - "model": "meta-llama/llama-3.1-405b-instruct:free", - "object": "model", - "name": "Meta: Llama 3.1 405B Instruct", - "version": "1.0", - "description": "OpenRouter scouts for the lowest prices and best latencies/throughputs across dozens of providers, and lets you choose how to prioritize them.", - "inference_params": { - "temperature": 0.7, - "top_p": 0.95, - "frequency_penalty": 0, - "presence_penalty": 0, - "stream": true - }, - "engine": "openrouter" - }, - { - "model": "qwen/qwen-vl-plus:free", - "object": "model", - "name": "Qwen: Qwen VL Plus", - "version": "1.0", - "description": "OpenRouter scouts for the lowest prices and best latencies/throughputs across dozens of providers, and lets you choose how to prioritize them.", - "inference_params": { - "temperature": 0.7, - "top_p": 0.95, - "frequency_penalty": 0, - "presence_penalty": 0, - "stream": true - }, - "engine": "openrouter" - }, - { - "model": "qwen/qwen2.5-vl-72b-instruct:free", - "object": "model", - "name": "Qwen: Qwen2.5 VL 72B Instruct", - "version": "1.0", - "description": "OpenRouter scouts for the lowest prices and best latencies/throughputs across dozens of providers, and lets you choose how to prioritize them.", - "inference_params": { - "temperature": 0.7, - "top_p": 0.95, - "frequency_penalty": 0, - "presence_penalty": 0, - "stream": true - }, - "engine": "openrouter" - } -] diff --git a/extensions/engine-management-extension/package.json b/extensions/engine-management-extension/package.json deleted file mode 100644 index d08998ba8..000000000 --- a/extensions/engine-management-extension/package.json +++ /dev/null @@ -1,47 +0,0 @@ -{ - "name": "@janhq/engine-management-extension", - "productName": "Engine Management", - "version": "1.0.3", - "description": "Manages AI engines and their configurations.", - "main": "dist/index.js", - "node": "dist/node/index.cjs.js", - "author": "Jan ", - "license": "MIT", - "scripts": { - "test": "vitest run", - "build": "rolldown -c rolldown.config.mjs", - "codesign:darwin": "../../.github/scripts/auto-sign.sh", - "codesign:win32:linux": "echo 'No codesigning required'", - "codesign": "run-script-os", - "build:publish": "rimraf *.tgz --glob || true && yarn build && yarn codesign && npm pack && cpx *.tgz ../../pre-install" - }, - "exports": { - ".": "./dist/index.js", - "./main": "./dist/module.js" - }, - "devDependencies": { - "cpx": "^1.5.0", - "rimraf": "^3.0.2", - "rolldown": "^1.0.0-beta.1", - "run-script-os": "^1.1.6", - "ts-loader": "^9.5.0", - "typescript": "^5.3.3", - "vitest": "^3.0.6" - }, - "dependencies": { - "@janhq/core": "../../core/package.tgz", - "ky": "^1.7.2", - "p-queue": "^8.0.1" - }, - "bundledDependencies": [ - "@janhq/core" - ], - "engines": { - "node": ">=18.0.0" - }, - "files": [ - "dist/*", - "package.json", - "README.md" - ] -} diff --git a/extensions/engine-management-extension/resources/anthropic.json b/extensions/engine-management-extension/resources/anthropic.json deleted file 
mode 100644 index f8ba74e2b..000000000 --- a/extensions/engine-management-extension/resources/anthropic.json +++ /dev/null @@ -1,23 +0,0 @@ -{ - "id": "anthropic", - "type": "remote", - "engine": "anthropic", - "url": "https://console.anthropic.com/settings/keys", - "api_key": "", - "metadata": { - "get_models_url": "https://api.anthropic.com/v1/models", - "header_template": "x-api-key: {{api_key}} anthropic-version: 2023-06-01", - "transform_req": { - "chat_completions": { - "url": "https://api.anthropic.com/v1/messages", - "template": "{ {% for key, value in input_request %} {% if key == \"messages\" %} {% if input_request.messages.0.role == \"system\" %} \"system\": {{ tojson(input_request.messages.0.content) }}, \"messages\": [{% for message in input_request.messages %} {% if not loop.is_first %} {\"role\": {{ tojson(message.role) }}, \"content\": {% if not message.content or message.content == \"\" %} \".\" {% else %} {{ tojson(message.content) }} {% endif %} } {% if not loop.is_last %},{% endif %} {% endif %} {% endfor %}] {% else %} \"messages\": [{% for message in input_request.messages %} {\"role\": {{ tojson(message.role) }}, \"content\": {% if not message.content or message.content == \"\" %} \".\" {% else %} {{ tojson(message.content) }} {% endif %} } {% if not loop.is_last %},{% endif %} {% endfor %}] {% endif %} {% if not loop.is_last %},{% endif %} {% else if key == \"system\" or key == \"model\" or key == \"temperature\" or key == \"store\" or key == \"max_tokens\" or key == \"stream\" or key == \"metadata\" or key == \"tools\" or key == \"tool_choice\" or key == \"logprobs\" or key == \"top_logprobs\" or key == \"logit_bias\" or key == \"n\" or key == \"modalities\" or key == \"prediction\" or key == \"response_format\" or key == \"service_tier\" or key == \"seed\" or key == \"stop\" or key == \"stream_options\" or key == \"top_p\" or key == \"parallel_tool_calls\" or key == \"user\" %}\"{{ key }}\": {{ tojson(value) }} {% if not loop.is_last %},{% endif %} {% endif %} {% endfor %} }" - } - }, - "transform_resp": { - "chat_completions": { - "template": "{% if input_request.stream %} {\"object\": \"chat.completion.chunk\", \"model\": \"{{ input_request.model }}\", \"choices\": [{\"index\": 0, \"delta\": { {% if input_request.type == \"message_start\" %} \"role\": \"assistant\", \"content\": null {% else if input_request.type == \"ping\" %} \"role\": \"assistant\", \"content\": null {% else if input_request.type == \"content_block_delta\" %} \"role\": \"assistant\", \"content\": {{ tojson(input_request.delta.text) }} {% else if input_request.type == \"content_block_stop\" %} \"role\": \"assistant\", \"content\": null {% else if input_request.type == \"content_block_stop\" %} \"role\": \"assistant\", \"content\": null {% endif %} }, {% if input_request.type == \"content_block_stop\" %} \"finish_reason\": \"stop\" {% else %} \"finish_reason\": null {% endif %} }]} {% else %} {{tojson(input_request)}} {% endif %}" - } - }, - "explore_models_url": "https://docs.anthropic.com/en/docs/about-claude/models" - } -} diff --git a/extensions/engine-management-extension/resources/cohere.json b/extensions/engine-management-extension/resources/cohere.json deleted file mode 100644 index 02f1cc625..000000000 --- a/extensions/engine-management-extension/resources/cohere.json +++ /dev/null @@ -1,23 +0,0 @@ -{ - "id": "cohere", - "type": "remote", - "engine": "cohere", - "url": "https://dashboard.cohere.com/api-keys", - "api_key": "", - "metadata": { - "get_models_url": 
"https://api.cohere.ai/v1/models", - "header_template": "Authorization: Bearer {{api_key}}", - "transform_req": { - "chat_completions": { - "url": "https://api.cohere.ai/v1/chat", - "template": "{ {% for key, value in input_request %} {% if key == \"messages\" %} {% if input_request.messages.0.role == \"system\" %} \"preamble\": {{ tojson(input_request.messages.0.content) }}, {% if length(input_request.messages) > 2 %} \"chatHistory\": [{% for message in input_request.messages %} {% if not loop.is_first and not loop.is_last %} {\"role\": {% if message.role == \"user\" %} \"USER\" {% else %} \"CHATBOT\" {% endif %}, \"content\": {{ tojson(message.content) }} } {% if loop.index < length(input_request.messages) - 2 %},{% endif %} {% endif %} {% endfor %}], {% endif %} \"message\": {{ tojson(last(input_request.messages).content) }} {% else %} {% if length(input_request.messages) > 2 %} \"chatHistory\": [{% for message in input_request.messages %} {% if not loop.is_last %} { \"role\": {% if message.role == \"user\" %} \"USER\" {% else %} \"CHATBOT\" {% endif %}, \"content\": {{ tojson(message.content) }} } {% if loop.index < length(input_request.messages) - 2 %},{% endif %} {% endif %} {% endfor %}],{% endif %}\"message\": {{ tojson(last(input_request.messages).content) }} {% endif %}{% if not loop.is_last %},{% endif %} {% else if key == \"system\" or key == \"model\" or key == \"temperature\" or key == \"store\" or key == \"max_tokens\" or key == \"stream\" or key == \"presence_penalty\" or key == \"metadata\" or key == \"frequency_penalty\" or key == \"tools\" or key == \"tool_choice\" or key == \"logprobs\" or key == \"top_logprobs\" or key == \"logit_bias\" or key == \"n\" or key == \"modalities\" or key == \"prediction\" or key == \"response_format\" or key == \"service_tier\" or key == \"seed\" or key == \"stop\" or key == \"stream_options\" or key == \"top_p\" or key == \"parallel_tool_calls\" or key == \"user\" %} \"{{ key }}\": {{ tojson(value) }} {% if not loop.is_last %},{% endif %} {% endif %} {% endfor %} }" - } - }, - "transform_resp": { - "chat_completions": { - "template": "{% if input_request.stream %} {\"object\": \"chat.completion.chunk\", \"model\": \"{{ input_request.model }}\", \"choices\": [{\"index\": 0, \"delta\": { {% if input_request.event_type == \"text-generation\" %} \"role\": \"assistant\", \"content\": {{ tojson(input_request.text) }} {% else %} \"role\": \"assistant\", \"content\": null {% endif %} }, {% if input_request.event_type == \"stream-end\" %} \"finish_reason\": \"{{ input_request.finish_reason }}\" {% else %} \"finish_reason\": null {% endif %} }]} {% else %} {\"id\": \"{{ input_request.generation_id }}\", \"created\": null, \"object\": \"chat.completion\", \"model\": {% if input_request.model %} \"{{ input_request.model }}\" {% else %} \"command-r-plus-08-2024\" {% endif %}, \"choices\": [{ \"index\": 0, \"message\": { \"role\": \"assistant\", \"content\": {% if not input_request.text %} null {% else %} {{ tojson(input_request.text) }} {% endif %}, \"refusal\": null }, \"logprobs\": null, \"finish_reason\": \"{{ input_request.finish_reason }}\" } ], \"usage\": { \"prompt_tokens\": {{ input_request.meta.tokens.input_tokens }}, \"completion_tokens\": {{ input_request.meta.tokens.output_tokens }},\"total_tokens\": {{ input_request.meta.tokens.input_tokens + input_request.meta.tokens.output_tokens }}, \"prompt_tokens_details\": { \"cached_tokens\": 0 },\"completion_tokens_details\": { \"reasoning_tokens\": 0, \"accepted_prediction_tokens\": 0, 
\"rejected_prediction_tokens\": 0 } }, \"system_fingerprint\": \"fp_6b68a8204b\"} {% endif %}" - } - }, - "explore_models_url": "https://docs.cohere.com/v2/docs/models" - } -} diff --git a/extensions/engine-management-extension/resources/deepseek.json b/extensions/engine-management-extension/resources/deepseek.json deleted file mode 100644 index 214ec3b23..000000000 --- a/extensions/engine-management-extension/resources/deepseek.json +++ /dev/null @@ -1,23 +0,0 @@ -{ - "id": "deepseek", - "type": "remote", - "engine": "deepseek", - "url": "https://platform.deepseek.com/api_keys", - "api_key": "", - "metadata": { - "get_models_url": "https://api.deepseek.com/models", - "header_template": "Authorization: Bearer {{api_key}}", - "transform_req": { - "chat_completions": { - "url": "https://api.deepseek.com/chat/completions", - "template": "{ {% set first = true %} {% for key, value in input_request %} {% if key == \"messages\" or key == \"temperature\" or key == \"store\" or key == \"max_tokens\" or key == \"stream\" or key == \"presence_penalty\" or key == \"metadata\" or key == \"frequency_penalty\" or key == \"tools\" or key == \"tool_choice\" or key == \"logprobs\" or key == \"top_logprobs\" or key == \"logit_bias\" or key == \"n\" or key == \"modalities\" or key == \"prediction\" or key == \"response_format\" or key == \"service_tier\" or key == \"model\" or key == \"seed\" or key == \"stop\" or key == \"stream_options\" or key == \"top_p\" or key == \"parallel_tool_calls\" or key == \"user\" %} {% if not first %},{% endif %} \"{{ key }}\": {{ tojson(value) }} {% set first = false %} {% endif %} {% endfor %} }" - } - }, - "transform_resp": { - "chat_completions": { - "template": "{{tojson(input_request)}}" - } - }, - "explore_models_url": "https://api-docs.deepseek.com/quick_start/pricing" - } -} diff --git a/extensions/engine-management-extension/resources/google_gemini.json b/extensions/engine-management-extension/resources/google_gemini.json deleted file mode 100644 index f860a1990..000000000 --- a/extensions/engine-management-extension/resources/google_gemini.json +++ /dev/null @@ -1,23 +0,0 @@ -{ - "id": "google_gemini", - "type": "remote", - "engine": "google_gemini", - "url": "https://aistudio.google.com/apikey", - "api_key": "", - "metadata": { - "get_models_url": "https://generativelanguage.googleapis.com/openai/v1beta/models", - "header_template": "Authorization: Bearer {{api_key}}", - "transform_req": { - "chat_completions": { - "url": "https://generativelanguage.googleapis.com/v1beta/openai/chat/completions", - "template": "{ {% set first = true %} {% for key, value in input_request %} {% if key == \"messages\" or key == \"temperature\" or key == \"store\" or key == \"max_tokens\" or key == \"stream\" or key == \"presence_penalty\" or key == \"metadata\" or key == \"frequency_penalty\" or key == \"tools\" or key == \"tool_choice\" or key == \"logprobs\" or key == \"top_logprobs\" or key == \"logit_bias\" or key == \"n\" or key == \"modalities\" or key == \"prediction\" or key == \"response_format\" or key == \"service_tier\" or key == \"model\" or key == \"seed\" or key == \"stop\" or key == \"stream_options\" or key == \"top_p\" or key == \"parallel_tool_calls\" or key == \"user\" %} {% if not first %},{% endif %} \"{{ key }}\": {{ tojson(value) }} {% set first = false %} {% endif %} {% endfor %} }" - } - }, - "transform_resp": { - "chat_completions": { - "template": "{{tojson(input_request)}}" - } - }, - "explore_models_url": 
"https://ai.google.dev/gemini-api/docs/models/gemini" - } -} diff --git a/extensions/engine-management-extension/resources/groq.json b/extensions/engine-management-extension/resources/groq.json deleted file mode 100644 index 87d215ab2..000000000 --- a/extensions/engine-management-extension/resources/groq.json +++ /dev/null @@ -1,23 +0,0 @@ -{ - "id": "groq", - "type": "remote", - "engine": "groq", - "url": "https://console.groq.com/keys", - "api_key": "", - "metadata": { - "get_models_url": "https://api.groq.com/openai/v1/models", - "header_template": "Authorization: Bearer {{api_key}}", - "transform_req": { - "chat_completions": { - "url": "https://api.groq.com/openai/v1/chat/completions", - "template": "{ {% set first = true %} {% for key, value in input_request %} {% if key == \"messages\" or key == \"model\" or key == \"temperature\" or key == \"store\" or key == \"max_tokens\" or key == \"stream\" or key == \"presence_penalty\" or key == \"metadata\" or key == \"frequency_penalty\" or key == \"tools\" or key == \"tool_choice\" or key == \"logprobs\" or key == \"top_logprobs\" or key == \"logit_bias\" or key == \"n\" or key == \"modalities\" or key == \"prediction\" or key == \"response_format\" or key == \"service_tier\" or key == \"seed\" or key == \"stop\" or key == \"stream_options\" or key == \"top_p\" or key == \"parallel_tool_calls\" or key == \"user\" %} {% if not first %},{% endif %} \"{{ key }}\": {{ tojson(value) }} {% set first = false %} {% endif %} {% endfor %} }" - } - }, - "transform_resp": { - "chat_completions": { - "template": "{{tojson(input_request)}}" - } - }, - "explore_models_url": "https://console.groq.com/docs/models" - } -} diff --git a/extensions/engine-management-extension/resources/martian.json b/extensions/engine-management-extension/resources/martian.json deleted file mode 100644 index 3fd458660..000000000 --- a/extensions/engine-management-extension/resources/martian.json +++ /dev/null @@ -1,23 +0,0 @@ -{ - "id": "martian", - "type": "remote", - "engine": "martian", - "url": "https://withmartian.com/dashboard", - "api_key": "", - "metadata": { - "get_models_url": "https://withmartian.com/api/openai/v1/models", - "header_template": "Authorization: Bearer {{api_key}}", - "transform_req": { - "chat_completions": { - "url": "https://withmartian.com/api/openai/v1/chat/completions", - "template": "{ {% set first = true %} {% for key, value in input_request %} {% if key == \"messages\" or key == \"model\" or key == \"temperature\" or key == \"store\" or key == \"max_tokens\" or key == \"stream\" or key == \"presence_penalty\" or key == \"metadata\" or key == \"frequency_penalty\" or key == \"tools\" or key == \"tool_choice\" or key == \"logprobs\" or key == \"top_logprobs\" or key == \"logit_bias\" or key == \"n\" or key == \"modalities\" or key == \"prediction\" or key == \"response_format\" or key == \"service_tier\" or key == \"seed\" or key == \"stop\" or key == \"stream_options\" or key == \"top_p\" or key == \"parallel_tool_calls\" or key == \"user\" %} {% if not first %},{% endif %} \"{{ key }}\": {{ tojson(value) }} {% set first = false %} {% endif %} {% endfor %} }" - } - }, - "transform_resp": { - "chat_completions": { - "template": "{{tojson(input_request)}}" - } - }, - "explore_models_url": "https://withmartian.github.io/llm-adapters/" - } -} diff --git a/extensions/engine-management-extension/resources/mistral.json b/extensions/engine-management-extension/resources/mistral.json deleted file mode 100644 index 4a24471a2..000000000 --- 
a/extensions/engine-management-extension/resources/mistral.json +++ /dev/null @@ -1,23 +0,0 @@ -{ - "id": "mistral", - "type": "remote", - "engine": "mistral", - "url": "https://console.mistral.ai/api-keys/", - "api_key": "", - "metadata": { - "get_models_url": "https://api.mistral.ai/v1/models", - "header_template": "Authorization: Bearer {{api_key}}", - "transform_req": { - "chat_completions": { - "url": "https://api.mistral.ai/v1/chat/completions", - "template": "{ {% set first = true %} {% for key, value in input_request %} {% if key == \"messages\" or key == \"model\" or key == \"temperature\" or key == \"store\" or key == \"max_tokens\" or key == \"stream\" or key == \"presence_penalty\" or key == \"metadata\" or key == \"frequency_penalty\" or key == \"tools\" or key == \"tool_choice\" or key == \"logprobs\" or key == \"top_logprobs\" or key == \"logit_bias\" or key == \"n\" or key == \"modalities\" or key == \"prediction\" or key == \"response_format\" or key == \"service_tier\" or key == \"seed\" or key == \"stop\" or key == \"stream_options\" or key == \"top_p\" or key == \"parallel_tool_calls\" or key == \"user\" %} {% if not first %},{% endif %} \"{{ key }}\": {{ tojson(value) }} {% set first = false %} {% endif %} {% endfor %} }" - } - }, - "transform_resp": { - "chat_completions": { - "template": "{{tojson(input_request)}}" - } - }, - "explore_models_url": "https://docs.mistral.ai/getting-started/models/models_overview/" - } -} diff --git a/extensions/engine-management-extension/resources/nvidia.json b/extensions/engine-management-extension/resources/nvidia.json deleted file mode 100644 index 573bad4f6..000000000 --- a/extensions/engine-management-extension/resources/nvidia.json +++ /dev/null @@ -1,23 +0,0 @@ -{ - "id": "nvidia", - "type": "remote", - "engine": "nvidia", - "url": "https://org.ngc.nvidia.com/setup/personal-keys", - "api_key": "", - "metadata": { - "get_models_url": "https://integrate.api.nvidia.com/v1/models", - "header_template": "Authorization: Bearer {{api_key}}", - "transform_req": { - "chat_completions": { - "url": "https://integrate.api.nvidia.com/v1/chat/completions", - "template": "{ {% set first = true %} {% for key, value in input_request %} {% if key == \"messages\" or key == \"model\" or key == \"temperature\" or key == \"store\" or key == \"max_tokens\" or key == \"stream\" or key == \"presence_penalty\" or key == \"metadata\" or key == \"frequency_penalty\" or key == \"tools\" or key == \"tool_choice\" or key == \"logprobs\" or key == \"top_logprobs\" or key == \"logit_bias\" or key == \"n\" or key == \"modalities\" or key == \"prediction\" or key == \"response_format\" or key == \"service_tier\" or key == \"seed\" or key == \"stop\" or key == \"stream_options\" or key == \"top_p\" or key == \"parallel_tool_calls\" or key == \"user\" %} {% if not first %},{% endif %} \"{{ key }}\": {{ tojson(value) }} {% set first = false %} {% endif %} {% endfor %} }" - } - }, - "transform_resp": { - "chat_completions": { - "template": "{{tojson(input_request)}}" - } - }, - "explore_models_url": "https://build.nvidia.com/models" - } -} diff --git a/extensions/engine-management-extension/resources/openai.json b/extensions/engine-management-extension/resources/openai.json deleted file mode 100644 index f178a1a6f..000000000 --- a/extensions/engine-management-extension/resources/openai.json +++ /dev/null @@ -1,23 +0,0 @@ -{ - "id": "openai", - "type": "remote", - "engine": "openai", - "url": "https://platform.openai.com/account/api-keys", - "api_key": "", - 
"metadata": { - "get_models_url": "https://api.openai.com/v1/models", - "header_template": "Authorization: Bearer {{api_key}}", - "transform_req": { - "chat_completions": { - "url": "https://api.openai.com/v1/chat/completions", - "template": "{ {% set first = true %} {% for key, value in input_request %} {% if key == \"model\" or key == \"temperature\" or key == \"store\" or key == \"messages\" or key == \"stream\" or key == \"presence_penalty\" or key == \"metadata\" or key == \"frequency_penalty\" or key == \"tools\" or key == \"tool_choice\" or key == \"logprobs\" or key == \"top_logprobs\" or key == \"logit_bias\" or key == \"n\" or key == \"modalities\" or key == \"prediction\" or key == \"response_format\" or key == \"service_tier\" or key == \"seed\" or key == \"stream_options\" or key == \"top_p\" or key == \"parallel_tool_calls\" or key == \"user\" or key == \"max_tokens\" or key == \"stop\" %} {% if not first %}, {% endif %} {% if key == \"messages\" and (input_request.model == \"o1\" or input_request.model == \"o1-preview\" or input_request.model == \"o1-mini\") and input_request.messages.0.role == \"system\" %} \"messages\": [ {% for message in input_request.messages %} {% if not loop.is_first %} { \"role\": \"{{ message.role }}\", \"content\": \"{{ message.content }}\" } {% if not loop.is_last %}, {% endif %} {% endif %} {% endfor %} ] {% else if key == \"stop\" and (input_request.model == \"o1\" or input_request.model == \"o1-preview\" or input_request.model == \"o1-mini\" or input_request.model == \"o3\" or input_request.model == \"o3-mini\") %} {% set first = false %} {% else if key == \"max_tokens\" and (input_request.model == \"o1\" or input_request.model == \"o1-preview\" or input_request.model == \"o1-mini\" or input_request.model == \"o3\" or input_request.model == \"o3-mini\") %} \"max_completion_tokens\": {{ tojson(value) }} {% set first = false %} {% else %} \"{{ key }}\": {{ tojson(value) }} {% set first = false %} {% endif %} {% endif %} {% endfor %} }" - } - }, - "transform_resp": { - "chat_completions": { - "template": "{{tojson(input_request)}}" - } - }, - "explore_models_url": "https://platform.openai.com/docs/models" - } -} diff --git a/extensions/engine-management-extension/resources/openrouter.json b/extensions/engine-management-extension/resources/openrouter.json deleted file mode 100644 index 798199708..000000000 --- a/extensions/engine-management-extension/resources/openrouter.json +++ /dev/null @@ -1,23 +0,0 @@ -{ - "id": "openrouter", - "type": "remote", - "engine": "openrouter", - "url": "https://openrouter.ai/keys", - "api_key": "", - "metadata": { - "get_models_url": "https://openrouter.ai/api/v1/models", - "header_template": "Authorization: Bearer {{api_key}}", - "transform_req": { - "chat_completions": { - "url": "https://openrouter.ai/api/v1/chat/completions", - "template": "{ {% set first = true %} {% for key, value in input_request %} {% if key == \"messages\" or key == \"temperature\" or key == \"store\" or key == \"max_tokens\" or key == \"stream\" or key == \"presence_penalty\" or key == \"metadata\" or key == \"frequency_penalty\" or key == \"tools\" or key == \"tool_choice\" or key == \"logprobs\" or key == \"top_logprobs\" or key == \"logit_bias\" or key == \"n\" or key == \"modalities\" or key == \"prediction\" or key == \"response_format\" or key == \"service_tier\" or key == \"model\" or key == \"seed\" or key == \"stop\" or key == \"stream_options\" or key == \"top_p\" or key == \"parallel_tool_calls\" or key == \"user\" %} {% if not 
first %},{% endif %} \"{{ key }}\": {{ tojson(value) }} {% set first = false %} {% endif %} {% endfor %} }" - } - }, - "transform_resp": { - "chat_completions": { - "template": "{{tojson(input_request)}}" - } - }, - "explore_models_url": "https://openrouter.ai/models" - } -} diff --git a/extensions/engine-management-extension/rolldown.config.mjs b/extensions/engine-management-extension/rolldown.config.mjs deleted file mode 100644 index 98a5445cf..000000000 --- a/extensions/engine-management-extension/rolldown.config.mjs +++ /dev/null @@ -1,44 +0,0 @@ -import { defineConfig } from 'rolldown' -import { engines, models } from './engines.mjs' -import pkgJson from './package.json' with { type: 'json' } - -export default defineConfig([ - { - input: 'src/index.ts', - output: { - format: 'esm', - file: 'dist/index.js', - }, - define: { - NODE: JSON.stringify(`${pkgJson.name}/${pkgJson.node}`), - API_URL: JSON.stringify( - `http://127.0.0.1:${process.env.CORTEX_API_PORT ?? '39291'}` - ), - PLATFORM: JSON.stringify(process.platform), - CORTEX_ENGINE_VERSION: JSON.stringify('b5509'), - DEFAULT_REMOTE_ENGINES: JSON.stringify(engines), - DEFAULT_REMOTE_MODELS: JSON.stringify(models), - DEFAULT_REQUEST_PAYLOAD_TRANSFORM: JSON.stringify( - `{ {% set first = true %} {% for key, value in input_request %} {% if key == "messages" or key == "model" or key == "temperature" or key == "store" or key == "max_tokens" or key == "stream" or key == "presence_penalty" or key == "metadata" or key == "frequency_penalty" or key == "tools" or key == "tool_choice" or key == "logprobs" or key == "top_logprobs" or key == "logit_bias" or key == "n" or key == "modalities" or key == "prediction" or key == "response_format" or key == "service_tier" or key == "seed" or key == "stop" or key == "stream_options" or key == "top_p" or key == "parallel_tool_calls" or key == "user" %} {% if not first %},{% endif %} "{{ key }}": {{ tojson(value) }} {% set first = false %} {% endif %} {% endfor %} }` - ), - DEFAULT_RESPONSE_BODY_TRANSFORM: JSON.stringify( - '{{tojson(input_request)}}' - ), - DEFAULT_REQUEST_HEADERS_TRANSFORM: JSON.stringify( - 'Authorization: Bearer {{api_key}}' - ), - VERSION: JSON.stringify(pkgJson.version ?? 
'0.0.0'), - }, - }, - { - input: 'src/node/index.ts', - external: ['@janhq/core/node'], - output: { - format: 'cjs', - file: 'dist/node/index.cjs.js', - }, - define: { - CORTEX_ENGINE_VERSION: JSON.stringify('b5509'), - }, - }, -]) diff --git a/extensions/engine-management-extension/src/@types/global.d.ts b/extensions/engine-management-extension/src/@types/global.d.ts deleted file mode 100644 index 0dbed3806..000000000 --- a/extensions/engine-management-extension/src/@types/global.d.ts +++ /dev/null @@ -1,23 +0,0 @@ -declare const API_URL: string -declare const CORTEX_ENGINE_VERSION: string -declare const PLATFORM: string -declare const NODE: string -declare const DEFAULT_REQUEST_PAYLOAD_TRANSFORM: string -declare const DEFAULT_RESPONSE_BODY_TRANSFORM: string -declare const DEFAULT_REQUEST_HEADERS_TRANSFORM: string -declare const VERSION: string - -declare const DEFAULT_REMOTE_ENGINES: ({ - id: string - engine: string -} & EngineConfig)[] -declare const DEFAULT_REMOTE_MODELS: Model[] - -interface Core { - api: APIFunctions - events: EventEmitter -} -interface Window { - core?: Core | undefined - electronAPI?: any | undefined -} diff --git a/extensions/engine-management-extension/src/api.test.ts b/extensions/engine-management-extension/src/api.test.ts deleted file mode 100644 index ab72f8127..000000000 --- a/extensions/engine-management-extension/src/api.test.ts +++ /dev/null @@ -1,199 +0,0 @@ -import { describe, beforeEach, it, expect, vi } from 'vitest' -import JanEngineManagementExtension from './index' -import { InferenceEngine } from '@janhq/core' - -describe('API methods', () => { - let extension: JanEngineManagementExtension - - beforeEach(() => { - // @ts-ignore - extension = new JanEngineManagementExtension() - vi.resetAllMocks() - }) - - describe('getReleasedEnginesByVersion', () => { - it('should return engines filtered by platform if provided', async () => { - const mockEngines = [ - { - name: 'windows-amd64-avx2', - version: '1.0.0', - }, - { - name: 'linux-amd64-avx2', - version: '1.0.0', - }, - ] - - vi.mock('ky', () => ({ - default: { - get: () => ({ - json: () => Promise.resolve(mockEngines), - }), - }, - })) - - const mock = vi.spyOn(extension, 'getReleasedEnginesByVersion') - mock.mockImplementation(async (name, version, platform) => { - const result = await Promise.resolve(mockEngines) - return platform ? result.filter(r => r.name.includes(platform)) : result - }) - - const result = await extension.getReleasedEnginesByVersion( - InferenceEngine.cortex_llamacpp, - '1.0.0', - 'windows' - ) - - expect(result).toHaveLength(1) - expect(result[0].name).toBe('windows-amd64-avx2') - }) - - it('should return all engines if platform is not provided', async () => { - const mockEngines = [ - { - name: 'windows-amd64-avx2', - version: '1.0.0', - }, - { - name: 'linux-amd64-avx2', - version: '1.0.0', - }, - ] - - vi.mock('ky', () => ({ - default: { - get: () => ({ - json: () => Promise.resolve(mockEngines), - }), - }, - })) - - const mock = vi.spyOn(extension, 'getReleasedEnginesByVersion') - mock.mockImplementation(async (name, version, platform) => { - const result = await Promise.resolve(mockEngines) - return platform ? 
result.filter(r => r.name.includes(platform)) : result - }) - - const result = await extension.getReleasedEnginesByVersion( - InferenceEngine.cortex_llamacpp, - '1.0.0' - ) - - expect(result).toHaveLength(2) - }) - }) - - describe('getLatestReleasedEngine', () => { - it('should return engines filtered by platform if provided', async () => { - const mockEngines = [ - { - name: 'windows-amd64-avx2', - version: '1.0.0', - }, - { - name: 'linux-amd64-avx2', - version: '1.0.0', - }, - ] - - vi.mock('ky', () => ({ - default: { - get: () => ({ - json: () => Promise.resolve(mockEngines), - }), - }, - })) - - const mock = vi.spyOn(extension, 'getLatestReleasedEngine') - mock.mockImplementation(async (name, platform) => { - const result = await Promise.resolve(mockEngines) - return platform ? result.filter(r => r.name.includes(platform)) : result - }) - - const result = await extension.getLatestReleasedEngine( - InferenceEngine.cortex_llamacpp, - 'linux' - ) - - expect(result).toHaveLength(1) - expect(result[0].name).toBe('linux-amd64-avx2') - }) - }) - - describe('installEngine', () => { - it('should send install request with correct parameters', async () => { - const mockEngineConfig = { - variant: 'windows-amd64-avx2', - version: '1.0.0', - } - - vi.mock('ky', () => ({ - default: { - post: (url, options) => { - expect(url).toBe(`${API_URL}/v1/engines/${InferenceEngine.cortex_llamacpp}/install`) - expect(options.json).toEqual(mockEngineConfig) - return Promise.resolve({ messages: 'OK' }) - }, - }, - })) - - const result = await extension.installEngine( - InferenceEngine.cortex_llamacpp, - mockEngineConfig - ) - - expect(result).toEqual({ messages: 'OK' }) - }) - }) - - describe('uninstallEngine', () => { - it('should send uninstall request with correct parameters', async () => { - const mockEngineConfig = { - variant: 'windows-amd64-avx2', - version: '1.0.0', - } - - vi.mock('ky', () => ({ - default: { - delete: (url, options) => { - expect(url).toBe(`${API_URL}/v1/engines/${InferenceEngine.cortex_llamacpp}/install`) - expect(options.json).toEqual(mockEngineConfig) - return Promise.resolve({ messages: 'OK' }) - }, - }, - })) - - const result = await extension.uninstallEngine( - InferenceEngine.cortex_llamacpp, - mockEngineConfig - ) - - expect(result).toEqual({ messages: 'OK' }) - }) - }) - - describe('addRemoteModel', () => { - it('should send add model request with correct parameters', async () => { - const mockModel = { - id: 'gpt-4', - name: 'GPT-4', - engine: InferenceEngine.openai, - } - - vi.mock('ky', () => ({ - default: { - post: (url, options) => { - expect(url).toBe(`${API_URL}/v1/models/add`) - expect(options.json).toHaveProperty('id', 'gpt-4') - expect(options.json).toHaveProperty('engine', InferenceEngine.openai) - expect(options.json).toHaveProperty('inference_params') - return Promise.resolve() - }, - }, - })) - - await extension.addRemoteModel(mockModel) - // Success is implied by no thrown exceptions - }) - }) -}) \ No newline at end of file diff --git a/extensions/engine-management-extension/src/error.test.ts b/extensions/engine-management-extension/src/error.test.ts deleted file mode 100644 index 87389c50c..000000000 --- a/extensions/engine-management-extension/src/error.test.ts +++ /dev/null @@ -1,19 +0,0 @@ -import { describe, it, expect } from 'vitest' -import { EngineError } from './error' - -describe('EngineError', () => { - it('should create an error with the correct message', () => { - const errorMessage = 'Test error message' - const error = new 
EngineError(errorMessage) - - expect(error).toBeInstanceOf(Error) - expect(error.message).toBe(errorMessage) - expect(error.name).toBe('EngineError') - }) - - it('should create an error with default message if none provided', () => { - const error = new EngineError() - - expect(error.message).toBe('Engine error occurred') - }) -}) \ No newline at end of file diff --git a/extensions/engine-management-extension/src/error.ts b/extensions/engine-management-extension/src/error.ts deleted file mode 100644 index 50c75f22f..000000000 --- a/extensions/engine-management-extension/src/error.ts +++ /dev/null @@ -1,10 +0,0 @@ -/** - * Custom Engine Error - */ -export class EngineError extends Error { - message: string - constructor(message: string) { - super() - this.message = message - } -} diff --git a/extensions/engine-management-extension/src/index.test.ts b/extensions/engine-management-extension/src/index.test.ts deleted file mode 100644 index 174992f3b..000000000 --- a/extensions/engine-management-extension/src/index.test.ts +++ /dev/null @@ -1,449 +0,0 @@ -import { describe, beforeEach, it, expect, vi } from 'vitest' -import JanEngineManagementExtension from './index' -import { Engines, InferenceEngine } from '@janhq/core' -import { EngineError } from './error' -import { HTTPError } from 'ky' - -vi.stubGlobal('API_URL', 'http://localhost:3000') - -const mockEngines: Engines = [ - { - name: 'variant1', - version: '1.0.0', - type: 'local', - engine: InferenceEngine.cortex_llamacpp, - }, -] - -const mockRemoteEngines: Engines = [ - { - name: 'openai', - version: '1.0.0', - type: 'remote', - engine: InferenceEngine.openai, - }, -] - -const mockRemoteModels = { - data: [ - { - id: 'gpt-4', - name: 'GPT-4', - engine: InferenceEngine.openai, - }, - ], -} - -vi.stubGlobal('DEFAULT_REMOTE_ENGINES', mockEngines) -vi.stubGlobal('DEFAULT_REMOTE_MODELS', mockRemoteModels.data) - -describe('migrate engine settings', () => { - let extension: JanEngineManagementExtension - - beforeEach(() => { - // @ts-ignore - extension = new JanEngineManagementExtension() - vi.resetAllMocks() - }) - - it('engines should be migrated', async () => { - vi.stubGlobal('VERSION', '2.0.0') - - vi.spyOn(extension, 'getEngines').mockResolvedValue([]) - const mockUpdateEngines = vi - .spyOn(extension, 'updateEngine') - .mockReturnThis() - - mockUpdateEngines.mockResolvedValue({ - messages: 'OK', - }) - - await extension.migrate() - - // Assert that the returned value is equal to the mockEngines object - expect(mockUpdateEngines).toBeCalled() - }) - - it('should not migrate when extension version is not updated', async () => { - vi.stubGlobal('VERSION', '0.0.0') - vi.spyOn(extension, 'getEngines').mockResolvedValue([]) - const mockUpdateEngines = vi - .spyOn(extension, 'updateEngine') - .mockReturnThis() - - mockUpdateEngines.mockResolvedValue({ - messages: 'OK', - }) - - await extension.migrate() - - // Assert that the returned value is equal to the mockEngines object - expect(mockUpdateEngines).not.toBeCalled() - }) -}) - -describe('getEngines', () => { - let extension: JanEngineManagementExtension - - beforeEach(() => { - // @ts-ignore - extension = new JanEngineManagementExtension() - vi.resetAllMocks() - }) - - it('should return a list of engines', async () => { - const mockKyGet = vi.spyOn(extension, 'getEngines') - mockKyGet.mockResolvedValue(mockEngines) - - const engines = await extension.getEngines() - - expect(engines).toEqual(mockEngines) - }) -}) - -describe('getRemoteModels', () => { - let extension: 
JanEngineManagementExtension - - beforeEach(() => { - // @ts-ignore - extension = new JanEngineManagementExtension() - vi.resetAllMocks() - }) - - it('should return a list of remote models', async () => { - vi.mock('ky', () => ({ - default: { - get: () => ({ - json: () => Promise.resolve(mockRemoteModels), - }), - }, - })) - - const models = await extension.getRemoteModels('openai') - expect(models).toEqual(mockRemoteModels) - }) - - it('should return empty data array when request fails', async () => { - vi.mock('ky', () => ({ - default: { - get: () => ({ - json: () => Promise.reject(new Error('Failed to fetch')), - }), - }, - })) - - const models = await extension.getRemoteModels('openai') - expect(models).toEqual({ data: [] }) - }) -}) - -describe('getInstalledEngines', () => { - let extension: JanEngineManagementExtension - - beforeEach(() => { - // @ts-ignore - extension = new JanEngineManagementExtension() - vi.resetAllMocks() - }) - - it('should return a list of installed engines', async () => { - const mockEngineVariants = [ - { - name: 'windows-amd64-noavx', - version: '1.0.0', - }, - ] - - vi.mock('ky', () => ({ - default: { - get: () => ({ - json: () => Promise.resolve(mockEngineVariants), - }), - }, - })) - - const mock = vi.spyOn(extension, 'getInstalledEngines') - mock.mockResolvedValue(mockEngineVariants) - - const engines = await extension.getInstalledEngines(InferenceEngine.cortex_llamacpp) - expect(engines).toEqual(mockEngineVariants) - }) -}) - -describe('healthz', () => { - let extension: JanEngineManagementExtension - - beforeEach(() => { - // @ts-ignore - extension = new JanEngineManagementExtension() - vi.resetAllMocks() - }) - - it('should perform health check successfully', async () => { - vi.mock('ky', () => ({ - default: { - get: () => Promise.resolve(), - }, - })) - - await extension.healthz() - expect(extension.queue.concurrency).toBe(Infinity) - }) -}) - -describe('updateDefaultEngine', () => { - let extension: JanEngineManagementExtension - - beforeEach(() => { - // @ts-ignore - extension = new JanEngineManagementExtension() - vi.resetAllMocks() - }) - - it('should set default engine variant if not installed', async () => { - vi.stubGlobal('PLATFORM', 'win32') - vi.stubGlobal('CORTEX_ENGINE_VERSION', '1.0.0') - - const mockGetDefaultEngineVariant = vi.spyOn( - extension, - 'getDefaultEngineVariant' - ) - mockGetDefaultEngineVariant.mockResolvedValue({ - variant: 'variant1', - version: '1.0.0', - }) - - const mockGetInstalledEngines = vi.spyOn(extension, 'getInstalledEngines') - mockGetInstalledEngines.mockResolvedValue([]) - - const mockSetDefaultEngineVariant = vi.spyOn( - extension, - 'setDefaultEngineVariant' - ) - mockSetDefaultEngineVariant.mockResolvedValue({ messages: 'OK' }) - - vi.mock('@janhq/core', async (importOriginal) => { - const actual = (await importOriginal()) as any - return { - ...actual, - systemInformation: vi.fn().mockResolvedValue({ gpuSetting: 'high' }), - } - }) - - vi.mock('./utils', async (importOriginal) => { - const actual = (await importOriginal()) as any - return { - ...actual, - engineVariant: vi.fn().mockResolvedValue('windows-amd64-noavx'), - } - }) - - await extension.updateDefaultEngine() - - expect(mockSetDefaultEngineVariant).toHaveBeenCalledWith('llama-cpp', { - variant: 'windows-amd64-noavx', - version: '1.0.0', - }) - }) - - it('should not reset default engine variant if installed', async () => { - vi.stubGlobal('PLATFORM', 'win32') - vi.stubGlobal('CORTEX_ENGINE_VERSION', '1.0.0') - - const mockGetDefaultEngineVariant 
= vi.spyOn( - extension, - 'getDefaultEngineVariant' - ) - mockGetDefaultEngineVariant.mockResolvedValue({ - variant: 'windows-amd64-noavx', - version: '1.0.0', - }) - - const mockGetInstalledEngines = vi.spyOn(extension, 'getInstalledEngines') - mockGetInstalledEngines.mockResolvedValue([ - { - name: 'windows-amd64-noavx', - version: '1.0.0', - type: 'local', - engine: InferenceEngine.cortex_llamacpp, - }, - ]) - - const mockSetDefaultEngineVariant = vi.spyOn( - extension, - 'setDefaultEngineVariant' - ) - mockSetDefaultEngineVariant.mockResolvedValue({ messages: 'OK' }) - - vi.mock('@janhq/core', async (importOriginal) => { - const actual = (await importOriginal()) as any - return { - ...actual, - systemInformation: vi.fn().mockResolvedValue({ gpuSetting: 'high' }), - } - }) - - vi.mock('./utils', async (importOriginal) => { - const actual = (await importOriginal()) as any - return { - ...actual, - engineVariant: vi.fn().mockResolvedValue('windows-amd64-noavx'), - } - }) - - await extension.updateDefaultEngine() - - expect(mockSetDefaultEngineVariant).not.toBeCalled() - }) - - it('should handle HTTPError when getting default engine variant', async () => { - vi.stubGlobal('PLATFORM', 'win32') - vi.stubGlobal('CORTEX_ENGINE_VERSION', '1.0.0') - - const httpError = new Error('HTTP Error') as HTTPError - httpError.response = { status: 400 } as Response - - const mockGetDefaultEngineVariant = vi.spyOn( - extension, - 'getDefaultEngineVariant' - ) - mockGetDefaultEngineVariant.mockRejectedValue(httpError) - - const mockSetDefaultEngineVariant = vi.spyOn( - extension, - 'setDefaultEngineVariant' - ) - mockSetDefaultEngineVariant.mockResolvedValue({ messages: 'OK' }) - - vi.mock('@janhq/core', async (importOriginal) => { - const actual = (await importOriginal()) as any - return { - ...actual, - systemInformation: vi.fn().mockResolvedValue({ gpuSetting: 'high' }), - } - }) - - vi.mock('./utils', async (importOriginal) => { - const actual = (await importOriginal()) as any - return { - ...actual, - engineVariant: vi.fn().mockResolvedValue('windows-amd64-noavx'), - } - }) - - await extension.updateDefaultEngine() - - expect(mockSetDefaultEngineVariant).toHaveBeenCalledWith('llama-cpp', { - variant: 'windows-amd64-noavx', - version: '1.0.0', - }) - }) - - it('should handle EngineError when getting default engine variant', async () => { - vi.stubGlobal('PLATFORM', 'win32') - vi.stubGlobal('CORTEX_ENGINE_VERSION', '1.0.0') - - const mockGetDefaultEngineVariant = vi.spyOn( - extension, - 'getDefaultEngineVariant' - ) - mockGetDefaultEngineVariant.mockRejectedValue(new EngineError('Test error')) - - const mockSetDefaultEngineVariant = vi.spyOn( - extension, - 'setDefaultEngineVariant' - ) - mockSetDefaultEngineVariant.mockResolvedValue({ messages: 'OK' }) - - vi.mock('@janhq/core', async (importOriginal) => { - const actual = (await importOriginal()) as any - return { - ...actual, - systemInformation: vi.fn().mockResolvedValue({ gpuSetting: 'high' }), - } - }) - - vi.mock('./utils', async (importOriginal) => { - const actual = (await importOriginal()) as any - return { - ...actual, - engineVariant: vi.fn().mockResolvedValue('windows-amd64-noavx'), - } - }) - - await extension.updateDefaultEngine() - - expect(mockSetDefaultEngineVariant).toHaveBeenCalledWith('llama-cpp', { - variant: 'windows-amd64-noavx', - version: '1.0.0', - }) - }) - - it('should handle unexpected errors gracefully', async () => { - vi.stubGlobal('PLATFORM', 'win32') - - const mockGetDefaultEngineVariant = vi.spyOn( - extension, - 
'getDefaultEngineVariant' - ) - mockGetDefaultEngineVariant.mockRejectedValue(new Error('Unexpected error')) - - const consoleSpy = vi.spyOn(console, 'error').mockImplementation(() => {}) - - await extension.updateDefaultEngine() - - expect(consoleSpy).toHaveBeenCalled() - }) -}) - -describe('populateDefaultRemoteEngines', () => { - let extension: JanEngineManagementExtension - - beforeEach(() => { - // @ts-ignore - extension = new JanEngineManagementExtension() - vi.resetAllMocks() - }) - - it('should not add default remote engines if remote engines already exist', async () => { - const mockGetEngines = vi.spyOn(extension, 'getEngines') - mockGetEngines.mockResolvedValue(mockRemoteEngines) - - const mockAddRemoteEngine = vi.spyOn(extension, 'addRemoteEngine') - - await extension.populateDefaultRemoteEngines() - - expect(mockAddRemoteEngine).not.toBeCalled() - }) - - it('should add default remote engines if no remote engines exist', async () => { - const mockGetEngines = vi.spyOn(extension, 'getEngines') - mockGetEngines.mockResolvedValue([]) - - const mockAddRemoteEngine = vi.spyOn(extension, 'addRemoteEngine') - mockAddRemoteEngine.mockResolvedValue({ messages: 'OK' }) - - const mockAddRemoteModel = vi.spyOn(extension, 'addRemoteModel') - mockAddRemoteModel.mockResolvedValue(undefined) - - vi.mock('@janhq/core', async (importOriginal) => { - const actual = (await importOriginal()) as any - return { - ...actual, - events: { - emit: vi.fn(), - }, - joinPath: vi.fn().mockResolvedValue('/path/to/settings.json'), - getJanDataFolderPath: vi.fn().mockResolvedValue('/path/to/data'), - fs: { - existsSync: vi.fn().mockResolvedValue(false), - }, - } - }) - - await extension.populateDefaultRemoteEngines() - - expect(mockAddRemoteEngine).toHaveBeenCalled() - expect(mockAddRemoteModel).toHaveBeenCalled() - }) -}) diff --git a/extensions/engine-management-extension/src/index.ts b/extensions/engine-management-extension/src/index.ts deleted file mode 100644 index 331bf6531..000000000 --- a/extensions/engine-management-extension/src/index.ts +++ /dev/null @@ -1,412 +0,0 @@ -import { - EngineManagementExtension, - DefaultEngineVariant, - Engines, - EngineConfig, - EngineVariant, - EngineReleased, - executeOnMain, - systemInformation, - Model, - fs, - joinPath, - events, - ModelEvent, - EngineEvent, -} from '@janhq/core' -import ky, { HTTPError, KyInstance } from 'ky' -import { EngineError } from './error' -import { getJanDataFolderPath } from '@janhq/core' -import { engineVariant } from './utils' - -interface ModelList { - data: Model[] -} -/** - * JanEngineManagementExtension is a EngineManagementExtension implementation that provides - * functionality for managing engines. - */ -export default class JanEngineManagementExtension extends EngineManagementExtension { - api?: KyInstance - /** - * Get the API instance - * @returns - */ - async apiInstance(): Promise { - if (this.api) return this.api - const apiKey = await window.core?.api.appToken() - this.api = ky.extend({ - prefixUrl: API_URL, - headers: apiKey - ? { - Authorization: `Bearer ${apiKey}`, - } - : {}, - retry: 10, - }) - return this.api - } - /** - * Called when the extension is loaded. - */ - async onLoad() { - // Update default local engine - // this.updateDefaultEngine() - - // Migrate - this.migrate() - } - - /** - * Called when the extension is unloaded. - */ - onUnload() { } - - /** - * @returns A Promise that resolves to an object of list engines. 
- */ - async getEngines(): Promise { - return {} - return this.apiInstance().then((api) => - api - .get('v1/engines') - .json() - .then((e) => e) - ) as Promise - } - - /** - * @returns A Promise that resolves to an object of list engines. - */ - async getRemoteModels(name: string): Promise { - return this.apiInstance().then( - (api) => - api - .get(`v1/models/remote/${name}`) - .json() - .catch(() => ({ - data: [], - })) as Promise - ) - } - - /** - * @param name - Inference engine name. - * @returns A Promise that resolves to an array of installed engine. - */ - async getInstalledEngines(name: string): Promise { - return [] - return this.apiInstance().then((api) => - api - .get(`v1/engines/${name}`) - .json() - .then((e) => e) - ) as Promise - } - - /** - * @param name - Inference engine name. - * @param version - Version of the engine. - * @param platform - Optional to sort by operating system. macOS, linux, windows. - * @returns A Promise that resolves to an array of latest released engine by version. - */ - async getReleasedEnginesByVersion( - name: string, - version: string, - platform?: string - ) { - return this.apiInstance().then((api) => - api - .get(`v1/engines/${name}/releases/${version}`) - .json() - .then((e) => - platform ? e.filter((r) => r.name.includes(platform)) : e - ) - ) as Promise - } - - /** - * @param name - Inference engine name. - * @param platform - Optional to sort by operating system. macOS, linux, windows. - * @returns A Promise that resolves to an array of latest released engine by version. - */ - async getLatestReleasedEngine(name: string, platform?: string) { - return this.apiInstance().then((api) => - api - .get(`v1/engines/${name}/releases/latest`) - .json() - .then((e) => - platform ? e.filter((r) => r.name.includes(platform)) : e - ) - ) as Promise - } - - /** - * @param name - Inference engine name. - * @returns A Promise that resolves to intall of engine. - */ - async installEngine(name: string, engineConfig: EngineConfig) { - return this.apiInstance().then((api) => - api - .post(`v1/engines/${name}/install`, { json: engineConfig }) - .then((e) => e) - ) as Promise<{ messages: string }> - } - - /** - * Add a new remote engine - * @returns A Promise that resolves to intall of engine. - */ - async addRemoteEngine( - engineConfig: EngineConfig, - persistModels: boolean = true - ) { - // Populate default settings - if ( - engineConfig.metadata?.transform_req?.chat_completions && - !engineConfig.metadata.transform_req.chat_completions.template - ) - engineConfig.metadata.transform_req.chat_completions.template = - DEFAULT_REQUEST_PAYLOAD_TRANSFORM - - if ( - engineConfig.metadata?.transform_resp?.chat_completions && - !engineConfig.metadata.transform_resp.chat_completions?.template - ) - engineConfig.metadata.transform_resp.chat_completions.template = - DEFAULT_RESPONSE_BODY_TRANSFORM - - if (engineConfig.metadata && !engineConfig.metadata?.header_template) - engineConfig.metadata.header_template = DEFAULT_REQUEST_HEADERS_TRANSFORM - - return this.apiInstance().then((api) => - api.post('v1/engines', { json: engineConfig }).then((e) => { - if (persistModels && engineConfig.metadata?.get_models_url) { - // Pull /models from remote models endpoint - return this.populateRemoteModels(engineConfig) - .then(() => e) - .catch(() => e) - } - return e - }) - ) as Promise<{ messages: string }> - } - - /** - * @param name - Inference engine name. - * @returns A Promise that resolves to unintall of engine. 
- */ - async uninstallEngine(name: string, engineConfig: EngineConfig) { - return this.apiInstance().then((api) => - api - .delete(`v1/engines/${name}/install`, { json: engineConfig }) - .then((e) => e) - ) as Promise<{ messages: string }> - } - - /** - * Add a new remote model - * @param model - Remote model object. - */ - async addRemoteModel(model: Model) { - return this.apiInstance().then((api) => - api - .post('v1/models/add', { - json: { - inference_params: { - max_tokens: 4096, - temperature: 0.7, - top_p: 0.95, - stream: true, - frequency_penalty: 0, - presence_penalty: 0, - }, - ...model, - }, - }) - .then((e) => e) - .then(() => { }) - ) - } - - /** - * @param name - Inference engine name. - * @returns A Promise that resolves to an object of default engine. - */ - async getDefaultEngineVariant(name: string) { - return this.apiInstance().then((api) => - api - .get(`v1/engines/${name}/default`) - .json<{ messages: string }>() - .then((e) => e) - ) as Promise - } - - /** - * @body variant - string - * @body version - string - * @returns A Promise that resolves to set default engine. - */ - async setDefaultEngineVariant(name: string, engineConfig: EngineConfig) { - return this.apiInstance().then((api) => - api - .post(`v1/engines/${name}/default`, { json: engineConfig }) - .then((e) => e) - ) as Promise<{ messages: string }> - } - - /** - * @returns A Promise that resolves to update engine. - */ - async updateEngine(name: string, engineConfig?: EngineConfig) { - return this.apiInstance().then((api) => - api - .post(`v1/engines/${name}/update`, { json: engineConfig }) - .then((e) => e) - ) as Promise<{ messages: string }> - } - - /** - * Update default local engine - * This is to use built-in engine variant in case there is no default engine set - */ - async updateDefaultEngine() { - const systemInfo = await systemInformation() - try { - const variant = await this.getDefaultEngineVariant('llama-cpp') - if ( - (systemInfo.gpuSetting.vulkan && !variant.variant.includes('vulkan')) || - (systemInfo.gpuSetting.vulkan === false && - variant.variant.includes('vulkan')) - ) { - throw new EngineError('Switch engine.') - } - const installedEngines = await this.getInstalledEngines('llama-cpp') - if ( - !installedEngines.some( - (e) => e.name === variant.variant && e.version === variant.version - ) || - variant.version < CORTEX_ENGINE_VERSION - ) { - throw new EngineError( - 'Default engine is not available, use bundled version.' 
- ) - } - } catch (error) { - if ( - (error instanceof HTTPError && error.response.status === 400) || - error instanceof EngineError - ) { - const variant = await engineVariant(systemInfo.gpuSetting) - // TODO: Use correct provider name when moving to llama.cpp extension - await this.setDefaultEngineVariant('llama-cpp', { - variant: variant, - version: `${CORTEX_ENGINE_VERSION}`, - }) - } else { - console.error('An unexpected error occurred:', error) - } - } - } - - /** - * This is to populate default remote engines in case there is no customized remote engine setting - */ - async populateDefaultRemoteEngines() { - const engines = await this.getEngines() - if ( - !Object.values(engines) - .flat() - .some((e) => e.type === 'remote') - ) { - await Promise.all( - DEFAULT_REMOTE_ENGINES.map(async (engine) => { - const { id, ...data } = engine - - /// BEGIN - Migrate legacy api key settings - let api_key = undefined - if (id) { - const apiKeyPath = await joinPath([ - await getJanDataFolderPath(), - 'settings', - id, - 'settings.json', - ]) - if (await fs.existsSync(apiKeyPath)) { - const settings = await fs.readFileSync(apiKeyPath, 'utf-8') - api_key = JSON.parse(settings).find( - (e) => e.key === `${data.engine}-api-key` - )?.controllerProps?.value - } - } - data.api_key = api_key - /// END - Migrate legacy api key settings - - await this.addRemoteEngine(data, false).catch(console.error) - }) - ) - events.emit(EngineEvent.OnEngineUpdate, {}) - await Promise.all( - DEFAULT_REMOTE_MODELS.map((data: Model) => - this.addRemoteModel(data).catch(() => { }) - ) - ) - events.emit(ModelEvent.OnModelsUpdate, { fetch: true }) - } - } - - /** - * Pulls models list from the remote provider and persist - * @param engineConfig - * @returns - */ - private populateRemoteModels = async (engineConfig: EngineConfig) => { - return this.getRemoteModels(engineConfig.engine) - .then((models: ModelList) => { - if (models?.data) - Promise.all( - models.data.map((model) => - this.addRemoteModel({ - ...model, - engine: engineConfig.engine, - model: model.model ?? 
model.id, - }).catch(console.info) - ) - ).then(() => { - events.emit(ModelEvent.OnModelsUpdate, { fetch: true }) - }) - }) - .catch(console.info) - } - - /** - * Update engine settings to the latest version - */ - migrate = async () => { - // Ensure health check is done - const version = await this.getSetting('version', '0.0.0') - const engines = await this.getEngines() - if (version < VERSION) { - console.log('Migrating engine settings...') - // Migrate engine settings - await Promise.all( - DEFAULT_REMOTE_ENGINES.map((engine) => { - const { id, ...data } = engine - - data.api_key = engines[id]?.api_key - return this.updateEngine(id, { - ...data, - }).catch(console.error) - }) - ) - await this.updateSettings([ - { - key: 'version', - controllerProps: { - value: VERSION, - }, - }, - ]) - } - } -} diff --git a/extensions/engine-management-extension/src/node/index.ts b/extensions/engine-management-extension/src/node/index.ts deleted file mode 100644 index ce8d9b274..000000000 --- a/extensions/engine-management-extension/src/node/index.ts +++ /dev/null @@ -1,69 +0,0 @@ -import * as path from 'path' -import { - appResourcePath, - getJanDataFolderPath, - log, -} from '@janhq/core/node' -import { mkdir, readdir, symlink, cp } from 'fs/promises' -import { existsSync } from 'fs' - -/** - * Create symlink to each variant for the default bundled version - * If running in AppImage environment, copy files instead of creating symlinks - */ -const symlinkEngines = async () => { - const sourceEnginePath = path.join( - appResourcePath(), - 'shared', - 'engines', - 'llama.cpp' - ) - const symlinkEnginePath = path.join( - getJanDataFolderPath(), - 'engines', - 'llama.cpp' - ) - const variantFolders = await readdir(sourceEnginePath) - const isStandalone = process.platform === 'linux' - - for (const variant of variantFolders) { - const targetVariantPath = path.join( - sourceEnginePath, - variant, - CORTEX_ENGINE_VERSION - ) - const symlinkVariantPath = path.join( - symlinkEnginePath, - variant, - CORTEX_ENGINE_VERSION - ) - - await mkdir(path.join(symlinkEnginePath, variant), { - recursive: true, - }).catch((error) => log(JSON.stringify(error))) - - // Skip if already exists - if (existsSync(symlinkVariantPath)) { - console.log(`Target already exists: ${symlinkVariantPath}`) - continue - } - - if (isStandalone) { - // Copy files for AppImage environments instead of symlinking - await cp(targetVariantPath, symlinkVariantPath, { recursive: true }).catch( - (error) => log(JSON.stringify(error)) - ) - console.log(`Files copied: ${targetVariantPath} -> ${symlinkVariantPath}`) - } else { - // Create symlink for other environments - await symlink(targetVariantPath, symlinkVariantPath, 'junction').catch( - (error) => log(JSON.stringify(error)) - ) - console.log(`Symlink created: ${targetVariantPath} -> ${symlinkVariantPath}`) - } - } -} - -export default { - symlinkEngines, -} diff --git a/extensions/engine-management-extension/src/populateRemoteModels.test.ts b/extensions/engine-management-extension/src/populateRemoteModels.test.ts deleted file mode 100644 index 225db26cc..000000000 --- a/extensions/engine-management-extension/src/populateRemoteModels.test.ts +++ /dev/null @@ -1,139 +0,0 @@ -import { describe, beforeEach, it, expect, vi } from 'vitest' -import JanEngineManagementExtension from './index' -import { InferenceEngine } from '@janhq/core' - -describe('populateRemoteModels', () => { - let extension: JanEngineManagementExtension - - beforeEach(() => { - // @ts-ignore - extension = new 
JanEngineManagementExtension() - vi.resetAllMocks() - }) - - it('should populate remote models successfully', async () => { - const mockEngineConfig = { - engine: InferenceEngine.openai, - } - - const mockRemoteModels = { - data: [ - { - id: 'gpt-4', - name: 'GPT-4', - }, - ], - } - - const mockGetRemoteModels = vi.spyOn(extension, 'getRemoteModels') - mockGetRemoteModels.mockResolvedValue(mockRemoteModels) - - const mockAddRemoteModel = vi.spyOn(extension, 'addRemoteModel') - mockAddRemoteModel.mockResolvedValue(undefined) - - vi.mock('@janhq/core', async (importOriginal) => { - const actual = (await importOriginal()) as any - return { - ...actual, - events: { - emit: vi.fn(), - }, - } - }) - - // Use the private method through index.ts - // @ts-ignore - Accessing private method for testing - await extension.populateRemoteModels(mockEngineConfig) - - expect(mockGetRemoteModels).toHaveBeenCalledWith(mockEngineConfig.engine) - expect(mockAddRemoteModel).toHaveBeenCalledWith({ - ...mockRemoteModels.data[0], - engine: mockEngineConfig.engine, - model: 'gpt-4', - }) - }) - - it('should handle empty data from remote models', async () => { - const mockEngineConfig = { - engine: InferenceEngine.openai, - } - - const mockGetRemoteModels = vi.spyOn(extension, 'getRemoteModels') - mockGetRemoteModels.mockResolvedValue({ data: [] }) - - const mockAddRemoteModel = vi.spyOn(extension, 'addRemoteModel') - - vi.mock('@janhq/core', async (importOriginal) => { - const actual = (await importOriginal()) as any - return { - ...actual, - events: { - emit: vi.fn(), - }, - } - }) - - // @ts-ignore - Accessing private method for testing - await extension.populateRemoteModels(mockEngineConfig) - - expect(mockGetRemoteModels).toHaveBeenCalledWith(mockEngineConfig.engine) - expect(mockAddRemoteModel).not.toHaveBeenCalled() - }) - - it('should handle errors when getting remote models', async () => { - const mockEngineConfig = { - engine: InferenceEngine.openai, - } - - const mockGetRemoteModels = vi.spyOn(extension, 'getRemoteModels') - mockGetRemoteModels.mockRejectedValue(new Error('Failed to fetch models')) - - const consoleSpy = vi.spyOn(console, 'info').mockImplementation(() => {}) - - // @ts-ignore - Accessing private method for testing - await extension.populateRemoteModels(mockEngineConfig) - - expect(mockGetRemoteModels).toHaveBeenCalledWith(mockEngineConfig.engine) - expect(consoleSpy).toHaveBeenCalled() - }) - - it('should handle errors when adding remote models', async () => { - const mockEngineConfig = { - engine: InferenceEngine.openai, - } - - const mockRemoteModels = { - data: [ - { - id: 'gpt-4', - name: 'GPT-4', - }, - ], - } - - const mockGetRemoteModels = vi.spyOn(extension, 'getRemoteModels') - mockGetRemoteModels.mockResolvedValue(mockRemoteModels) - - const mockAddRemoteModel = vi.spyOn(extension, 'addRemoteModel') - mockAddRemoteModel.mockRejectedValue(new Error('Failed to add model')) - - const consoleSpy = vi.spyOn(console, 'info').mockImplementation(() => {}) - - vi.mock('@janhq/core', async (importOriginal) => { - const actual = (await importOriginal()) as any - return { - ...actual, - events: { - emit: vi.fn(), - }, - } - }) - - // @ts-ignore - Accessing private method for testing - await extension.populateRemoteModels(mockEngineConfig) - - expect(mockGetRemoteModels).toHaveBeenCalledWith(mockEngineConfig.engine) - expect(mockAddRemoteModel).toHaveBeenCalled() - expect(consoleSpy).toHaveBeenCalled() - }) -}) \ No newline at end of file diff --git 
a/extensions/engine-management-extension/src/utils.test.ts b/extensions/engine-management-extension/src/utils.test.ts deleted file mode 100644 index e453f58cb..000000000 --- a/extensions/engine-management-extension/src/utils.test.ts +++ /dev/null @@ -1,90 +0,0 @@ -import { describe, it, expect, vi } from 'vitest' -import { engineVariant } from './utils' - -vi.mock('@janhq/core', () => { - return { - log: () => {}, - } -}) - -describe('engineVariant', () => { - it('should return mac-arm64 when platform is darwin and arch is arm64', async () => { - vi.stubGlobal('PLATFORM', 'darwin') - const result = await engineVariant({ - cpu: { arch: 'arm64', instructions: '' }, - gpus: [], - vulkan: false, - }) - expect(result).toBe('mac-arm64') - }) - - it('should return mac-amd64 when platform is darwin and arch is not arm64', async () => { - vi.stubGlobal('PLATFORM', 'darwin') - const result = await engineVariant({ - cpu: { arch: 'x64', instructions: [] }, - gpus: [], - vulkan: false, - }) - expect(result).toBe('mac-amd64') - }) - - it('should return windows-amd64-noavx-cuda-12-0 when platform is win32, cuda is enabled, and cuda version is 12', async () => { - vi.stubGlobal('PLATFORM', 'win32') - const result = await engineVariant({ - cpu: { arch: 'x64', instructions: ['avx2'] }, - gpus: [ - { - activated: true, - version: '12', - additional_information: { driver_version: '1.0' }, - }, - ], - vulkan: false, - }) - expect(result).toBe('windows-amd64-avx2-cuda-12-0') - }) - - it('should return linux-amd64-noavx-cuda-11-7 when platform is linux, cuda is enabled, and cuda version is 11', async () => { - vi.stubGlobal('PLATFORM', 'linux') - const result = await engineVariant({ - cpu: { arch: 'x64', instructions: [] }, - gpus: [ - { - activated: true, - version: '11', - additional_information: { driver_version: '1.0' }, - }, - ], - vulkan: false, - }) - expect(result).toBe('linux-amd64-noavx-cuda-11-7') - }) - - it('should return windows-amd64-vulkan when platform is win32 and vulkan is enabled', async () => { - vi.stubGlobal('PLATFORM', 'win32') - const result = await engineVariant({ - cpu: { arch: 'x64', instructions: [] }, - gpus: [{ activated: true, version: '12' }], - vulkan: true, - }) - expect(result).toBe('windows-amd64-vulkan') - }) - - it('should return windows-amd64-avx512 when platform is win32, no gpu detected and avx512 cpu instruction is supported', async () => { - vi.stubGlobal('PLATFORM', 'win32') - const result = await engineVariant({ - cpu: { arch: 'x64', instructions: ['avx512'] }, - gpus: [{ activated: true, version: '12' }], - }) - expect(result).toBe('windows-amd64-avx512') - }) - - it('should return windows-amd64-avx512 when platform is win32, no gpu detected and no accelerated cpu instructions are supported', async () => { - vi.stubGlobal('PLATFORM', 'win32') - const result = await engineVariant({ - cpu: { arch: 'x64', instructions: [''] }, - gpus: [{ activated: true, version: '12' }], - }) - expect(result).toBe('windows-amd64-noavx') - }) -}) diff --git a/extensions/engine-management-extension/src/utils.ts b/extensions/engine-management-extension/src/utils.ts deleted file mode 100644 index bc5b09fd3..000000000 --- a/extensions/engine-management-extension/src/utils.ts +++ /dev/null @@ -1,105 +0,0 @@ -import { GpuSetting, log } from '@janhq/core' - -// Supported run modes -enum RunMode { - Cuda = 'cuda', - CPU = 'cpu', -} - -// Supported instruction sets -const instructionBinaryNames = ['noavx', 'avx', 'avx2', 'avx512'] - -/** - * The GPU runMode that will be set - either 'vulkan', 
'cuda', or empty for cpu. - * @param settings - * @returns - */ - -const gpuRunMode = (settings?: GpuSetting): RunMode => { - return settings.gpus?.some( - (gpu) => - gpu.activated && - gpu.additional_information && - gpu.additional_information.driver_version - ) - ? RunMode.Cuda - : RunMode.CPU -} - -/** - * The OS & architecture that the current process is running on. - * @returns win, mac-x64, mac-arm64, or linux - */ -const os = (settings?: GpuSetting): string => { - return PLATFORM === 'win32' - ? 'win' - : PLATFORM === 'darwin' - ? settings?.cpu?.arch === 'arm64' - ? 'macos-arm64' - : 'macos-x64' - : 'linux' -} - -/** - * The CUDA version that will be set - either 'cu12.0' or 'cu11.7'. - * @param settings - * @returns - */ -const cudaVersion = ( - settings?: GpuSetting -): 'cu12.0' | 'cu11.7' | undefined => { - return settings.gpus?.some((gpu) => gpu.version.includes('12')) - ? 'cu12.0' - : 'cu11.7' -} - -/** - * The CPU instructions that will be set - either 'avx512', 'avx2', 'avx', or 'noavx'. - * @returns - */ - -/** - * Find which variant to run based on the current platform. - */ -export const engineVariant = async ( - gpuSetting?: GpuSetting -): Promise => { - const platform = os(gpuSetting) - - // There is no need to append the variant extension for mac - if (platform.startsWith('mac')) return platform - - const runMode = gpuRunMode(gpuSetting) - // Only Nvidia GPUs have addition_information set and activated by default - let engineVariant = - !gpuSetting?.vulkan && - (!gpuSetting.gpus?.length || - gpuSetting.gpus.some((e) => e.additional_information && e.activated)) - ? [ - platform, - ...(runMode === RunMode.Cuda - ? // For cuda we only need to check if the cpu supports avx2 or noavx - since other binaries are not shipped with the extension - [ - gpuSetting.cpu?.instructions.includes('avx2') || - gpuSetting.cpu?.instructions.includes('avx512') - ? 'avx2' - : 'noavx', - runMode, - cudaVersion(gpuSetting), - 'x64', - ] - : // For cpu only we need to check all available supported instructions - [ - (gpuSetting.cpu?.instructions ?? ['noavx']).find((e) => - instructionBinaryNames.includes(e.toLowerCase()) - ) ?? 
'noavx', - 'x64', - ]), - ].filter(Boolean) - : [platform, 'vulkan', 'x64'] - - let engineVariantString = engineVariant.join('-') - - log(`[CORTEX]: Engine variant: ${engineVariantString}`) - return engineVariantString -} diff --git a/extensions/engine-management-extension/tsconfig.json b/extensions/engine-management-extension/tsconfig.json deleted file mode 100644 index 72e1e1895..000000000 --- a/extensions/engine-management-extension/tsconfig.json +++ /dev/null @@ -1,16 +0,0 @@ -{ - "compilerOptions": { - "target": "es2016", - "module": "ES6", - "moduleResolution": "node", - "outDir": "./dist", - "esModuleInterop": true, - "forceConsistentCasingInFileNames": true, - "strict": false, - "skipLibCheck": true, - "rootDir": "./src", - "resolveJsonModule": true - }, - "include": ["./src"], - "exclude": ["src/**/*.test.ts", "rolldown.config.mjs"] -} diff --git a/extensions/hardware-management-extension/jest.config.js b/extensions/hardware-management-extension/jest.config.js deleted file mode 100644 index 8bb37208d..000000000 --- a/extensions/hardware-management-extension/jest.config.js +++ /dev/null @@ -1,5 +0,0 @@ -/** @type {import('ts-jest').JestConfigWithTsJest} */ -module.exports = { - preset: 'ts-jest', - testEnvironment: 'node', -} diff --git a/extensions/hardware-management-extension/package.json b/extensions/hardware-management-extension/package.json deleted file mode 100644 index 08346b3f2..000000000 --- a/extensions/hardware-management-extension/package.json +++ /dev/null @@ -1,46 +0,0 @@ -{ - "name": "@janhq/hardware-management-extension", - "productName": "Hardware Management", - "version": "1.0.0", - "description": "Manages hardware settings.", - "main": "dist/index.js", - "node": "dist/node/index.cjs.js", - "author": "Jan ", - "license": "MIT", - "scripts": { - "test": "jest", - "build": "rolldown -c rolldown.config.mjs", - "codesign:darwin": "../../.github/scripts/auto-sign.sh", - "codesign:win32:linux": "echo 'No codesigning required'", - "codesign": "run-script-os", - "build:publish": "rimraf *.tgz --glob || true && yarn build && yarn codesign && npm pack && cpx *.tgz ../../pre-install" - }, - "exports": { - ".": "./dist/index.js", - "./main": "./dist/module.js" - }, - "devDependencies": { - "cpx": "^1.5.0", - "rimraf": "^3.0.2", - "rolldown": "^1.0.0-beta.1", - "run-script-os": "^1.1.6", - "ts-loader": "^9.5.0", - "typescript": "^5.3.3" - }, - "dependencies": { - "@janhq/core": "../../core/package.tgz", - "ky": "^1.7.2", - "p-queue": "^8.0.1" - }, - "bundledDependencies": [ - "@janhq/core" - ], - "hardwares": { - "node": ">=18.0.0" - }, - "files": [ - "dist/*", - "package.json", - "README.md" - ] -} diff --git a/extensions/hardware-management-extension/rolldown.config.mjs b/extensions/hardware-management-extension/rolldown.config.mjs deleted file mode 100644 index 1a9c34ba0..000000000 --- a/extensions/hardware-management-extension/rolldown.config.mjs +++ /dev/null @@ -1,16 +0,0 @@ -import { defineConfig } from 'rolldown' -import pkgJson from './package.json' with { type: 'json' } - -export default defineConfig([ - { - input: 'src/index.ts', - output: { - format: 'esm', - file: 'dist/index.js', - }, - define: { - NODE: JSON.stringify(`${pkgJson.name}/${pkgJson.node}`), - API_URL: JSON.stringify(`http://127.0.0.1:${process.env.CORTEX_API_PORT ?? 
"39291"}`), - }, - }, -]) diff --git a/extensions/hardware-management-extension/src/@types/global.d.ts b/extensions/hardware-management-extension/src/@types/global.d.ts deleted file mode 100644 index a412681e8..000000000 --- a/extensions/hardware-management-extension/src/@types/global.d.ts +++ /dev/null @@ -1,11 +0,0 @@ -declare const API_URL: string -declare const NODE: string - -interface Core { - api: APIFunctions - events: EventEmitter -} -interface Window { - core?: Core | undefined - electronAPI?: any | undefined -} diff --git a/extensions/hardware-management-extension/src/index.ts b/extensions/hardware-management-extension/src/index.ts deleted file mode 100644 index bd94f3828..000000000 --- a/extensions/hardware-management-extension/src/index.ts +++ /dev/null @@ -1,65 +0,0 @@ -import { HardwareManagementExtension, HardwareInformation } from '@janhq/core' -import ky, { KyInstance } from 'ky' - -/** - * JSONHardwareManagementExtension is a HardwareManagementExtension implementation that provides - * functionality for managing engines. - */ -export default class JSONHardwareManagementExtension extends HardwareManagementExtension { - /** - * Called when the extension is loaded. - */ - async onLoad() {} - - api?: KyInstance - /** - * Get the API instance - * @returns - */ - async apiInstance(): Promise { - if (this.api) return this.api - const apiKey = (await window.core?.api.appToken()) - this.api = ky.extend({ - prefixUrl: API_URL, - headers: apiKey - ? { - Authorization: `Bearer ${apiKey}`, - } - : {}, - retry: 10, - }) - return this.api - } - - /** - * Called when the extension is unloaded. - */ - onUnload() {} - - /** - * @returns A Promise that resolves to an object of hardware. - */ - async getHardware(): Promise { - return this.apiInstance().then((api) => - api - .get('v1/hardware') - .json() - .then((e) => e) - ) as Promise - } - - /** - * @returns A Promise that resolves to an object of set gpu activate. 
- */ - async setActiveGpu(data: { gpus: number[] }): Promise<{ - message: string - activated_gpus: number[] - }> { - return this.apiInstance().then((api) => - api.post('v1/hardware/activate', { json: data }).then((e) => e) - ) as Promise<{ - message: string - activated_gpus: number[] - }> - } -} diff --git a/extensions/hardware-management-extension/tsconfig.json b/extensions/hardware-management-extension/tsconfig.json deleted file mode 100644 index 72e1e1895..000000000 --- a/extensions/hardware-management-extension/tsconfig.json +++ /dev/null @@ -1,16 +0,0 @@ -{ - "compilerOptions": { - "target": "es2016", - "module": "ES6", - "moduleResolution": "node", - "outDir": "./dist", - "esModuleInterop": true, - "forceConsistentCasingInFileNames": true, - "strict": false, - "skipLibCheck": true, - "rootDir": "./src", - "resolveJsonModule": true - }, - "include": ["./src"], - "exclude": ["src/**/*.test.ts", "rolldown.config.mjs"] -} diff --git a/extensions/llamacpp-extension/src/index.ts b/extensions/llamacpp-extension/src/index.ts index bbd682145..f9fccd268 100644 --- a/extensions/llamacpp-extension/src/index.ts +++ b/extensions/llamacpp-extension/src/index.ts @@ -152,6 +152,16 @@ export default class llamacpp_extension extends AIEngine { ]) } + async getProviderPath(): Promise { + if (!this.providerPath) { + this.providerPath = await joinPath([ + await getJanDataFolderPath(), + this.providerId, + ]) + } + return this.providerPath + } + override async onUnload(): Promise { // Terminate all active sessions for (const [_, sInfo] of this.activeSessions) { @@ -193,7 +203,7 @@ export default class llamacpp_extension extends AIEngine { // Implement the required LocalProvider interface methods override async list(): Promise { - const modelsDir = await joinPath([this.providerPath, 'models']) + const modelsDir = await joinPath([await this.getProviderPath(), 'models']) if (!(await fs.existsSync(modelsDir))) { return [] } @@ -262,7 +272,7 @@ export default class llamacpp_extension extends AIEngine { ) const configPath = await joinPath([ - this.providerPath, + await this.getProviderPath(), 'models', modelId, 'model.yml', @@ -498,7 +508,7 @@ export default class llamacpp_extension extends AIEngine { console.log('Calling Tauri command llama_load with args:', args) const backendPath = await getBackendExePath(backend, version) - const libraryPath = await joinPath([this.providerPath, 'lib']) + const libraryPath = await joinPath([await this.getProviderPath(), 'lib']) try { // TODO: add LIBRARY_PATH @@ -568,7 +578,9 @@ export default class llamacpp_extension extends AIEngine { if (!response.ok) { const errorData = await response.json().catch(() => null) throw new Error( - `API request failed with status ${response.status}: ${JSON.stringify(errorData)}` + `API request failed with status ${response.status}: ${JSON.stringify( + errorData + )}` ) } @@ -622,7 +634,8 @@ export default class llamacpp_extension extends AIEngine { } override async chat( - opts: chatCompletionRequest + opts: chatCompletionRequest, + abortController?: AbortController ): Promise> { const sessionInfo = this.findSessionByModel(opts.model) if (!sessionInfo) { @@ -630,6 +643,7 @@ export default class llamacpp_extension extends AIEngine { } const baseUrl = `http://localhost:${sessionInfo.port}/v1` const url = `${baseUrl}/chat/completions` + console.log('Session Info:', sessionInfo, sessionInfo.api_key) const headers = { 'Content-Type': 'application/json', 'Authorization': `Bearer ${sessionInfo.api_key}`, @@ -644,12 +658,15 @@ export default class 
llamacpp_extension extends AIEngine { method: 'POST', headers, body, + signal: abortController?.signal, }) if (!response.ok) { const errorData = await response.json().catch(() => null) throw new Error( - `API request failed with status ${response.status}: ${JSON.stringify(errorData)}` + `API request failed with status ${response.status}: ${JSON.stringify( + errorData + )}` ) } @@ -657,7 +674,11 @@ export default class llamacpp_extension extends AIEngine { } override async delete(modelId: string): Promise { - const modelDir = await joinPath([this.providerPath, 'models', modelId]) + const modelDir = await joinPath([ + await this.getProviderPath(), + 'models', + modelId, + ]) if (!(await fs.existsSync(await joinPath([modelDir, 'model.yml'])))) { throw new Error(`Model ${modelId} does not exist`) diff --git a/extensions/model-extension/README.md b/extensions/model-extension/README.md deleted file mode 100644 index b9595b6e1..000000000 --- a/extensions/model-extension/README.md +++ /dev/null @@ -1,75 +0,0 @@ -# Create a Jan Extension using Typescript - -Use this template to bootstrap the creation of a TypeScript Jan extension. 🚀 - -## Create Your Own Extension - -To create your own extension, you can use this repository as a template! Just follow the below instructions: - -1. Click the Use this template button at the top of the repository -2. Select Create a new repository -3. Select an owner and name for your new repository -4. Click Create repository -5. Clone your new repository - -## Initial Setup - -After you've cloned the repository to your local machine or codespace, you'll need to perform some initial setup steps before you can develop your extension. - -> [!NOTE] -> -> You'll need to have a reasonably modern version of -> [Node.js](https://nodejs.org) handy. If you are using a version manager like -> [`nodenv`](https://github.com/nodenv/nodenv) or -> [`nvm`](https://github.com/nvm-sh/nvm), you can run `nodenv install` in the -> root of your repository to install the version specified in -> [`package.json`](./package.json). Otherwise, 20.x or later should work! - -1. :hammer_and_wrench: Install the dependencies - - ```bash - npm install - ``` - -1. :building_construction: Package the TypeScript for distribution - - ```bash - npm run bundle - ``` - -1. :white_check_mark: Check your artifact - - There will be a tgz file in your extension directory now - -## Update the Extension Metadata - -The [`package.json`](package.json) file defines metadata about your extension, such as -extension name, main entry, description and version. - -When you copy this repository, update `package.json` with the name, description for your extension. - -## Update the Extension Code - -The [`src/`](./src/) directory is the heart of your extension! This contains the -source code that will be run when your extension functions are invoked. You can replace the -contents of this directory with your own code. - -There are a few things to keep in mind when writing your extension code: - -- Most Jan Extension functions are processed asynchronously. - In `index.ts`, you will see that the extension function will return a `Promise`. - - ```typescript - import { events, MessageEvent, MessageRequest } from '@janhq/core' - - function onStart(): Promise { - return events.on(MessageEvent.OnMessageSent, (data: MessageRequest) => - this.inference(data) - ) - } - ``` - - For more information about the Jan Extension Core module, see the - [documentation](https://github.com/menloresearch/jan/blob/main/core/README.md). 
- -So, what are you waiting for? Go ahead and start customizing your extension! diff --git a/extensions/model-extension/package.json b/extensions/model-extension/package.json deleted file mode 100644 index 153c22fdf..000000000 --- a/extensions/model-extension/package.json +++ /dev/null @@ -1,37 +0,0 @@ -{ - "name": "@janhq/model-extension", - "productName": "Model Management", - "version": "1.0.36", - "description": "Manages model operations including listing, importing, updating, and deleting.", - "main": "dist/index.js", - "author": "Jan ", - "license": "AGPL-3.0", - "scripts": { - "test": "vitest run", - "build": "rolldown -c rolldown.config.mjs", - "build:publish": "rimraf *.tgz --glob || true && yarn build && npm pack && cpx *.tgz ../../pre-install" - }, - "devDependencies": { - "cpx": "^1.5.0", - "rimraf": "^3.0.2", - "rolldown": "1.0.0-beta.1", - "run-script-os": "^1.1.6", - "typescript": "5.3.3", - "vitest": "^3.0.6" - }, - "files": [ - "dist/*", - "package.json", - "README.md" - ], - "dependencies": { - "@janhq/core": "../../core/package.tgz", - "ky": "^1.7.2", - "p-queue": "^8.0.1" - }, - "bundleDependencies": [], - "installConfig": { - "hoistingLimits": "workspaces" - }, - "packageManager": "yarn@4.5.3" -} diff --git a/extensions/model-extension/resources/default.json b/extensions/model-extension/resources/default.json deleted file mode 100644 index bd7c7e63b..000000000 --- a/extensions/model-extension/resources/default.json +++ /dev/null @@ -1,6635 +0,0 @@ -[ - { - "author": "Menlo", - "id": "Menlo/Jan-nano-gguf", - "metadata": { - "_id": "68492cd9cada68b1d11ca1bd", - "author": "Menlo", - "cardData": { - "license": "apache-2.0", - "pipeline_tag": "text-generation" - }, - "createdAt": "2025-06-11T07:14:33.000Z", - "description": "---\nlicense: apache-2.0\npipeline_tag: text-generation\n---\n# Jan Nano\n\n\n\n![image/png](https://cdn-uploads.huggingface.co/production/uploads/657a81129ea9d52e5cbd67f7/YQci8jiHjAAFpXWYOadrU.png)\n\n## Overview\n\nJan Nano is a fine-tuned language model built on top of the Qwen3 architecture. Developed as part of the Jan ecosystem, it balances compact size and extended context length, making it ideal for efficient, high-quality text generation in local or embedded environments.\n\n## Features\n\n- **Tool Use**: Excellent function calling and tool integration\n- **Research**: Enhanced research and information processing capabilities\n- **Small Model**: VRAM efficient for local deployment\n\n## Use it with Jan (UI)\n\n1. 
Install **Jan** using [Quickstart](https://jan.ai/docs/quickstart)", - "disabled": false, - "downloads": 1434, - "gated": false, - "gguf": { - "architecture": "qwen3", - "bos_token": "<|endoftext|>", - "chat_template": "{%- if tools %} {{- '<|im_start|>system\\n' }} {%- if messages[0].role == 'system' %} {{- messages[0].content + '\\n\\n' }} {%- endif %} {{- \"# Tools\\n\\nYou may call one or more functions to assist with the user query.\\n\\nYou are provided with function signatures within XML tags:\\n\" }} {%- for tool in tools %} {{- \"\\n\" }} {{- tool | tojson }} {%- endfor %} {{- \"\\n\\n\\nFor each function call, return a json object with function name and arguments within XML tags:\\n\\n{\\\"name\\\": , \\\"arguments\\\": }\\n<|im_end|>\\n\" }} {%- else %} {%- if messages[0].role == 'system' %} {{- '<|im_start|>system\\n' + messages[0].content + '<|im_end|>\\n' }} {%- endif %} {%- endif %} {%- set ns = namespace(multi_step_tool=true, last_query_index=messages|length - 1) %} {%- for message in messages[::-1] %} {%- set index = (messages|length - 1) - loop.index0 %} {%- if ns.multi_step_tool and message.role == \"user\" and message.content is string and not(message.content.startswith('') and message.content.endswith('')) %} {%- set ns.multi_step_tool = false %} {%- set ns.last_query_index = index %} {%- endif %} {%- endfor %} {%- for message in messages %} {%- if message.content is string %} {%- set content = message.content %} {%- else %} {%- set content = '' %} {%- endif %} {%- if (message.role == \"user\") or (message.role == \"system\" and not loop.first) %} {{- '<|im_start|>' + message.role + '\\n' + content + '<|im_end|>' + '\\n' }} {%- elif message.role == \"assistant\" %} {%- set reasoning_content = '' %} {%- if message.reasoning_content is string %} {%- set reasoning_content = message.reasoning_content %} {%- else %} {%- if '' in content %} {%- set reasoning_content = content.split('')[0].rstrip('\\n').split('')[-1].lstrip('\\n') %} {%- set content = content.split('')[-1].lstrip('\\n') %} {%- endif %} {%- endif %} {%- if loop.index0 > ns.last_query_index %} {%- if loop.last or (not loop.last and reasoning_content) %} {{- '<|im_start|>' + message.role + '\\n\\n' + reasoning_content.strip('\\n') + '\\n\\n\\n' + content.lstrip('\\n') }} {%- else %} {{- '<|im_start|>' + message.role + '\\n' + content }} {%- endif %} {%- else %} {{- '<|im_start|>' + message.role + '\\n' + content }} {%- endif %} {%- if message.tool_calls %} {%- for tool_call in message.tool_calls %} {%- if (loop.first and content) or (not loop.first) %} {{- '\\n' }} {%- endif %} {%- if tool_call.function %} {%- set tool_call = tool_call.function %} {%- endif %} {{- '\\n{\"name\": \"' }} {{- tool_call.name }} {{- '\", \"arguments\": ' }} {%- if tool_call.arguments is string %} {{- tool_call.arguments }} {%- else %} {{- tool_call.arguments | tojson }} {%- endif %} {{- '}\\n' }} {%- endfor %} {%- endif %} {{- '<|im_end|>\\n' }} {%- elif message.role == \"tool\" %} {%- if loop.first or (messages[loop.index0 - 1].role != \"tool\") %} {{- '<|im_start|>user' }} {%- endif %} {{- '\\n\\n' }} {{- content }} {{- '\\n' }} {%- if loop.last or (messages[loop.index0 + 1].role != \"tool\") %} {{- '<|im_end|>\\n' }} {%- endif %} {%- endif %} {%- endfor %} {%- if add_generation_prompt %} {{- '<|im_start|>assistant\\n' }} {{- '\\n\\n\\n\\n' }} {%- endif %}", - "context_length": 40960, - "eos_token": "<|im_end|>", - "quantize_imatrix_file": "imatrix.dat", - "total": 4022468096 - }, - "id": "Menlo/Jan-nano-gguf", - "lastModified": 
"2025-06-13T16:57:55.000Z", - "likes": 3, - "model-index": null, - "modelId": "Menlo/Jan-nano-gguf", - "pipeline_tag": "text-generation", - "private": false, - "sha": "a04aab0878648d8f284c63a52664a482ead16f06", - "siblings": [ - { - "rfilename": ".gitattributes", - "size": 3460 - }, - { - "rfilename": "README.md", - "size": 776 - }, - { - "rfilename": "jan-nano-4b-iQ4_XS.gguf", - "size": 2270750400 - }, - { - "rfilename": "jan-nano-4b-Q3_K_L.gguf", - "size": 2239784384 - }, - { - "rfilename": "jan-nano-4b-Q3_K_M.gguf", - "size": 2075616704 - }, - { - "rfilename": "jan-nano-4b-Q3_K_S.gguf", - "size": 1886995904 - }, - { - "rfilename": "jan-nano-4b-Q4_0.gguf", - "size": 2369545664 - }, - { - "rfilename": "jan-nano-4b-Q4_1.gguf", - "size": 2596627904 - }, - { - "rfilename": "jan-nano-4b-Q4_K_M.gguf", - "size": 2497279424 - }, - { - "rfilename": "jan-nano-4b-Q4_K_S.gguf", - "size": 2383308224 - }, - { - "rfilename": "jan-nano-4b-Q5_0.gguf", - "size": 2823710144 - }, - { - "rfilename": "jan-nano-4b-Q5_1.gguf", - "size": 3050792384 - }, - { - "rfilename": "jan-nano-4b-Q5_K_M.gguf", - "size": 2889512384 - }, - { - "rfilename": "jan-nano-4b-Q5_K_S.gguf", - "size": 2823710144 - }, - { - "rfilename": "jan-nano-4b-Q6_K.gguf", - "size": 3306259904 - }, - { - "rfilename": "jan-nano-4b-Q8_0.gguf", - "size": 4280403904 - } - ], - "spaces": [], - "tags": [ - "gguf", - "text-generation", - "license:apache-2.0", - "endpoints_compatible", - "region:us", - "imatrix", - "conversational" - ], - "usedStorage": 93538518464, - "widgetData": [ - { - "text": "Hi, what can you help me with?" - }, - { - "text": "What is 84 * 3 / 2?" - }, - { - "text": "Tell me an interesting fact about the universe!" - }, - { - "text": "Explain quantum computing in simple terms." - } - ] - }, - "models": [ - { - "id": "Menlo:Jan-nano-gguf:jan-nano-4b-iQ4_XS.gguf", - "size": 2270750400 - }, - { - "id": "Menlo:Jan-nano-gguf:jan-nano-4b-Q3_K_L.gguf", - "size": 2239784384 - }, - { - "id": "Menlo:Jan-nano-gguf:jan-nano-4b-Q3_K_M.gguf", - "size": 2075616704 - }, - { - "id": "Menlo:Jan-nano-gguf:jan-nano-4b-Q3_K_S.gguf", - "size": 1886995904 - }, - { - "id": "Menlo:Jan-nano-gguf:jan-nano-4b-Q4_0.gguf", - "size": 2369545664 - }, - { - "id": "Menlo:Jan-nano-gguf:jan-nano-4b-Q4_1.gguf", - "size": 2596627904 - }, - { - "id": "Menlo:Jan-nano-gguf:jan-nano-4b-Q4_K_M.gguf", - "size": 2497279424 - }, - { - "id": "Menlo:Jan-nano-gguf:jan-nano-4b-Q4_K_S.gguf", - "size": 2383308224 - }, - { - "id": "Menlo:Jan-nano-gguf:jan-nano-4b-Q5_0.gguf", - "size": 2823710144 - }, - { - "id": "Menlo:Jan-nano-gguf:jan-nano-4b-Q5_1.gguf", - "size": 3050792384 - }, - { - "id": "Menlo:Jan-nano-gguf:jan-nano-4b-Q5_K_M.gguf", - "size": 2889512384 - }, - { - "id": "Menlo:Jan-nano-gguf:jan-nano-4b-Q5_K_S.gguf", - "size": 2823710144 - }, - { - "id": "Menlo:Jan-nano-gguf:jan-nano-4b-Q6_K.gguf", - "size": 3306259904 - }, - { - "id": "Menlo:Jan-nano-gguf:jan-nano-4b-Q8_0.gguf", - "size": 4280403904 - } - ] - }, - { - "author": "PrimeIntellect", - "id": "cortexso/intellect-2", - "metadata": { - "_id": "6821ac2482ae7d76d34abdb8", - "author": "cortexso", - "cardData": { - "license": "apache-2.0", - "pipeline_tag": "text-generation", - "tags": ["cortex.cpp", "featured"] - }, - "createdAt": "2025-05-12T08:07:00.000Z", - "description": "---\nlicense: apache-2.0\npipeline_tag: text-generation\ntags:\n- cortex.cpp\n- featured\n---\n\n## Overview\n\n**Prime Intellect** released **INTELLECT-2**, a 32 billion parameter large language model (LLM) trained through distributed reinforcement 
learning on globally donated GPU resources. Built on the **Qwen2** architecture and fine-tuned with the **prime-rl** framework, INTELLECT-2 demonstrates strong performance in math, coding, and logical reasoning.\n\nThis model leverages GRPO (Generalized Reinforcement Policy Optimization) over verifiable rewards, introducing asynchronous distributed RL training with enhanced stability techniques. While its primary focus was on verifiable mathematical and coding tasks, it remains compatible with general-purpose text generation tasks.\n\n## Variants\n\n### INTELLECT-2\n\n| No | Variant | Branch | Cortex CLI command |\n|----|----------------------------------------------------------------------------------|--------|-----------------------------------|\n| 1 | [INTELLECT-2 (32B)](https://huggingface.co/cortexso/intellect-2/tree/32b) | 32b | `cortex run intellect-2:32b` |\n\nEach branch includes multiple GGUF quantized versions, optimized for various hardware configurations:\n- **INTELLECT-2-32B:** q2_k, q3_k_l, q3_k_m, q3_k_s, q4_k_m, q4_k_s, q5_k_m, q5_k_s, q6_k, q8_0\n\n## Use it with Jan (UI)\n\n1. Install **Jan** using [Quickstart](https://jan.ai/docs/quickstart)\n2. Use in Jan model Hub:\n ```bash\n cortexso/intellect-2\n ```\n\n## Use it with Cortex (CLI)\n\n1. Install **Cortex** using [Quickstart](https://cortex.jan.ai/docs/quickstart)\n2. Run the model with command:\n ```bash\n cortex run intellect-2\n ```\n\n## Credits\n\n- **Author:** Prime Intellect\n- **Converter:** [Menlo Research](https://menlo.ai/)\n- **Original License:** [Apache-2.0](https://choosealicense.com/licenses/apache-2.0/)\n- **Paper:** [Intellect 2 Technical Report](https://storage.googleapis.com/public-technical-paper/INTELLECT_2_Technical_Report.pdf)", - "disabled": false, - "downloads": 1436, - "gated": false, - "gguf": { - "architecture": "qwen2", - "bos_token": "<|endoftext|>", - "chat_template": "{%- if tools %}\n {{- '<|im_start|>system\\n' }}\n {%- if messages[0]['role'] == 'system' %}\n {{- messages[0]['content'] }}\n {%- else %}\n {{- '' }}\n {%- endif %}\n {{- \"\\n\\n# Tools\\n\\nYou may call one or more functions to assist with the user query.\\n\\nYou are provided with function signatures within XML tags:\\n\" }}\n {%- for tool in tools %}\n {{- \"\\n\" }}\n {{- tool | tojson }}\n {%- endfor %}\n {{- \"\\n\\n\\nFor each function call, return a json object with function name and arguments within XML tags:\\n\\n{\\\"name\\\": , \\\"arguments\\\": }\\n<|im_end|>\\n\" }}\n{%- else %}\n {%- if messages[0]['role'] == 'system' %}\n {{- '<|im_start|>system\\n' + messages[0]['content'] + '<|im_end|>\\n' }}\n {%- endif %}\n{%- endif %}\n{%- for message in messages %}\n {%- if (message.role == \"user\") or (message.role == \"system\" and not loop.first) %}\n {{- '<|im_start|>' + message.role + '\\n' + message.content + '<|im_end|>' + '\\n' }}\n {%- elif message.role == \"assistant\" and not message.tool_calls %}\n {%- set content = message.content %}\n {%- if not loop.last %}\n {%- set content = message.content.split('')[-1].lstrip('\\n') %}\n {%- endif %}\n {{- '<|im_start|>' + message.role + '\\n' + content + '<|im_end|>' + '\\n' }}\n {%- elif message.role == \"assistant\" %}\n {%- set content = message.content %}\n {%- if not loop.last %}\n {%- set content = message.content.split('')[-1].lstrip('\\n') %}\n {%- endif %}\n {{- '<|im_start|>' + message.role }}\n {%- if message.content %}\n {{- '\\n' + content }}\n {%- endif %}\n {%- for tool_call in message.tool_calls %}\n {%- if tool_call.function is defined %}\n 
{%- set tool_call = tool_call.function %}\n {%- endif %}\n {{- '\\n\\n{\"name\": \"' }}\n {{- tool_call.name }}\n {{- '\", \"arguments\": ' }}\n {{- tool_call.arguments | tojson }}\n {{- '}\\n' }}\n {%- endfor %}\n {{- '<|im_end|>\\n' }}\n {%- elif message.role == \"tool\" %}\n {%- if (loop.index0 == 0) or (messages[loop.index0 - 1].role != \"tool\") %}\n {{- '<|im_start|>user' }}\n {%- endif %}\n {{- '\\n\\n' }}\n {{- message.content }}\n {{- '\\n' }}\n {%- if loop.last or (messages[loop.index0 + 1].role != \"tool\") %}\n {{- '<|im_end|>\\n' }}\n {%- endif %}\n {%- endif %}\n{%- endfor %}\n{%- if add_generation_prompt %}\n {{- '<|im_start|>assistant\\n\\n' }}\n{%- endif %}\n", - "context_length": 40960, - "eos_token": "<|im_end|>", - "total": 32763876352 - }, - "id": "cortexso/intellect-2", - "lastModified": "2025-05-12T14:18:35.000Z", - "likes": 0, - "model-index": null, - "modelId": "cortexso/intellect-2", - "pipeline_tag": "text-generation", - "private": false, - "sha": "9d237b26053af28e0119331e0dfbc75b45a0317b", - "siblings": [ - { - "rfilename": ".gitattributes" - }, - { - "rfilename": "README.md" - }, - { - "rfilename": "intellect-2-q2_k.gguf" - }, - { - "rfilename": "intellect-2-q3_k_l.gguf" - }, - { - "rfilename": "intellect-2-q3_k_m.gguf" - }, - { - "rfilename": "intellect-2-q3_k_s.gguf" - }, - { - "rfilename": "intellect-2-q4_k_m.gguf" - }, - { - "rfilename": "intellect-2-q4_k_s.gguf" - }, - { - "rfilename": "intellect-2-q5_k_m.gguf" - }, - { - "rfilename": "intellect-2-q5_k_s.gguf" - }, - { - "rfilename": "intellect-2-q6_k.gguf" - }, - { - "rfilename": "intellect-2-q8_0.gguf" - }, - { - "rfilename": "metadata.yml" - }, - { - "rfilename": "model.yml" - } - ], - "spaces": [], - "tags": [ - "gguf", - "cortex.cpp", - "featured", - "text-generation", - "license:apache-2.0", - "endpoints_compatible", - "region:us", - "conversational" - ], - "usedStorage": 206130755200, - "widgetData": [ - { - "text": "Hi, what can you help me with?" - }, - { - "text": "What is 84 * 3 / 2?" - }, - { - "text": "Tell me an interesting fact about the universe!" - }, - { - "text": "Explain quantum computing in simple terms." - } - ] - }, - "models": [ - { - "id": "intellect-2:32b", - "size": 19851336256 - } - ] - }, - { - "author": "Microsoft", - "id": "cortexso/phi-4-reasoning", - "metadata": { - "_id": "681857cda178d73748a1295f", - "author": "cortexso", - "cardData": { - "license": "mit", - "pipeline_tag": "text-generation", - "tags": ["cortex.cpp", "featured"] - }, - "createdAt": "2025-05-05T06:16:45.000Z", - "description": "---\nlicense: mit\npipeline_tag: text-generation\ntags:\n- cortex.cpp\n- featured\n---\n\n## Overview\n\n**Microsoft Research** developed and released the **Phi-4-reasoning** series, a cutting-edge family of reasoning-focused language models optimized for chain-of-thought (CoT), step-by-step problem solving, and high-efficiency inference. 
These models excel in advanced mathematical reasoning, scientific Q&A, and instruction-following scenarios.\n\nThe Phi-4 models introduce extended context lengths, ChatML reasoning templates, and strong performance on benchmark datasets, while maintaining compact sizes that are ideal for memory- and latency-constrained environments.\n\n## Variants\n\n### Phi-4-reasoning\n\n| No | Variant | Branch | Cortex CLI command |\n|----|-------------------------------------------------------------------------------------|------------|-------------------------------------|\n| 1 | [phi-4-mini-reasoning](https://huggingface.co/microsoft/phi-4-mini-reasoning) | 4b | `cortex run phi4:4b` |\n| 2 | [phi-4-reasoning](https://huggingface.co/microsoft/phi-4-reasoning-plus) | 14b | `cortex run phi4:14b` |\n| 3 | [phi-4-reasoning-plus](https://huggingface.co/microsoft/phi-4-reasoning-plus) | 14b-plus | `cortex run phi4:14b-plus` |\n\nEach branch supports multiple quantized GGUF versions:\n- **phi-4-mini-reasoning:** q2_k, q3_k_l, q3_k_m, q3_k_s, q4_k_m, q4_k_s, q5_k_m, q5_k_s, q6_k, q8_0\n- **phi-4-reasoning:** q2_k, q3_k_l, q3_k_m, q3_k_s, q4_k_m, q4_k_s, q5_k_m, q5_k_s, q6_k, q8_0\n- **phi-4-reasoning-plus:** q2_k, q3_k_l, q3_k_m, q3_k_s, q4_k_m, q4_k_s, q5_k_m, q5_k_s, q6_k, q8_0\n\n## Use it with Jan (UI)\n\n1. Install **Jan** using [Quickstart](https://jan.ai/docs/quickstart)\n2. Use in Jan model Hub:\n ```bash\n cortexso/phi4\n ```\n\n## Use it with Cortex (CLI)\n\n1. Install **Cortex** using [Quickstart](https://cortex.jan.ai/docs/quickstart)\n2. Run the model with command:\n ```bash\n cortex run phi4\n ```\n\n## Credits\n\n- **Author:** Microsoft Research\n- **Converter:** [Menlo Research](https://menlo.ai/)\n- **Original License:** [MIT License](https://opensource.org/license/mit/)\n- **Blogs:** [Phi-4 Reasoning Blog](https://www.microsoft.com/en-us/research/blog/)\n", - "disabled": false, - "downloads": 2894, - "gated": false, - "gguf": { - "architecture": "phi3", - "bos_token": "<|endoftext|>", - "chat_template": "{{ '<|system|>Your name is Phi, an AI math expert developed by Microsoft.' 
}}{% for message in messages %}{% if message['role'] == 'system' %} {{ message['content'] }}{% if 'tools' in message and message['tools'] is not none %}{{ '<|tool|>' + message['tools'] + '<|/tool|>' }}{% endif %}{% endif %}{% endfor %}{{ '<|end|>' }}{% for message in messages %}{% if message['role'] != 'system' %}{{ '<|' + message['role'] + '|>' + message['content'] + '<|end|>' }}{% endif %}{% endfor %}{% if add_generation_prompt %}{{ '<|assistant|>' }}{% else %}{{ eos_token }}{% endif %}", - "context_length": 131072, - "eos_token": "<|endoftext|>", - "total": 3836021856 - }, - "id": "cortexso/phi-4-reasoning", - "lastModified": "2025-05-05T09:36:18.000Z", - "likes": 0, - "model-index": null, - "modelId": "cortexso/phi-4-reasoning", - "pipeline_tag": "text-generation", - "private": false, - "sha": "218f08078412d1bcd46e7ce48c4442b14b98164d", - "siblings": [ - { - "rfilename": ".gitattributes" - }, - { - "rfilename": "README.md" - }, - { - "rfilename": "metadata.yml" - }, - { - "rfilename": "model.yml" - }, - { - "rfilename": "phi-4-mini-reasoning-q2_k.gguf" - }, - { - "rfilename": "phi-4-mini-reasoning-q3_k_l.gguf" - }, - { - "rfilename": "phi-4-mini-reasoning-q3_k_m.gguf" - }, - { - "rfilename": "phi-4-mini-reasoning-q3_k_s.gguf" - }, - { - "rfilename": "phi-4-mini-reasoning-q4_k_m.gguf" - }, - { - "rfilename": "phi-4-mini-reasoning-q4_k_s.gguf" - }, - { - "rfilename": "phi-4-mini-reasoning-q5_k_m.gguf" - }, - { - "rfilename": "phi-4-mini-reasoning-q5_k_s.gguf" - }, - { - "rfilename": "phi-4-mini-reasoning-q6_k.gguf" - }, - { - "rfilename": "phi-4-mini-reasoning-q8_0.gguf" - }, - { - "rfilename": "phi-4-reasoning-plus-q2_k.gguf" - }, - { - "rfilename": "phi-4-reasoning-plus-q3_k_l.gguf" - }, - { - "rfilename": "phi-4-reasoning-plus-q3_k_m.gguf" - }, - { - "rfilename": "phi-4-reasoning-plus-q3_k_s.gguf" - }, - { - "rfilename": "phi-4-reasoning-plus-q4_k_m.gguf" - }, - { - "rfilename": "phi-4-reasoning-plus-q4_k_s.gguf" - }, - { - "rfilename": "phi-4-reasoning-plus-q5_k_m.gguf" - }, - { - "rfilename": "phi-4-reasoning-plus-q5_k_s.gguf" - }, - { - "rfilename": "phi-4-reasoning-plus-q6_k.gguf" - }, - { - "rfilename": "phi-4-reasoning-plus-q8_0.gguf" - }, - { - "rfilename": "phi-4-reasoning-q2_k.gguf" - }, - { - "rfilename": "phi-4-reasoning-q3_k_l.gguf" - }, - { - "rfilename": "phi-4-reasoning-q3_k_m.gguf" - }, - { - "rfilename": "phi-4-reasoning-q3_k_s.gguf" - }, - { - "rfilename": "phi-4-reasoning-q4_k_m.gguf" - }, - { - "rfilename": "phi-4-reasoning-q4_k_s.gguf" - }, - { - "rfilename": "phi-4-reasoning-q5_k_m.gguf" - }, - { - "rfilename": "phi-4-reasoning-q5_k_s.gguf" - }, - { - "rfilename": "phi-4-reasoning-q6_k.gguf" - }, - { - "rfilename": "phi-4-reasoning-q8_0.gguf" - } - ], - "spaces": [], - "tags": [ - "gguf", - "cortex.cpp", - "featured", - "text-generation", - "license:mit", - "endpoints_compatible", - "region:us", - "conversational" - ], - "usedStorage": 212004788352, - "widgetData": [ - { - "text": "Hi, what can you help me with?" - }, - { - "text": "What is 84 * 3 / 2?" - }, - { - "text": "Tell me an interesting fact about the universe!" - }, - { - "text": "Explain quantum computing in simple terms." 
- } - ] - }, - "models": [ - { - "id": "phi-4-reasoning:14b", - "size": 9053115968 - }, - { - "id": "phi-4-reasoning:4b", - "size": 2491874464 - }, - { - "id": "phi-4-reasoning:14b-plus", - "size": 9053116000 - } - ] - }, - { - "author": "Internlm", - "id": "cortexso/internlm3-8b-it", - "metadata": { - "_id": "678dcf22fbe4dceca4562d1f", - "author": "cortexso", - "cardData": { - "license": "apache-2.0", - "pipeline_tag": "text-generation", - "tags": ["cortex.cpp"] - }, - "createdAt": "2025-01-20T04:20:50.000Z", - "description": "---\nlicense: apache-2.0\npipeline_tag: text-generation\ntags:\n- cortex.cpp\n---\n## Overview\n\n**InternLM** developed and released the [InternLM3-8B-Instruct](https://huggingface.co/internlm/InternLM3-8B-Instruct), an 8-billion parameter instruction-tuned language model designed for general-purpose usage and advanced reasoning tasks. The model delivers state-of-the-art performance on reasoning and knowledge-intensive tasks, outperforming other models like Llama3.1-8B and Qwen2.5-7B. Trained on 4 trillion high-quality tokens, InternLM3 achieves exceptional efficiency, reducing training costs by over 75% compared to other models of similar scale. \n\nThe model features dual operational modes: a deep thinking mode for solving complex reasoning tasks through long chain-of-thought processes and a normal response mode for fluent and interactive user experiences. These capabilities make InternLM3-8B-Instruct ideal for applications in conversational AI, advanced reasoning, and general-purpose language understanding.\n\n## Variants\n\n| No | Variant | Cortex CLI command |\n| --- | --- | --- |\n| 1 | [Internlm3-8b-it](https://huggingface.co/cortexso/internlm3-8b-it/tree/8b) | `cortex run internlm3-8b-it:8b` |\n\n## Use it with Jan (UI)\n\n1. Install **Jan** using [Quickstart](https://jan.ai/docs/quickstart)\n2. Use in Jan model Hub:\n ```bash\n cortexso/internlm3-8b-it\n ```\n\n## Use it with Cortex (CLI)\n\n1. Install **Cortex** using [Quickstart](https://cortex.jan.ai/docs/quickstart)\n2. 
Run the model with command:\n ```bash\n cortex run internlm3-8b-it\n ```\n\n## Credits\n\n- **Author:** InternLM\n- **Converter:** [Homebrew](https://www.homebrew.ltd/)\n- **Original License:** [License](https://huggingface.co/internlm/internlm3-8b-instruct/blob/main/LICENSE.txt)\n- **Papers:** [InternLM2 Technical Report](https://arxiv.org/abs/2403.17297)", - "disabled": false, - "downloads": 229, - "gated": false, - "gguf": { - "architecture": "llama", - "bos_token": "", - "chat_template": "{{ bos_token }}{% for message in messages %}{{'<|im_start|>' + message['role'] + '\n' + message['content'] + '<|im_end|>' + '\n'}}{% endfor %}{% if add_generation_prompt %}{{ '<|im_start|>assistant\n' }}{% endif %}", - "context_length": 32768, - "eos_token": "<|im_end|>", - "total": 8804241408 - }, - "id": "cortexso/internlm3-8b-it", - "lastModified": "2025-03-03T05:57:41.000Z", - "likes": 0, - "model-index": null, - "modelId": "cortexso/internlm3-8b-it", - "pipeline_tag": "text-generation", - "private": false, - "sha": "957eb6aa16a10eda3ce1a87dcacfd99bda5c469a", - "siblings": [ - { - "rfilename": ".gitattributes" - }, - { - "rfilename": "README.md" - }, - { - "rfilename": "internlm3-8b-instruct-q2_k.gguf" - }, - { - "rfilename": "internlm3-8b-instruct-q3_k_l.gguf" - }, - { - "rfilename": "internlm3-8b-instruct-q3_k_m.gguf" - }, - { - "rfilename": "internlm3-8b-instruct-q3_k_s.gguf" - }, - { - "rfilename": "internlm3-8b-instruct-q4_k_m.gguf" - }, - { - "rfilename": "internlm3-8b-instruct-q4_k_s.gguf" - }, - { - "rfilename": "internlm3-8b-instruct-q5_k_m.gguf" - }, - { - "rfilename": "internlm3-8b-instruct-q5_k_s.gguf" - }, - { - "rfilename": "internlm3-8b-instruct-q6_k.gguf" - }, - { - "rfilename": "internlm3-8b-instruct-q8_0.gguf" - }, - { - "rfilename": "metadata.yml" - }, - { - "rfilename": "model.yml" - } - ], - "spaces": [], - "tags": [ - "gguf", - "cortex.cpp", - "text-generation", - "arxiv:2403.17297", - "license:apache-2.0", - "endpoints_compatible", - "region:us", - "conversational" - ], - "usedStorage": 56027406208, - "widgetData": [ - { - "text": "Hi, what can you help me with?" - }, - { - "text": "What is 84 * 3 / 2?" - }, - { - "text": "Tell me an interesting fact about the universe!" - }, - { - "text": "Explain quantum computing in simple terms." - } - ] - }, - "models": [ - { - "id": "internlm3-8b-it:8b", - "size": 5358623936 - } - ] - }, - { - "author": "Google", - "id": "cortexso/gemma3", - "metadata": { - "_id": "67d14a4c2e461dfe226bd1be", - "author": "cortexso", - "cardData": { - "pipeline_tag": "text-generation", - "tags": ["cortex.cpp", "featured"] - }, - "createdAt": "2025-03-12T08:48:12.000Z", - "description": "---\npipeline_tag: text-generation\ntags:\n- cortex.cpp\n- featured\n---\n## Overview\n**Google** developed and released the **Gemma 3** series, featuring multiple model sizes with both pre-trained and instruction-tuned variants. These multimodal models handle both text and image inputs while generating text outputs, making them versatile for various applications. 
Gemma 3 models are built from the same research and technology used to create the Gemini models, offering state-of-the-art capabilities in a lightweight and accessible format.\n\nThe Gemma 3 models include four different sizes with open weights, providing excellent performance across tasks like question answering, summarization, and reasoning while maintaining efficiency for deployment in resource-constrained environments such as laptops, desktops, or custom cloud infrastructure.\n\n## Variants\n\n### Gemma 3\n| No | Variant | Branch | Cortex CLI command |\n| -- | ------------------------------------------------------ | ------ | ----------------------------- |\n| 1 | [Gemma-3-1B](https://huggingface.co/cortexso/gemma3/tree/1b) | 1b | `cortex run gemma3:1b` |\n| 2 | [Gemma-3-4B](https://huggingface.co/cortexso/gemma3/tree/4b) | 4b | `cortex run gemma3:4b` |\n| 3 | [Gemma-3-12B](https://huggingface.co/cortexso/gemma3/tree/12b) | 12b | `cortex run gemma3:12b` |\n| 4 | [Gemma-3-27B](https://huggingface.co/cortexso/gemma3/tree/27b) | 27b | `cortex run gemma3:27b` |\n\nEach branch contains a default quantized version.\n\n### Key Features\n- **Multimodal capabilities**: Handles both text and image inputs\n- **Large context window**: 128K tokens\n- **Multilingual support**: Over 140 languages\n- **Available in multiple sizes**: From 1B to 27B parameters\n- **Open weights**: For both pre-trained and instruction-tuned variants\n\n## Use it with Jan (UI)\n1. Install **Jan** using [Quickstart](https://jan.ai/docs/quickstart)\n2. Use in Jan model Hub:\n ```bash\n cortexso/gemma3\n ```\n\n## Use it with Cortex (CLI)\n1. Install **Cortex** using [Quickstart](https://cortex.jan.ai/docs/quickstart)\n2. Run the model with command:\n ```bash\n cortex run gemma3\n ```\n\n## Credits\n- **Author:** Google\n- **Original License:** [Gemma License](https://ai.google.dev/gemma/terms)\n- **Papers:** [Gemma 3 Technical Report](https://storage.googleapis.com/deepmind-media/gemma/Gemma3Report.pdf)", - "disabled": false, - "downloads": 5425, - "gated": false, - "gguf": { - "architecture": "gemma3", - "bos_token": "", - "chat_template": "{{ bos_token }}\n{%- if messages[0]['role'] == 'system' -%}\n {%- if messages[0]['content'] is string -%}\n {%- set first_user_prefix = messages[0]['content'] + '\n\n' -%}\n {%- else -%}\n {%- set first_user_prefix = messages[0]['content'][0]['text'] + '\n\n' -%}\n {%- endif -%}\n {%- set loop_messages = messages[1:] -%}\n{%- else -%}\n {%- set first_user_prefix = \"\" -%}\n {%- set loop_messages = messages -%}\n{%- endif -%}\n{%- for message in loop_messages -%}\n {%- if (message['role'] == 'user') != (loop.index0 % 2 == 0) -%}\n {{ raise_exception(\"Conversation roles must alternate user/assistant/user/assistant/...\") }}\n {%- endif -%}\n {%- if (message['role'] == 'assistant') -%}\n {%- set role = \"model\" -%}\n {%- else -%}\n {%- set role = message['role'] -%}\n {%- endif -%}\n {{ '' + role + '\n' + (first_user_prefix if loop.first else \"\") }}\n {%- if message['content'] is string -%}\n {{ message['content'] | trim }}\n {%- elif message['content'] is iterable -%}\n {%- for item in message['content'] -%}\n {%- if item['type'] == 'image' -%}\n {{ '' }}\n {%- elif item['type'] == 'text' -%}\n {{ item['text'] | trim }}\n {%- endif -%}\n {%- endfor -%}\n {%- else -%}\n {{ raise_exception(\"Invalid content type\") }}\n {%- endif -%}\n {{ '\n' }}\n{%- endfor -%}\n{%- if add_generation_prompt -%}\n {{'model\n'}}\n{%- endif -%}\n", - "context_length": 131072, - "eos_token": "", - "total": 
11765788416 - }, - "id": "cortexso/gemma3", - "lastModified": "2025-05-13T12:45:28.000Z", - "likes": 1, - "model-index": null, - "modelId": "cortexso/gemma3", - "pipeline_tag": "text-generation", - "private": false, - "sha": "289bd96e0dbb2f82e77c56c9c09d66ff76769895", - "siblings": [ - { - "rfilename": ".gitattributes" - }, - { - "rfilename": "README.md" - }, - { - "rfilename": "gemma-3-12b-it-q2_k.gguf" - }, - { - "rfilename": "gemma-3-12b-it-q3_k_l.gguf" - }, - { - "rfilename": "gemma-3-12b-it-q3_k_m.gguf" - }, - { - "rfilename": "gemma-3-12b-it-q3_k_s.gguf" - }, - { - "rfilename": "gemma-3-12b-it-q4_k_m.gguf" - }, - { - "rfilename": "gemma-3-12b-it-q4_k_s.gguf" - }, - { - "rfilename": "gemma-3-12b-it-q5_k_m.gguf" - }, - { - "rfilename": "gemma-3-12b-it-q5_k_s.gguf" - }, - { - "rfilename": "gemma-3-12b-it-q6_k.gguf" - }, - { - "rfilename": "gemma-3-12b-it-q8_0.gguf" - }, - { - "rfilename": "gemma-3-1b-it-q2_k.gguf" - }, - { - "rfilename": "gemma-3-1b-it-q3_k_l.gguf" - }, - { - "rfilename": "gemma-3-1b-it-q3_k_m.gguf" - }, - { - "rfilename": "gemma-3-1b-it-q3_k_s.gguf" - }, - { - "rfilename": "gemma-3-1b-it-q4_k_m.gguf" - }, - { - "rfilename": "gemma-3-1b-it-q4_k_s.gguf" - }, - { - "rfilename": "gemma-3-1b-it-q5_k_m.gguf" - }, - { - "rfilename": "gemma-3-1b-it-q5_k_s.gguf" - }, - { - "rfilename": "gemma-3-1b-it-q6_k.gguf" - }, - { - "rfilename": "gemma-3-1b-it-q8_0.gguf" - }, - { - "rfilename": "gemma-3-27b-it-q2_k.gguf" - }, - { - "rfilename": "gemma-3-27b-it-q3_k_l.gguf" - }, - { - "rfilename": "gemma-3-27b-it-q3_k_m.gguf" - }, - { - "rfilename": "gemma-3-27b-it-q3_k_s.gguf" - }, - { - "rfilename": "gemma-3-27b-it-q4_k_m.gguf" - }, - { - "rfilename": "gemma-3-27b-it-q4_k_s.gguf" - }, - { - "rfilename": "gemma-3-27b-it-q5_k_m.gguf" - }, - { - "rfilename": "gemma-3-27b-it-q5_k_s.gguf" - }, - { - "rfilename": "gemma-3-27b-it-q6_k.gguf" - }, - { - "rfilename": "gemma-3-27b-it-q8_0.gguf" - }, - { - "rfilename": "gemma-3-4b-it-q2_k.gguf" - }, - { - "rfilename": "gemma-3-4b-it-q3_k_l.gguf" - }, - { - "rfilename": "gemma-3-4b-it-q3_k_m.gguf" - }, - { - "rfilename": "gemma-3-4b-it-q3_k_s.gguf" - }, - { - "rfilename": "gemma-3-4b-it-q4_k_m.gguf" - }, - { - "rfilename": "gemma-3-4b-it-q4_k_s.gguf" - }, - { - "rfilename": "gemma-3-4b-it-q5_k_m.gguf" - }, - { - "rfilename": "gemma-3-4b-it-q5_k_s.gguf" - }, - { - "rfilename": "gemma-3-4b-it-q6_k.gguf" - }, - { - "rfilename": "gemma-3-4b-it-q8_0.gguf" - }, - { - "rfilename": "metadata.yml" - }, - { - "rfilename": "model.yml" - } - ], - "spaces": [], - "tags": [ - "gguf", - "cortex.cpp", - "featured", - "text-generation", - "endpoints_compatible", - "region:us", - "conversational" - ], - "usedStorage": 280561347040, - "widgetData": [ - { - "text": "Hi, what can you help me with?" - }, - { - "text": "What is 84 * 3 / 2?" - }, - { - "text": "Tell me an interesting fact about the universe!" - }, - { - "text": "Explain quantum computing in simple terms." 
- } - ] - }, - "models": [ - { - "id": "gemma3:4b", - "size": 2489757760 - }, - { - "id": "gemma3:27b", - "size": 16546404640 - }, - { - "id": "gemma3:12b", - "size": 7300574912 - }, - { - "id": "gemma3:1b", - "size": 806058144 - } - ] - }, - { - "author": "Qwen", - "id": "cortexso/qwen-qwq", - "metadata": { - "_id": "67c909487c87605263db5352", - "author": "cortexso", - "cardData": { - "license": "apache-2.0", - "pipeline_tag": "text-generation", - "tags": ["cortex.cpp", "featured"] - }, - "createdAt": "2025-03-06T02:32:40.000Z", - "description": "---\nlicense: apache-2.0\npipeline_tag: text-generation\ntags:\n- cortex.cpp\n- featured\n---\n\n## Overview \n\n**QwQ** is the reasoning model of the **Qwen** series. Unlike conventional instruction-tuned models, **QwQ** is designed to think and reason, achieving significantly enhanced performance in downstream tasks, especially challenging problem-solving scenarios. \n\n**QwQ-32B** is the **medium-sized** reasoning model in the QwQ family, capable of **competitive performance** against state-of-the-art reasoning models, such as **DeepSeek-R1** and **o1-mini**. It is optimized for tasks requiring logical deduction, multi-step reasoning, and advanced comprehension. \n\nThe model is well-suited for **AI research, automated theorem proving, advanced dialogue systems, and high-level decision-making applications**. \n\n## Variants \n\n| No | Variant | Cortex CLI command |\n| --- | --- | --- |\n| 1 | [QwQ-32B](https://huggingface.co/cortexso/qwen-qwq/tree/main) | `cortex run qwen-qwq:32b` | \n\n## Use it with Jan (UI) \n\n1. Install **Jan** using [Quickstart](https://jan.ai/docs/quickstart) \n2. Use in Jan model Hub: \n ```bash\n cortexso/qwen-qwq\n ``` \n\n## Use it with Cortex (CLI) \n\n1. Install **Cortex** using [Quickstart](https://cortex.jan.ai/docs/quickstart) \n2. 
Run the model with command: \n ```bash\n cortex run qwen-qwq\n ``` \n\n## Credits \n\n- **Author:** Qwen Team \n- **Converter:** [Homebrew](https://www.homebrew.ltd/) \n- **Original License:** [License](https://choosealicense.com/licenses/apache-2.0/) \n- **Paper:** [Introducing QwQ-32B: The Medium-Sized Reasoning Model](https://qwenlm.github.io/blog/qwq-32b/)", - "disabled": false, - "downloads": 582, - "gated": false, - "gguf": { - "architecture": "qwen2", - "bos_token": "<|endoftext|>", - "chat_template": "{%- if tools %}\n {{- '<|im_start|>system\\n' }}\n {%- if messages[0]['role'] == 'system' %}\n {{- messages[0]['content'] }}\n {%- else %}\n {{- '' }}\n {%- endif %}\n {{- \"\\n\\n# Tools\\n\\nYou may call one or more functions to assist with the user query.\\n\\nYou are provided with function signatures within XML tags:\\n\" }}\n {%- for tool in tools %}\n {{- \"\\n\" }}\n {{- tool | tojson }}\n {%- endfor %}\n {{- \"\\n\\n\\nFor each function call, return a json object with function name and arguments within XML tags:\\n\\n{\\\"name\\\": , \\\"arguments\\\": }\\n<|im_end|>\\n\" }}\n{%- else %}\n {%- if messages[0]['role'] == 'system' %}\n {{- '<|im_start|>system\\n' + messages[0]['content'] + '<|im_end|>\\n' }}\n {%- endif %}\n{%- endif %}\n{%- for message in messages %}\n {%- if (message.role == \"user\") or (message.role == \"system\" and not loop.first) %}\n {{- '<|im_start|>' + message.role + '\\n' + message.content + '<|im_end|>' + '\\n' }}\n {%- elif message.role == \"assistant\" and not message.tool_calls %}\n {%- set content = message.content.split('')[-1].lstrip('\\n') %}\n {{- '<|im_start|>' + message.role + '\\n' + content + '<|im_end|>' + '\\n' }}\n {%- elif message.role == \"assistant\" %}\n {%- set content = message.content.split('')[-1].lstrip('\\n') %}\n {{- '<|im_start|>' + message.role }}\n {%- if message.content %}\n {{- '\\n' + content }}\n {%- endif %}\n {%- for tool_call in message.tool_calls %}\n {%- if tool_call.function is defined %}\n {%- set tool_call = tool_call.function %}\n {%- endif %}\n {{- '\\n\\n{\"name\": \"' }}\n {{- tool_call.name }}\n {{- '\", \"arguments\": ' }}\n {{- tool_call.arguments | tojson }}\n {{- '}\\n' }}\n {%- endfor %}\n {{- '<|im_end|>\\n' }}\n {%- elif message.role == \"tool\" %}\n {%- if (loop.index0 == 0) or (messages[loop.index0 - 1].role != \"tool\") %}\n {{- '<|im_start|>user' }}\n {%- endif %}\n {{- '\\n\\n' }}\n {{- message.content }}\n {{- '\\n' }}\n {%- if loop.last or (messages[loop.index0 + 1].role != \"tool\") %}\n {{- '<|im_end|>\\n' }}\n {%- endif %}\n {%- endif %}\n{%- endfor %}\n{%- if add_generation_prompt %}\n {{- '<|im_start|>assistant\\n\\n' }}\n{%- endif %}\n", - "context_length": 131072, - "eos_token": "<|im_end|>", - "total": 32763876352 - }, - "id": "cortexso/qwen-qwq", - "lastModified": "2025-03-13T02:39:51.000Z", - "likes": 0, - "model-index": null, - "modelId": "cortexso/qwen-qwq", - "pipeline_tag": "text-generation", - "private": false, - "sha": "17e393edf64f5ecca3089b4b5822d05a165882bd", - "siblings": [ - { - "rfilename": ".gitattributes" - }, - { - "rfilename": "README.md" - }, - { - "rfilename": "metadata.yml" - }, - { - "rfilename": "model.yml" - }, - { - "rfilename": "qwq-32b-q2_k.gguf" - }, - { - "rfilename": "qwq-32b-q3_k_l.gguf" - }, - { - "rfilename": "qwq-32b-q3_k_m.gguf" - }, - { - "rfilename": "qwq-32b-q3_k_s.gguf" - }, - { - "rfilename": "qwq-32b-q4_k_m.gguf" - }, - { - "rfilename": "qwq-32b-q4_k_s.gguf" - }, - { - "rfilename": "qwq-32b-q5_k_m.gguf" - }, - { - "rfilename": 
"qwq-32b-q5_k_s.gguf" - }, - { - "rfilename": "qwq-32b-q6_k.gguf" - }, - { - "rfilename": "qwq-32b-q8_0.gguf" - } - ], - "spaces": [], - "tags": [ - "gguf", - "cortex.cpp", - "featured", - "text-generation", - "license:apache-2.0", - "endpoints_compatible", - "region:us", - "conversational" - ], - "usedStorage": 206130754880, - "widgetData": [ - { - "text": "Hi, what can you help me with?" - }, - { - "text": "What is 84 * 3 / 2?" - }, - { - "text": "Tell me an interesting fact about the universe!" - }, - { - "text": "Explain quantum computing in simple terms." - } - ] - }, - "models": [ - { - "id": "qwen-qwq:32b", - "size": 19851336224 - } - ] - }, - { - "author": "DeepCogito", - "id": "cortexso/cogito-v1", - "metadata": { - "_id": "67f67ca2c68bea1f264edc11", - "author": "cortexso", - "cardData": { - "license": "apache-2.0", - "pipeline_tag": "text-generation", - "tags": ["cortex.cpp", "featured"] - }, - "createdAt": "2025-04-09T13:56:50.000Z", - "description": "---\nlicense: apache-2.0\npipeline_tag: text-generation\ntags:\n- cortex.cpp\n- featured\n---\n\n## Overview\n\n**DeepCogito** introduces the **Cogito-v1 Preview** series, a powerful suite of hybrid reasoning models trained with Iterated Distillation and Amplification (IDA). These models are designed to push the boundaries of open-weight LLMs through scalable alignment and self-improvement strategies, offering unmatched performance across coding, STEM, multilingual, and agentic use cases.\n\nEach model in this series operates in both **standard** (direct answer) and **reasoning** (self-reflective) modes, significantly outperforming size-equivalent open models such as LLaMA, DeepSeek, and Qwen. The 70B variant notably surpasses the newly released LLaMA 4 109B MoE model in benchmarks.\n\n## Variants\n\n### Cogito-v1 Preview\n\n| No | Variant | Branch | Cortex CLI command |\n|----|--------------------------------------------------------------------------------------------------|--------|-----------------------------------------------|\n| 1 | [Cogito-v1-Preview-LLaMA-3B](https://huggingface.co/cortexso/cogito-v1/tree/3b) | 3b | `cortex run cognito-v1:3b` |\n| 2 | [Cogito-v1-Preview-LLaMA-8B](https://huggingface.co/cortexso/cogito-v1/tree/8b) | 8b | `cortex run cognito-v1:8b` |\n| 3 | [Cogito-v1-Preview-Qwen-14B](https://huggingface.co/cortexso/cogito-v1/tree/14b) | 14b | `cortex run cognito-v1:14b` |\n| 4 | [Cogito-v1-Preview-Qwen-32B](https://huggingface.co/cortexso/cogito-v1/tree/32b) | 32b | `cortex run cognito-v1:32b` |\n| 5 | [Cogito-v1-Preview-LLaMA-70B](https://huggingface.co/cortexso/cogito-v1/tree/70b) | 70b | `cortex run cognito-v1:70b` |\n\nEach branch contains a default quantized version:\n- **LLaMA-3B:** q4-km \n- **LLaMA-8B:** q4-km \n- **Qwen-14B:** q4-km \n- **Qwen-32B:** q4-km \n- **LLaMA-70B:** q4-km \n\n## Use it with Jan (UI)\n\n1. Install **Jan** using [Quickstart](https://jan.ai/docs/quickstart) \n2. Use in Jan model Hub: \n ```bash\n deepcogito/cognito-v1\n ```\n## Use it with Cortex (CLI)\n\n1. Install Cortex using [Quickstart](https://cortex.so/)\n2. 
Run the model with command:\n ```bash\n cortex run cognito-v1\n ```\n\n## Credits\n\n- **Author:** DeepCogito\n- **Original License:** [Apache License 2.0](https://www.apache.org/licenses/LICENSE-2.0)\n- **Papers:** [Cognito v1 Preview](https://www.deepcogito.com/research/cogito-v1-preview)", - "disabled": false, - "downloads": 4045, - "gated": false, - "gguf": { - "architecture": "llama", - "bos_token": "<|begin_of_text|>", - "chat_template": "{{- bos_token }}\n{%- if not tools is defined %}\n {%- set tools = none %}\n{%- endif %}\n{%- if not enable_thinking is defined %}\n {%- set enable_thinking = false %}\n{%- endif %}\n{#- This block extracts the system message, so we can slot it into the right place. #}\n{%- if messages[0]['role'] == 'system' %}\n {%- set system_message = messages[0]['content']|trim %}\n {%- set messages = messages[1:] %}\n{%- else %}\n {%- set system_message = \"\" %}\n{%- endif %}\n{#- Set the system message. If enable_thinking is true, add the \"Enable deep thinking subroutine.\" #}\n{%- if enable_thinking %}\n {%- if system_message != \"\" %}\n {%- set system_message = \"Enable deep thinking subroutine.\n\n\" ~ system_message %}\n {%- else %}\n {%- set system_message = \"Enable deep thinking subroutine.\" %}\n {%- endif %}\n{%- endif %}\n{#- Set the system message. In case there are tools present, add them to the system message. #}\n{%- if tools is not none or system_message != '' %}\n {{- \"<|start_header_id|>system<|end_header_id|>\n\n\" }}\n {{- system_message }}\n {%- if tools is not none %}\n {%- if system_message != \"\" %}\n {{- \"\n\n\" }}\n {%- endif %}\n {{- \"Available Tools:\n\" }}\n {%- for t in tools %}\n {{- t | tojson(indent=4) }}\n {{- \"\n\n\" }}\n {%- endfor %}\n {%- endif %}\n {{- \"<|eot_id|>\" }}\n{%- endif %}\n\n{#- Rest of the messages #}\n{%- for message in messages %}\n {#- The special cases are when the message is from a tool (via role ipython/tool/tool_results) or when the message is from the assistant, but has \"tool_calls\". If not, we add the message directly as usual. #}\n {#- Case 1 - Usual, non tool related message. #}\n {%- if not (message.role == \"ipython\" or message.role == \"tool\" or message.role == \"tool_results\" or (message.tool_calls is defined and message.tool_calls is not none)) %}\n {{- '<|start_header_id|>' + message['role'] + '<|end_header_id|>\n\n' }}\n {%- if message['content'] is string %}\n {{- message['content'] | trim }}\n {%- else %}\n {%- for item in message['content'] %}\n {%- if item.type == 'text' %}\n {{- item.text | trim }}\n {%- endif %}\n {%- endfor %}\n {%- endif %}\n {{- '<|eot_id|>' }}\n \n {#- Case 2 - the response is from the assistant, but has a tool call returned. The assistant may also have returned some content along with the tool call. 
#}\n {%- elif message.tool_calls is defined and message.tool_calls is not none %}\n {{- \"<|start_header_id|>assistant<|end_header_id|>\n\n\" }}\n {%- if message['content'] is string %}\n {{- message['content'] | trim }}\n {%- else %}\n {%- for item in message['content'] %}\n {%- if item.type == 'text' %}\n {{- item.text | trim }}\n {%- if item.text | trim != \"\" %}\n {{- \"\n\n\" }}\n {%- endif %}\n {%- endif %}\n {%- endfor %}\n {%- endif %}\n {{- \"[\" }}\n {%- for tool_call in message.tool_calls %}\n {%- set out = tool_call.function|tojson %}\n {%- if not tool_call.id is defined %}\n {{- out }}\n {%- else %}\n {{- out[:-1] }}\n {{- ', \"id\": \"' + tool_call.id + '\"}' }}\n {%- endif %}\n {%- if not loop.last %}\n {{- \", \" }}\n {%- else %}\n {{- \"]<|eot_id|>\" }}\n {%- endif %}\n {%- endfor %}\n \n {#- Case 3 - the response is from a tool call. The tool call may have an id associated with it as well. If it does, we add it to the prompt. #}\n {%- elif message.role == \"ipython\" or message[\"role\"] == \"tool_results\" or message[\"role\"] == \"tool\" %}\n {{- \"<|start_header_id|>ipython<|end_header_id|>\n\n\" }}\n {%- if message.tool_call_id is defined and message.tool_call_id != '' %}\n {{- '{\"content\": ' + (message.content | tojson) + ', \"call_id\": \"' + message.tool_call_id + '\"}' }}\n {%- else %}\n {{- '{\"content\": ' + (message.content | tojson) + '}' }}\n {%- endif %}\n {{- \"<|eot_id|>\" }}\n {%- endif %}\n{%- endfor %}\n{%- if add_generation_prompt %}\n {{- '<|start_header_id|>assistant<|end_header_id|>\n\n' }}\n{%- endif %}", - "context_length": 131072, - "eos_token": "<|eot_id|>", - "total": 3606752320 - }, - "id": "cortexso/cogito-v1", - "lastModified": "2025-04-10T03:02:13.000Z", - "likes": 3, - "model-index": null, - "modelId": "cortexso/cogito-v1", - "pipeline_tag": "text-generation", - "private": false, - "sha": "7e55c8c2946b9b48c606431e7a2eaf299c15b80d", - "siblings": [ - { - "rfilename": ".gitattributes" - }, - { - "rfilename": "README.md" - }, - { - "rfilename": "cogito-v1-preview-llama-3b-q2_k.gguf" - }, - { - "rfilename": "cogito-v1-preview-llama-3b-q3_k_l.gguf" - }, - { - "rfilename": "cogito-v1-preview-llama-3b-q3_k_m.gguf" - }, - { - "rfilename": "cogito-v1-preview-llama-3b-q3_k_s.gguf" - }, - { - "rfilename": "cogito-v1-preview-llama-3b-q4_k_m.gguf" - }, - { - "rfilename": "cogito-v1-preview-llama-3b-q4_k_s.gguf" - }, - { - "rfilename": "cogito-v1-preview-llama-3b-q5_k_m.gguf" - }, - { - "rfilename": "cogito-v1-preview-llama-3b-q5_k_s.gguf" - }, - { - "rfilename": "cogito-v1-preview-llama-3b-q6_k.gguf" - }, - { - "rfilename": "cogito-v1-preview-llama-3b-q8_0.gguf" - }, - { - "rfilename": "cogito-v1-preview-llama-70b-q4_k_m.gguf" - }, - { - "rfilename": "cogito-v1-preview-llama-8b-q2_k.gguf" - }, - { - "rfilename": "cogito-v1-preview-llama-8b-q3_k_l.gguf" - }, - { - "rfilename": "cogito-v1-preview-llama-8b-q3_k_m.gguf" - }, - { - "rfilename": "cogito-v1-preview-llama-8b-q3_k_s.gguf" - }, - { - "rfilename": "cogito-v1-preview-llama-8b-q4_k_m.gguf" - }, - { - "rfilename": "cogito-v1-preview-llama-8b-q4_k_s.gguf" - }, - { - "rfilename": "cogito-v1-preview-llama-8b-q5_k_m.gguf" - }, - { - "rfilename": "cogito-v1-preview-llama-8b-q5_k_s.gguf" - }, - { - "rfilename": "cogito-v1-preview-llama-8b-q6_k.gguf" - }, - { - "rfilename": "cogito-v1-preview-llama-8b-q8_0.gguf" - }, - { - "rfilename": "cogito-v1-preview-qwen-14b-q2_k.gguf" - }, - { - "rfilename": "cogito-v1-preview-qwen-14b-q3_k_l.gguf" - }, - { - "rfilename": "cogito-v1-preview-qwen-14b-q3_k_m.gguf" - 
}, - { - "rfilename": "cogito-v1-preview-qwen-14b-q3_k_s.gguf" - }, - { - "rfilename": "cogito-v1-preview-qwen-14b-q4_k_m.gguf" - }, - { - "rfilename": "cogito-v1-preview-qwen-14b-q4_k_s.gguf" - }, - { - "rfilename": "cogito-v1-preview-qwen-14b-q5_k_m.gguf" - }, - { - "rfilename": "cogito-v1-preview-qwen-14b-q5_k_s.gguf" - }, - { - "rfilename": "cogito-v1-preview-qwen-14b-q6_k.gguf" - }, - { - "rfilename": "cogito-v1-preview-qwen-14b-q8_0.gguf" - }, - { - "rfilename": "cogito-v1-preview-qwen-32b-q2_k.gguf" - }, - { - "rfilename": "cogito-v1-preview-qwen-32b-q3_k_l.gguf" - }, - { - "rfilename": "cogito-v1-preview-qwen-32b-q3_k_m.gguf" - }, - { - "rfilename": "cogito-v1-preview-qwen-32b-q3_k_s.gguf" - }, - { - "rfilename": "cogito-v1-preview-qwen-32b-q4_k_m.gguf" - }, - { - "rfilename": "cogito-v1-preview-qwen-32b-q4_k_s.gguf" - }, - { - "rfilename": "cogito-v1-preview-qwen-32b-q5_k_m.gguf" - }, - { - "rfilename": "cogito-v1-preview-qwen-32b-q5_k_s.gguf" - }, - { - "rfilename": "cogito-v1-preview-qwen-32b-q6_k.gguf" - }, - { - "rfilename": "cogito-v1-preview-qwen-32b-q8_0.gguf" - }, - { - "rfilename": "metadata.yml" - }, - { - "rfilename": "model.yml" - } - ], - "spaces": [], - "tags": [ - "gguf", - "cortex.cpp", - "featured", - "text-generation", - "license:apache-2.0", - "endpoints_compatible", - "region:us", - "conversational" - ], - "usedStorage": 417094614784, - "widgetData": [ - { - "text": "Hi, what can you help me with?" - }, - { - "text": "What is 84 * 3 / 2?" - }, - { - "text": "Tell me an interesting fact about the universe!" - }, - { - "text": "Explain quantum computing in simple terms." - } - ] - }, - "models": [ - { - "id": "cogito-v1:8b", - "size": 4920738752 - }, - { - "id": "cogito-v1:70b", - "size": 42520398016 - }, - { - "id": "cogito-v1:3b", - "size": 2241004384 - }, - { - "id": "cogito-v1:32b", - "size": 19848503488 - }, - { - "id": "cogito-v1:14b", - "size": 8985277888 - } - ] - }, - { - "author": "ibm-granite", - "id": "cortexso/granite-3.2-it", - "metadata": { - "_id": "67ab23c8e77c0a1c32f62879", - "author": "cortexso", - "cardData": { - "license": "mit", - "pipeline_tag": "text-generation", - "tags": ["cortex.cpp"] - }, - "createdAt": "2025-02-11T10:17:44.000Z", - "description": "---\nlicense: mit\npipeline_tag: text-generation\ntags:\n- cortex.cpp\n---\n## Overview\nGranite-3.2-it is an advanced AI language model derived from the IBM Granite framework, specifically designed for instruction-following tasks in Italian. Its primary purpose is to facilitate human-like interactions by understanding and generating responses that are contextually relevant and coherent. This model can be effectively utilized in various applications, including customer support, content creation, and language translation, enhancing communication efficiency across diverse sectors. Its performance demonstrates a strong ability to comprehend nuanced instructions and generate accurate outputs, making it suitable for professional and creative environments alike. Overall, Granite-3.2-it stands out for its adaptability, responsiveness, and proficiency in Italian language tasks.\n## Variants\n| No | Variant | Cortex CLI command |\n| --- | --- | --- |\n| 1 | [Granite-3.2-it-8b](https://huggingface.co/cortexso/granite-3.2-it/tree/8b) | cortex run granite-3.2-it:8b|\n## Use it with Jan (UI)\n1. Install **Jan** using [Quickstart](https://jan.ai/docs/quickstart)\n2. Use in Jan model Hub:\n ```bash\n cortexso/granite-3.2-it\n ```\n## Use it with Cortex (CLI)\n1. 
Install **Cortex** using [Quickstart](https://cortex.jan.ai/docs/quickstart)\n2. Run the model with command:\n ```bash\n cortex run granite-3.2-it\n ```\n## Credits\n- **Author:** ibm-granite\n- **Converter:** [Homebrew](https://www.homebrew.ltd/)\n- **Original License:** [License](https://www.apache.org/licenses/LICENSE-2.0)\n- **Paper:** [IBM Granite 3.2 Blog](https://www.ibm.com/new/announcements/ibm-granite-3-2-open-source-reasoning-and-vision)", - "disabled": false, - "downloads": 352, - "gated": false, - "gguf": { - "architecture": "granite", - "bos_token": "<|end_of_text|>", - "chat_template": "{%- if messages[0]['role'] == 'system' %}\n {%- set system_message = messages[0]['content'] %}\n {%- set loop_messages = messages[1:] %}\n{%- else %}\n {%- set system_message = \"Knowledge Cutoff Date: April 2024.\nToday's Date: \" + strftime_now('%B %d, %Y') + \".\nYou are Granite, developed by IBM.\" %}\n {%- if tools and documents %}\n {%- set system_message = system_message + \" You are a helpful AI assistant with access to the following tools. When a tool is required to answer the user's query, respond with <|tool_call|> followed by a JSON list of tools used. If a tool does not exist in the provided list of tools, notify the user that you do not have the ability to fulfill the request.\n\nWrite the response to the user's input by strictly aligning with the facts in the provided documents. If the information needed to answer the question is not available in the documents, inform the user that the question cannot be answered based on the available data.\" %}\n {%- elif tools %}\n {%- set system_message = system_message + \" You are a helpful AI assistant with access to the following tools. When a tool is required to answer the user's query, respond with <|tool_call|> followed by a JSON list of tools used. If a tool does not exist in the provided list of tools, notify the user that you do not have the ability to fulfill the request.\" %}\n {%- elif documents %}\n {%- set system_message = system_message + \" Write the response to the user's input by strictly aligning with the facts in the provided documents. If the information needed to answer the question is not available in the documents, inform the user that the question cannot be answered based on the available data.\" %}\n {%- elif thinking %}\n {%- set system_message = system_message + \" You are a helpful AI assistant.\nRespond to every user query in a comprehensive and detailed way. You can write down your thoughts and reasoning process before responding. In the thought process, engage in a comprehensive cycle of analysis, summarization, exploration, reassessment, reflection, backtracing, and iteration to develop well-considered thinking process. In the response section, based on various attempts, explorations, and reflections from the thoughts section, systematically present the final solution that you deem correct. The response should summarize the thought process. Write your thoughts after 'Here is my thought process:' and write your response after 'Here is my response:' for each user query.\" %}\n {%- else %}\n {%- set system_message = system_message + \" You are a helpful AI assistant.\" %} \n {%- endif %}\n {%- if 'citations' in controls and documents %}\n {%- set system_message = system_message + '\n\nIn your response, use the symbols and to indicate when a fact comes from a document in the search result, e.g 0 for a fact from document 0. 
Afterwards, list all the citations with their corresponding documents in an ordered list.' %}\n {%- endif %}\n {%- if 'hallucinations' in controls and documents %}\n {%- set system_message = system_message + '\n\nFinally, after the response is written, include a numbered list of sentences from the response that are potentially hallucinated and not based in the documents.' %}\n {%- endif %}\n {%- set loop_messages = messages %}\n{%- endif %}\n{{- '<|start_of_role|>system<|end_of_role|>' + system_message + '<|end_of_text|>\n' }}\n{%- if tools %}\n {{- '<|start_of_role|>tools<|end_of_role|>' }}\n {{- tools | tojson(indent=4) }}\n {{- '<|end_of_text|>\n' }}\n{%- endif %}\n{%- if documents %}\n {{- '<|start_of_role|>documents<|end_of_role|>' }}\n {%- for document in documents %}\n {{- 'Document ' + loop.index0 | string + '\n' }}\n {{- document['text'] }}\n {%- if not loop.last %}\n {{- '\n\n'}}\n {%- endif%}\n {%- endfor %}\n {{- '<|end_of_text|>\n' }}\n{%- endif %}\n{%- for message in loop_messages %}\n {{- '<|start_of_role|>' + message['role'] + '<|end_of_role|>' + message['content'] + '<|end_of_text|>\n' }}\n {%- if loop.last and add_generation_prompt %}\n {{- '<|start_of_role|>assistant' }}\n {%- if controls %}\n {{- ' ' + controls | tojson()}}\n {%- endif %}\n {{- '<|end_of_role|>' }}\n {%- endif %}\n{%- endfor %}", - "context_length": 131072, - "eos_token": "<|end_of_text|>", - "total": 8170848256 - }, - "id": "cortexso/granite-3.2-it", - "lastModified": "2025-03-03T02:11:18.000Z", - "likes": 1, - "model-index": null, - "modelId": "cortexso/granite-3.2-it", - "pipeline_tag": "text-generation", - "private": false, - "sha": "2fb3d81e43760500c0ad28f9b7d047c75abc16dd", - "siblings": [ - { - "rfilename": ".gitattributes" - }, - { - "rfilename": "README.md" - }, - { - "rfilename": "granite-3.2-8b-instruct-q2_k.gguf" - }, - { - "rfilename": "granite-3.2-8b-instruct-q3_k_l.gguf" - }, - { - "rfilename": "granite-3.2-8b-instruct-q3_k_m.gguf" - }, - { - "rfilename": "granite-3.2-8b-instruct-q3_k_s.gguf" - }, - { - "rfilename": "granite-3.2-8b-instruct-q4_k_m.gguf" - }, - { - "rfilename": "granite-3.2-8b-instruct-q4_k_s.gguf" - }, - { - "rfilename": "granite-3.2-8b-instruct-q5_k_m.gguf" - }, - { - "rfilename": "granite-3.2-8b-instruct-q5_k_s.gguf" - }, - { - "rfilename": "granite-3.2-8b-instruct-q6_k.gguf" - }, - { - "rfilename": "granite-3.2-8b-instruct-q8_0.gguf" - }, - { - "rfilename": "metadata.yml" - }, - { - "rfilename": "model.yml" - } - ], - "spaces": [], - "tags": [ - "gguf", - "cortex.cpp", - "text-generation", - "license:mit", - "endpoints_compatible", - "region:us", - "conversational" - ], - "usedStorage": 56447768704, - "widgetData": [ - { - "text": "Hi, what can you help me with?" - }, - { - "text": "What is 84 * 3 / 2?" - }, - { - "text": "Tell me an interesting fact about the universe!" - }, - { - "text": "Explain quantum computing in simple terms." - } - ] - }, - "models": [ - { - "id": "granite-3.2-it:8b", - "size": 4942859456 - } - ] - }, - { - "author": "allenai", - "id": "cortexso/olmo-2", - "metadata": { - "_id": "6746c45ca0de7ab99efe78d5", - "author": "cortexso", - "cardData": { - "license": "other", - "pipeline_tag": "text-generation", - "tags": ["cortex.cpp"] - }, - "createdAt": "2024-11-27T07:03:56.000Z", - "description": "---\nlicense: other\npipeline_tag: text-generation\ntags:\n- cortex.cpp\n---\n\n## Overview\nOLMo-2 is a series of Open Language Models designed to enable the science of language models. 
These models are trained on the Dolma dataset, with all code, checkpoints, logs (coming soon), and associated training details made openly available.\n\nThe OLMo-2 13B Instruct November 2024 is a post-trained variant of the OLMo-2 13B model, which has undergone supervised fine-tuning on an OLMo-specific variant of the Tülu 3 dataset. Additional training techniques include Direct Preference Optimization (DPO) and Reinforcement Learning from Virtual Rewards (RLVR), optimizing it for state-of-the-art performance across various tasks, including chat, MATH, GSM8K, and IFEval.\n\n## Variants\n\n| No | Variant | Cortex CLI command |\n| --- | --- | --- |\n| 1 | [Olmo-2-7b](https://huggingface.co/cortexso/olmo-2/tree/7b) | `cortex run olmo-2:7b` |\n| 2 | [Olmo-2-13b](https://huggingface.co/cortexso/olmo-2/tree/13b) | `cortex run olmo-2:13b` |\n| 3 | [Olmo-2-32b](https://huggingface.co/cortexso/olmo-2/tree/32b) | `cortex run olmo-2:32b` |\n## Use it with Jan (UI)\n\n1. Install **Jan** using [Quickstart](https://jan.ai/docs/quickstart)\n2. Use in Jan model Hub:\n ```bash\n cortexhub/olmo-2\n ```\n\n## Use it with Cortex (CLI)\n\n1. Install **Cortex** using [Quickstart](https://cortex.jan.ai/docs/quickstart)\n2. Run the model with command:\n ```bash\n cortex run olmo-2\n ```\n \n## Credits\n\n- **Author:** allenai\n- **Converter:** [Homebrew](https://homebrew.ltd/)\n- **Original License:** [Licence](https://choosealicense.com/licenses/apache-2.0/)\n- **Papers:** [Paper](https://arxiv.org/abs/2501.00656)", - "disabled": false, - "downloads": 352, - "gated": false, - "gguf": { - "architecture": "olmo2", - "bos_token": "<|endoftext|>", - "chat_template": "{% for message in messages %}{% if message['role'] == 'system' %}{{ '<|system|>\n' + message['content'] + '\n' }}{% elif message['role'] == 'user' %}{{ '<|user|>\n' + message['content'] + '\n' }}{% elif message['role'] == 'assistant' %}{% if not loop.last %}{{ '<|assistant|>\n' + message['content'] + eos_token + '\n' }}{% else %}{{ '<|assistant|>\n' + message['content'] + eos_token }}{% endif %}{% endif %}{% if loop.last and add_generation_prompt %}{{ '<|assistant|>\n' }}{% endif %}{% endfor %}", - "context_length": 4096, - "eos_token": "<|endoftext|>", - "total": 32234279936 - }, - "id": "cortexso/olmo-2", - "lastModified": "2025-03-14T03:06:15.000Z", - "likes": 1, - "model-index": null, - "modelId": "cortexso/olmo-2", - "pipeline_tag": "text-generation", - "private": false, - "sha": "b76f7629d2da0ccc9535845bab99291e317de088", - "siblings": [ - { - "rfilename": ".gitattributes" - }, - { - "rfilename": "README.md" - }, - { - "rfilename": "metadata.yml" - }, - { - "rfilename": "model.yml" - }, - { - "rfilename": "olmo-2-0325-32b-instruct-q2_k.gguf" - }, - { - "rfilename": "olmo-2-0325-32b-instruct-q3_k_l.gguf" - }, - { - "rfilename": "olmo-2-0325-32b-instruct-q3_k_m.gguf" - }, - { - "rfilename": "olmo-2-0325-32b-instruct-q3_k_s.gguf" - }, - { - "rfilename": "olmo-2-0325-32b-instruct-q4_k_m.gguf" - }, - { - "rfilename": "olmo-2-0325-32b-instruct-q4_k_s.gguf" - }, - { - "rfilename": "olmo-2-0325-32b-instruct-q5_k_m.gguf" - }, - { - "rfilename": "olmo-2-0325-32b-instruct-q5_k_s.gguf" - }, - { - "rfilename": "olmo-2-0325-32b-instruct-q6_k.gguf" - }, - { - "rfilename": "olmo-2-0325-32b-instruct-q8_0.gguf" - }, - { - "rfilename": "olmo-2-1124-13b-instruct-q2_k.gguf" - }, - { - "rfilename": "olmo-2-1124-13b-instruct-q3_k_l.gguf" - }, - { - "rfilename": "olmo-2-1124-13b-instruct-q3_k_m.gguf" - }, - { - "rfilename": "olmo-2-1124-13b-instruct-q3_k_s.gguf" - }, - { 
- "rfilename": "olmo-2-1124-13b-instruct-q4_k_m.gguf" - }, - { - "rfilename": "olmo-2-1124-13b-instruct-q4_k_s.gguf" - }, - { - "rfilename": "olmo-2-1124-13b-instruct-q5_k_m.gguf" - }, - { - "rfilename": "olmo-2-1124-13b-instruct-q5_k_s.gguf" - }, - { - "rfilename": "olmo-2-1124-13b-instruct-q6_k.gguf" - }, - { - "rfilename": "olmo-2-1124-13b-instruct-q8_0.gguf" - }, - { - "rfilename": "olmo-2-1124-7b-instruct-q2_k.gguf" - }, - { - "rfilename": "olmo-2-1124-7b-instruct-q3_k_l.gguf" - }, - { - "rfilename": "olmo-2-1124-7b-instruct-q3_k_m.gguf" - }, - { - "rfilename": "olmo-2-1124-7b-instruct-q3_k_s.gguf" - }, - { - "rfilename": "olmo-2-1124-7b-instruct-q4_k_m.gguf" - }, - { - "rfilename": "olmo-2-1124-7b-instruct-q4_k_s.gguf" - }, - { - "rfilename": "olmo-2-1124-7b-instruct-q5_k_m.gguf" - }, - { - "rfilename": "olmo-2-1124-7b-instruct-q5_k_s.gguf" - }, - { - "rfilename": "olmo-2-1124-7b-instruct-q6_k.gguf" - }, - { - "rfilename": "olmo-2-1124-7b-instruct-q8_0.gguf" - } - ], - "spaces": [], - "tags": [ - "gguf", - "cortex.cpp", - "text-generation", - "arxiv:2501.00656", - "license:other", - "endpoints_compatible", - "region:us", - "conversational" - ], - "usedStorage": 335683989120, - "widgetData": [ - { - "text": "Hi, what can you help me with?" - }, - { - "text": "What is 84 * 3 / 2?" - }, - { - "text": "Tell me an interesting fact about the universe!" - }, - { - "text": "Explain quantum computing in simple terms." - } - ] - }, - "models": [ - { - "id": "olmo-2:32b", - "size": 19482558496 - }, - { - "id": "olmo-2:13b", - "size": 8354349408 - }, - { - "id": "olmo-2:7b", - "size": 4472020160 - } - ] - }, - { - "author": "Microsoft", - "id": "cortexso/phi-4", - "metadata": { - "_id": "677f682eb2e41c2f45dbee73", - "author": "cortexso", - "cardData": { - "license": "mit", - "pipeline_tag": "text-generation", - "tags": ["cortex.cpp"] - }, - "createdAt": "2025-01-09T06:09:50.000Z", - "description": "---\nlicense: mit\npipeline_tag: text-generation\ntags:\n- cortex.cpp\n---\n## Overview\n\nPhi-4 model, a state-of-the-art 14B parameter Transformer designed for advanced reasoning, conversational AI, and high-quality text generation. Built on a mix of synthetic datasets, filtered public domain content, academic books, and Q&A datasets, Phi-4 ensures exceptional performance through data quality and alignment. It features a 16K token context length, trained on 9.8T tokens over 21 days using 1920 H100-80G GPUs. Phi-4 underwent rigorous fine-tuning and preference optimization to enhance instruction adherence and safety. Released on December 12, 2024, it represents a static model with data cutoff as of June 2024, suitable for diverse applications in research and dialogue systems.\n\n## Variants\n\n| No | Variant | Cortex CLI command |\n| --- | --- | --- |\n| 1 | [Phi-4-14b](https://huggingface.co/cortexso/phi-4/tree/14b) | `cortex run phi-4:14b` |\n\n## Use it with Jan (UI)\n\n1. Install **Jan** using [Quickstart](https://jan.ai/docs/quickstart)\n2. Use in Jan model Hub:\n ```text\n cortexso/phi-4\n ```\n\n## Use it with Cortex (CLI)\n\n1. Install **Cortex** using [Quickstart](https://cortex.jan.ai/docs/quickstart)\n2. 
Run the model with command:\n ```bash\n cortex run phi-4\n ```\n\n## Credits\n\n- **Author:** Microsoft Research\n- **Converter:** [Homebrew](https://www.homebrew.ltd/)\n- **Original License:** [License](https://huggingface.co/microsoft/phi-4/blob/main/LICENSE)\n- **Papers:** [Phi-4 Technical Report](https://arxiv.org/pdf/2412.08905)", - "disabled": false, - "downloads": 463, - "gated": false, - "gguf": { - "architecture": "phi3", - "bos_token": "<|endoftext|>", - "chat_template": "{% for message in messages %}{% if (message['role'] == 'system') %}{{'<|im_start|>system<|im_sep|>' + message['content'] + '<|im_end|>'}}{% elif (message['role'] == 'user') %}{{'<|im_start|>user<|im_sep|>' + message['content'] + '<|im_end|>'}}{% elif (message['role'] == 'assistant') %}{{'<|im_start|>assistant<|im_sep|>' + message['content'] + '<|im_end|>'}}{% endif %}{% endfor %}{% if add_generation_prompt %}{{ '<|im_start|>assistant<|im_sep|>' }}{% endif %}", - "context_length": 16384, - "eos_token": "<|im_end|>", - "total": 14659507200 - }, - "id": "cortexso/phi-4", - "lastModified": "2025-03-02T15:30:47.000Z", - "likes": 0, - "model-index": null, - "modelId": "cortexso/phi-4", - "pipeline_tag": "text-generation", - "private": false, - "sha": "cc1f8271734a2ac438a1a7c60a62f111b9476524", - "siblings": [ - { - "rfilename": ".gitattributes" - }, - { - "rfilename": "README.md" - }, - { - "rfilename": "metadata.yml" - }, - { - "rfilename": "model.yml" - }, - { - "rfilename": "phi-4-q2_k.gguf" - }, - { - "rfilename": "phi-4-q3_k_l.gguf" - }, - { - "rfilename": "phi-4-q3_k_m.gguf" - }, - { - "rfilename": "phi-4-q3_k_s.gguf" - }, - { - "rfilename": "phi-4-q4_k_m.gguf" - }, - { - "rfilename": "phi-4-q4_k_s.gguf" - }, - { - "rfilename": "phi-4-q5_k_m.gguf" - }, - { - "rfilename": "phi-4-q5_k_s.gguf" - }, - { - "rfilename": "phi-4-q6_k.gguf" - }, - { - "rfilename": "phi-4-q8_0.gguf" - } - ], - "spaces": [], - "tags": [ - "gguf", - "cortex.cpp", - "text-generation", - "arxiv:2412.08905", - "license:mit", - "endpoints_compatible", - "region:us", - "conversational" - ], - "usedStorage": 93205915520, - "widgetData": [ - { - "text": "Hi, what can you help me with?" - }, - { - "text": "What is 84 * 3 / 2?" - }, - { - "text": "Tell me an interesting fact about the universe!" - }, - { - "text": "Explain quantum computing in simple terms." - } - ] - }, - "models": [ - { - "id": "phi-4:14b", - "size": 9053114560 - } - ] - }, - { - "author": "MistralAI", - "id": "cortexso/mistral-small-24b", - "metadata": { - "_id": "679c3a8f4061a1ab60e703b7", - "author": "cortexso", - "cardData": { - "license": "mit", - "pipeline_tag": "text-generation", - "tags": ["cortex.cpp"] - }, - "createdAt": "2025-01-31T02:50:55.000Z", - "description": "---\nlicense: mit\npipeline_tag: text-generation\ntags:\n- cortex.cpp\n---\n## Overview\nThe 'mistral-small-24b' model is an advanced AI language model optimized for a variety of natural language processing tasks. It is particularly well-suited for applications such as text generation, chatbots, content summarization, and language translation. Built on the foundation of 'mistralai/Mistral-Small-24B-Base-2501', it leverages state-of-the-art techniques for understanding and generating human-like text. Users can expect significant improvements in fluency and contextual relevance, making it effective for both professional and creative use cases. 
Its efficiency allows for deployment in resource-constrained environments, catering to a diverse range of industries and applications.\n## Variants\n| No | Variant | Cortex CLI command |\n| --- | --- | --- |\n| 1 | [Mistral-Small-24b](https://huggingface.co/cortexso/mistral-small-24b/tree/24b) | cortex run mistral-small-24b:24b |\n## Use it with Jan (UI)\n1. Install **Jan** using [Quickstart](https://jan.ai/docs/quickstart)\n2. Use in Jan model Hub:\n \n ```bash\n cortexso/mistral-small-24b\n ```\n \n## Use it with Cortex (CLI)\n1. Install **Cortex** using [Quickstart](https://cortex.jan.ai/docs/quickstart)\n2. Run the model with command:\n \n ```bash\n cortex run mistral-small-24b\n ```\n \n## Credits\n- **Author:** mistralai\n- **Converter:** [Homebrew](https://www.homebrew.ltd/)\n- **Original License:** [License](https://choosealicense.com/licenses/apache-2.0/)\n- **Paper:** [Mistral Small 3 Blog](https://mistral.ai/news/mistral-small-3)", - "disabled": false, - "downloads": 683, - "gated": false, - "gguf": { - "architecture": "llama", - "bos_token": "", - "context_length": 32768, - "eos_token": "", - "total": 23572403200 - }, - "id": "cortexso/mistral-small-24b", - "lastModified": "2025-03-03T06:09:47.000Z", - "likes": 0, - "model-index": null, - "modelId": "cortexso/mistral-small-24b", - "pipeline_tag": "text-generation", - "private": false, - "sha": "5a28cb4b0f1aa4e0b55f527b71c88eb5b56ebd71", - "siblings": [ - { - "rfilename": ".gitattributes" - }, - { - "rfilename": "README.md" - }, - { - "rfilename": "metadata.yml" - }, - { - "rfilename": "mistral-small-24b-base-2501-q2_k.gguf" - }, - { - "rfilename": "mistral-small-24b-base-2501-q3_k_l.gguf" - }, - { - "rfilename": "mistral-small-24b-base-2501-q3_k_m.gguf" - }, - { - "rfilename": "mistral-small-24b-base-2501-q3_k_s.gguf" - }, - { - "rfilename": "mistral-small-24b-base-2501-q4_k_m.gguf" - }, - { - "rfilename": "mistral-small-24b-base-2501-q4_k_s.gguf" - }, - { - "rfilename": "mistral-small-24b-base-2501-q5_k_m.gguf" - }, - { - "rfilename": "mistral-small-24b-base-2501-q5_k_s.gguf" - }, - { - "rfilename": "mistral-small-24b-base-2501-q6_k.gguf" - }, - { - "rfilename": "mistral-small-24b-base-2501-q8_0.gguf" - }, - { - "rfilename": "model.yml" - } - ], - "spaces": [], - "tags": [ - "gguf", - "cortex.cpp", - "text-generation", - "license:mit", - "endpoints_compatible", - "region:us" - ], - "usedStorage": 148517729600, - "widgetData": [ - { - "text": "My name is Julien and I like to" - }, - { - "text": "I like traveling by train because" - }, - { - "text": "Paris is an amazing place to visit," - }, - { - "text": "Once upon a time," - } - ] - }, - "models": [ - { - "id": "mistral-small-24b:24b", - "size": 14333907488 - } - ] - }, - { - "author": "DeepSeek-AI", - "id": "cortexso/deepseek-r1-distill-qwen-7b", - "metadata": { - "_id": "6790a5b2044aeb2bd5922877", - "author": "cortexso", - "cardData": { - "license": "mit", - "pipeline_tag": "text-generation", - "tags": ["cortex.cpp"] - }, - "createdAt": "2025-01-22T08:00:50.000Z", - "description": "---\nlicense: mit\npipeline_tag: text-generation\ntags:\n- cortex.cpp\n---\n\n## Overview\n\n**DeepSeek** developed and released the [DeepSeek R1 Distill Qwen 7B](https://huggingface.co/deepseek-ai/DeepSeek-R1-Distill-Qwen-7B) model, a distilled version of the Qwen 7B language model. 
This version is fine-tuned for high-performance text generation and optimized for dialogue and information-seeking tasks, providing even greater capabilities with its larger size compared to the 7B variant.\n\nThe model is designed for applications in customer support, conversational AI, and research, focusing on delivering accurate, helpful, and safe outputs while maintaining efficiency.\n\n## Variants\n\n| No | Variant | Cortex CLI command |\n| --- | --- | --- |\n| 1 | [Deepseek-r1-distill-qwen-7b-7b](https://huggingface.co/cortexso/deepseek-r1-distill-qwen-7b/tree/7b) | `cortex run deepseek-r1-distill-qwen-7b:7b` |\n\n## Use it with Jan (UI)\n\n1. Install **Jan** using [Quickstart](https://jan.ai/docs/quickstart)\n2. Use in Jan model Hub:\n ```bash\n cortexso/deepseek-r1-distill-qwen-7b\n ```\n\n## Use it with Cortex (CLI)\n\n1. Install **Cortex** using [Quickstart](https://cortex.jan.ai/docs/quickstart)\n2. Run the model with command:\n ```bash\n cortex run deepseek-r1-distill-qwen-7b\n ```\n\n## Credits\n\n- **Author:** DeepSeek\n- **Converter:** [Homebrew](https://www.homebrew.ltd/)\n- **Original License:** [License](https://huggingface.co/deepseek-ai/DeepSeek-R1-Distill-Qwen-7B#7-license)\n- **Papers:** [DeepSeek-R1: Incentivizing Reasoning Capability in LLMs via Reinforcement Learning](https://arxiv.org/html/2501.12948v1)", - "disabled": false, - "downloads": 1008, - "gated": false, - "gguf": { - "architecture": "qwen2", - "bos_token": "<|begin▁of▁sentence|>", - "chat_template": "{% if not add_generation_prompt is defined %}{% set add_generation_prompt = false %}{% endif %}{% set ns = namespace(is_first=false, is_tool=false, is_output_first=true, system_prompt='') %}{%- for message in messages %}{%- if message['role'] == 'system' %}{% set ns.system_prompt = message['content'] %}{%- endif %}{%- endfor %}{{bos_token}}{{ns.system_prompt}}{%- for message in messages %}{%- if message['role'] == 'user' %}{%- set ns.is_tool = false -%}{{'<|User|>' + message['content']}}{%- endif %}{%- if message['role'] == 'assistant' and message['content'] is none %}{%- set ns.is_tool = false -%}{%- for tool in message['tool_calls']%}{%- if not ns.is_first %}{{'<|Assistant|><|tool▁calls▁begin|><|tool▁call▁begin|>' + tool['type'] + '<|tool▁sep|>' + tool['function']['name'] + '\\n' + '```json' + '\\n' + tool['function']['arguments'] + '\\n' + '```' + '<|tool▁call▁end|>'}}{%- set ns.is_first = true -%}{%- else %}{{'\\n' + '<|tool▁call▁begin|>' + tool['type'] + '<|tool▁sep|>' + tool['function']['name'] + '\\n' + '```json' + '\\n' + tool['function']['arguments'] + '\\n' + '```' + '<|tool▁call▁end|>'}}{{'<|tool▁calls▁end|><|end▁of▁sentence|>'}}{%- endif %}{%- endfor %}{%- endif %}{%- if message['role'] == 'assistant' and message['content'] is not none %}{%- if ns.is_tool %}{{'<|tool▁outputs▁end|>' + message['content'] + '<|end▁of▁sentence|>'}}{%- set ns.is_tool = false -%}{%- else %}{% set content = message['content'] %}{% if '' in content %}{% set content = content.split('')[-1] %}{% endif %}{{'<|Assistant|>' + content + '<|end▁of▁sentence|>'}}{%- endif %}{%- endif %}{%- if message['role'] == 'tool' %}{%- set ns.is_tool = true -%}{%- if ns.is_output_first %}{{'<|tool▁outputs▁begin|><|tool▁output▁begin|>' + message['content'] + '<|tool▁output▁end|>'}}{%- set ns.is_output_first = false %}{%- else %}{{'\\n<|tool▁output▁begin|>' + message['content'] + '<|tool▁output▁end|>'}}{%- endif %}{%- endif %}{%- endfor -%}{% if ns.is_tool %}{{'<|tool▁outputs▁end|>'}}{% endif %}{% if add_generation_prompt and not ns.is_tool 
%}{{'<|Assistant|>\\n'}}{% endif %}", - "context_length": 131072, - "eos_token": "<|end▁of▁sentence|>", - "total": 7615616512 - }, - "id": "cortexso/deepseek-r1-distill-qwen-7b", - "lastModified": "2025-03-03T06:27:42.000Z", - "likes": 0, - "model-index": null, - "modelId": "cortexso/deepseek-r1-distill-qwen-7b", - "pipeline_tag": "text-generation", - "private": false, - "sha": "8e256fee6ed3616f3f90b0eb453083a115f1fe40", - "siblings": [ - { - "rfilename": ".gitattributes" - }, - { - "rfilename": "README.md" - }, - { - "rfilename": "deepseek-r1-distill-qwen-7b-q2_k.gguf" - }, - { - "rfilename": "deepseek-r1-distill-qwen-7b-q3_k_l.gguf" - }, - { - "rfilename": "deepseek-r1-distill-qwen-7b-q3_k_m.gguf" - }, - { - "rfilename": "deepseek-r1-distill-qwen-7b-q3_k_s.gguf" - }, - { - "rfilename": "deepseek-r1-distill-qwen-7b-q4_k_m.gguf" - }, - { - "rfilename": "deepseek-r1-distill-qwen-7b-q4_k_s.gguf" - }, - { - "rfilename": "deepseek-r1-distill-qwen-7b-q5_k_m.gguf" - }, - { - "rfilename": "deepseek-r1-distill-qwen-7b-q5_k_s.gguf" - }, - { - "rfilename": "deepseek-r1-distill-qwen-7b-q6_k.gguf" - }, - { - "rfilename": "deepseek-r1-distill-qwen-7b-q8_0.gguf" - }, - { - "rfilename": "metadata.yml" - }, - { - "rfilename": "model.yml" - } - ], - "spaces": [], - "tags": [ - "gguf", - "cortex.cpp", - "text-generation", - "license:mit", - "endpoints_compatible", - "region:us", - "conversational" - ], - "usedStorage": 53341802656, - "widgetData": [ - { - "text": "Hi, what can you help me with?" - }, - { - "text": "What is 84 * 3 / 2?" - }, - { - "text": "Tell me an interesting fact about the universe!" - }, - { - "text": "Explain quantum computing in simple terms." - } - ] - }, - "models": [ - { - "id": "deepseek-r1-distill-qwen-7b:7b", - "size": 4683073184 - } - ] - }, - { - "author": "DeepSeek-AI", - "id": "cortexso/deepseek-r1-distill-qwen-14b", - "metadata": { - "_id": "678fdf2be186002cc0ba006e", - "author": "cortexso", - "cardData": { - "license": "mit", - "pipeline_tag": "text-generation", - "tags": ["cortex.cpp"] - }, - "createdAt": "2025-01-21T17:53:47.000Z", - "description": "---\nlicense: mit\npipeline_tag: text-generation\ntags:\n- cortex.cpp\n---\n\n## Overview\n\n**DeepSeek** developed and released the [DeepSeek R1 Distill Qwen 14B](https://huggingface.co/deepseek-ai/DeepSeek-R1-Distill-Qwen-14B) model, a distilled version of the Qwen 14B language model. This variant represents the largest and most powerful model in the DeepSeek R1 Distill series, fine-tuned for high-performance text generation, dialogue optimization, and advanced reasoning tasks. \n\nThe model is designed for applications that require extensive understanding, such as conversational AI, research, large-scale knowledge systems, and customer service, providing superior performance in accuracy, efficiency, and safety.\n\n## Variants\n\n| No | Variant | Cortex CLI command |\n| --- | --- | --- |\n| 1 | [Deepseek-r1-distill-qwen-14b-14b](https://huggingface.co/cortexso/deepseek-r1-distill-qwen-14b/tree/14b) | `cortex run deepseek-r1-distill-qwen-14b:14b` |\n\n## Use it with Jan (UI)\n\n1. Install **Jan** using [Quickstart](https://jan.ai/docs/quickstart)\n2. Use in Jan model Hub:\n ```bash\n cortexso/deepseek-r1-distill-qwen-14b\n ```\n\n## Use it with Cortex (CLI)\n\n1. Install **Cortex** using [Quickstart](https://cortex.jan.ai/docs/quickstart)\n2. 
Run the model with command:\n ```bash\n cortex run deepseek-r1-distill-qwen-14b\n ```\n\n## Credits\n\n- **Author:** DeepSeek\n- **Converter:** [Homebrew](https://www.homebrew.ltd/)\n- **Original License:** [License](https://huggingface.co/deepseek-ai/DeepSeek-R1-Distill-Qwen-14B#7-license)\n- **Papers:** [DeepSeek-R1: Incentivizing Reasoning Capability in LLMs via Reinforcement Learning](https://arxiv.org/html/2501.12948v1)", - "disabled": false, - "downloads": 1261, - "gated": false, - "gguf": { - "architecture": "qwen2", - "bos_token": "<|begin▁of▁sentence|>", - "chat_template": "{% if not add_generation_prompt is defined %}{% set add_generation_prompt = false %}{% endif %}{% set ns = namespace(is_first=false, is_tool=false, is_output_first=true, system_prompt='') %}{%- for message in messages %}{%- if message['role'] == 'system' %}{% set ns.system_prompt = message['content'] %}{%- endif %}{%- endfor %}{{bos_token}}{{ns.system_prompt}}{%- for message in messages %}{%- if message['role'] == 'user' %}{%- set ns.is_tool = false -%}{{'<|User|>' + message['content']}}{%- endif %}{%- if message['role'] == 'assistant' and message['content'] is none %}{%- set ns.is_tool = false -%}{%- for tool in message['tool_calls']%}{%- if not ns.is_first %}{{'<|Assistant|><|tool▁calls▁begin|><|tool▁call▁begin|>' + tool['type'] + '<|tool▁sep|>' + tool['function']['name'] + '\\n' + '```json' + '\\n' + tool['function']['arguments'] + '\\n' + '```' + '<|tool▁call▁end|>'}}{%- set ns.is_first = true -%}{%- else %}{{'\\n' + '<|tool▁call▁begin|>' + tool['type'] + '<|tool▁sep|>' + tool['function']['name'] + '\\n' + '```json' + '\\n' + tool['function']['arguments'] + '\\n' + '```' + '<|tool▁call▁end|>'}}{{'<|tool▁calls▁end|><|end▁of▁sentence|>'}}{%- endif %}{%- endfor %}{%- endif %}{%- if message['role'] == 'assistant' and message['content'] is not none %}{%- if ns.is_tool %}{{'<|tool▁outputs▁end|>' + message['content'] + '<|end▁of▁sentence|>'}}{%- set ns.is_tool = false -%}{%- else %}{% set content = message['content'] %}{% if '' in content %}{% set content = content.split('')[-1] %}{% endif %}{{'<|Assistant|>' + content + '<|end▁of▁sentence|>'}}{%- endif %}{%- endif %}{%- if message['role'] == 'tool' %}{%- set ns.is_tool = true -%}{%- if ns.is_output_first %}{{'<|tool▁outputs▁begin|><|tool▁output▁begin|>' + message['content'] + '<|tool▁output▁end|>'}}{%- set ns.is_output_first = false %}{%- else %}{{'\\n<|tool▁output▁begin|>' + message['content'] + '<|tool▁output▁end|>'}}{%- endif %}{%- endif %}{%- endfor -%}{% if ns.is_tool %}{{'<|tool▁outputs▁end|>'}}{% endif %}{% if add_generation_prompt and not ns.is_tool %}{{'<|Assistant|>\\n'}}{% endif %}", - "context_length": 131072, - "eos_token": "<|end▁of▁sentence|>", - "total": 14770033664 - }, - "id": "cortexso/deepseek-r1-distill-qwen-14b", - "lastModified": "2025-03-03T06:40:22.000Z", - "likes": 0, - "model-index": null, - "modelId": "cortexso/deepseek-r1-distill-qwen-14b", - "pipeline_tag": "text-generation", - "private": false, - "sha": "ca42c63b1c148ac7be176ef0ed8384d3775bed5b", - "siblings": [ - { - "rfilename": ".gitattributes" - }, - { - "rfilename": "README.md" - }, - { - "rfilename": "deepseek-r1-distill-qwen-14b-q2_k.gguf" - }, - { - "rfilename": "deepseek-r1-distill-qwen-14b-q3_k_l.gguf" - }, - { - "rfilename": "deepseek-r1-distill-qwen-14b-q3_k_m.gguf" - }, - { - "rfilename": "deepseek-r1-distill-qwen-14b-q3_k_s.gguf" - }, - { - "rfilename": "deepseek-r1-distill-qwen-14b-q4_k_m.gguf" - }, - { - "rfilename": "deepseek-r1-distill-qwen-14b-q4_k_s.gguf" - }, - 
{ - "rfilename": "deepseek-r1-distill-qwen-14b-q5_k_m.gguf" - }, - { - "rfilename": "deepseek-r1-distill-qwen-14b-q5_k_s.gguf" - }, - { - "rfilename": "deepseek-r1-distill-qwen-14b-q6_k.gguf" - }, - { - "rfilename": "deepseek-r1-distill-qwen-14b-q8_0.gguf" - }, - { - "rfilename": "metadata.yml" - }, - { - "rfilename": "model.yml" - } - ], - "spaces": [], - "tags": [ - "gguf", - "cortex.cpp", - "text-generation", - "license:mit", - "endpoints_compatible", - "region:us", - "conversational" - ], - "usedStorage": 102845421536, - "widgetData": [ - { - "text": "Hi, what can you help me with?" - }, - { - "text": "What is 84 * 3 / 2?" - }, - { - "text": "Tell me an interesting fact about the universe!" - }, - { - "text": "Explain quantum computing in simple terms." - } - ] - }, - "models": [ - { - "id": "deepseek-r1-distill-qwen-14b:14b", - "size": 8988109920 - } - ] - }, - { - "author": "DeepSeek-AI", - "id": "cortexso/deepseek-r1-distill-qwen-32b", - "metadata": { - "_id": "678fe132df84bd3d94f37e58", - "author": "cortexso", - "cardData": { - "license": "mit", - "pipeline_tag": "text-generation", - "tags": ["cortex.cpp"] - }, - "createdAt": "2025-01-21T18:02:26.000Z", - "description": "---\nlicense: mit\npipeline_tag: text-generation\ntags:\n- cortex.cpp\n---\n\n## Overview\n\n**DeepSeek** developed and released the [DeepSeek R1 Distill Qwen 32B](https://huggingface.co/deepseek-ai/DeepSeek-R1-Distill-Qwen-32B) model, a distilled version of the Qwen 32B language model. This is the most advanced and largest model in the DeepSeek R1 Distill family, offering unparalleled performance in text generation, dialogue optimization, and reasoning tasks. \n\nThe model is tailored for large-scale applications in conversational AI, research, enterprise solutions, and knowledge systems, delivering exceptional accuracy, efficiency, and safety at scale.\n\n## Variants\n\n| No | Variant | Cortex CLI command |\n| --- | --- | --- |\n| 1 | [Deepseek-r1-distill-qwen-32b-32b](https://huggingface.co/cortexso/deepseek-r1-distill-qwen-32b/tree/32b) | `cortex run deepseek-r1-distill-qwen-32b:32b` |\n\n## Use it with Jan (UI)\n\n1. Install **Jan** using [Quickstart](https://jan.ai/docs/quickstart)\n2. Use in Jan model Hub:\n ```bash\n cortexso/deepseek-r1-distill-qwen-32b\n ```\n\n## Use it with Cortex (CLI)\n\n1. Install **Cortex** using [Quickstart](https://cortex.jan.ai/docs/quickstart)\n2. 
Run the model with command:\n ```bash\n cortex run deepseek-r1-distill-qwen-32b\n ```\n\n## Credits\n\n- **Author:** DeepSeek\n- **Converter:** [Homebrew](https://www.homebrew.ltd/)\n- **Original License:** [License](https://huggingface.co/deepseek-ai/DeepSeek-R1-Distill-Qwen-32B#7-license)\n- **Papers:** [DeepSeek-R1: Incentivizing Reasoning Capability in LLMs via Reinforcement Learning](https://arxiv.org/html/2501.12948v1)", - "disabled": false, - "downloads": 597, - "gated": false, - "gguf": { - "architecture": "qwen2", - "bos_token": "<|begin▁of▁sentence|>", - "chat_template": "{% if not add_generation_prompt is defined %}{% set add_generation_prompt = false %}{% endif %}{% set ns = namespace(is_first=false, is_tool=false, is_output_first=true, system_prompt='') %}{%- for message in messages %}{%- if message['role'] == 'system' %}{% set ns.system_prompt = message['content'] %}{%- endif %}{%- endfor %}{{bos_token}}{{ns.system_prompt}}{%- for message in messages %}{%- if message['role'] == 'user' %}{%- set ns.is_tool = false -%}{{'<|User|>' + message['content']}}{%- endif %}{%- if message['role'] == 'assistant' and message['content'] is none %}{%- set ns.is_tool = false -%}{%- for tool in message['tool_calls']%}{%- if not ns.is_first %}{{'<|Assistant|><|tool▁calls▁begin|><|tool▁call▁begin|>' + tool['type'] + '<|tool▁sep|>' + tool['function']['name'] + '\\n' + '```json' + '\\n' + tool['function']['arguments'] + '\\n' + '```' + '<|tool▁call▁end|>'}}{%- set ns.is_first = true -%}{%- else %}{{'\\n' + '<|tool▁call▁begin|>' + tool['type'] + '<|tool▁sep|>' + tool['function']['name'] + '\\n' + '```json' + '\\n' + tool['function']['arguments'] + '\\n' + '```' + '<|tool▁call▁end|>'}}{{'<|tool▁calls▁end|><|end▁of▁sentence|>'}}{%- endif %}{%- endfor %}{%- endif %}{%- if message['role'] == 'assistant' and message['content'] is not none %}{%- if ns.is_tool %}{{'<|tool▁outputs▁end|>' + message['content'] + '<|end▁of▁sentence|>'}}{%- set ns.is_tool = false -%}{%- else %}{% set content = message['content'] %}{% if '' in content %}{% set content = content.split('')[-1] %}{% endif %}{{'<|Assistant|>' + content + '<|end▁of▁sentence|>'}}{%- endif %}{%- endif %}{%- if message['role'] == 'tool' %}{%- set ns.is_tool = true -%}{%- if ns.is_output_first %}{{'<|tool▁outputs▁begin|><|tool▁output▁begin|>' + message['content'] + '<|tool▁output▁end|>'}}{%- set ns.is_output_first = false %}{%- else %}{{'\\n<|tool▁output▁begin|>' + message['content'] + '<|tool▁output▁end|>'}}{%- endif %}{%- endif %}{%- endfor -%}{% if ns.is_tool %}{{'<|tool▁outputs▁end|>'}}{% endif %}{% if add_generation_prompt and not ns.is_tool %}{{'<|Assistant|>\\n'}}{% endif %}", - "context_length": 131072, - "eos_token": "<|end▁of▁sentence|>", - "total": 32763876352 - }, - "id": "cortexso/deepseek-r1-distill-qwen-32b", - "lastModified": "2025-03-03T06:41:05.000Z", - "likes": 0, - "model-index": null, - "modelId": "cortexso/deepseek-r1-distill-qwen-32b", - "pipeline_tag": "text-generation", - "private": false, - "sha": "0ec9981b2b5ad5c04a5357a3c328f10735efc79a", - "siblings": [ - { - "rfilename": ".gitattributes" - }, - { - "rfilename": "README.md" - }, - { - "rfilename": "deepseek-r1-distill-qwen-32b-q2_k.gguf" - }, - { - "rfilename": "deepseek-r1-distill-qwen-32b-q3_k_l.gguf" - }, - { - "rfilename": "deepseek-r1-distill-qwen-32b-q3_k_m.gguf" - }, - { - "rfilename": "deepseek-r1-distill-qwen-32b-q3_k_s.gguf" - }, - { - "rfilename": "deepseek-r1-distill-qwen-32b-q4_k_m.gguf" - }, - { - "rfilename": "deepseek-r1-distill-qwen-32b-q4_k_s.gguf" - }, - { 
- "rfilename": "deepseek-r1-distill-qwen-32b-q5_k_m.gguf" - }, - { - "rfilename": "deepseek-r1-distill-qwen-32b-q5_k_s.gguf" - }, - { - "rfilename": "deepseek-r1-distill-qwen-32b-q6_k.gguf" - }, - { - "rfilename": "deepseek-r1-distill-qwen-32b-q8_0.gguf" - }, - { - "rfilename": "metadata.yml" - }, - { - "rfilename": "model.yml" - } - ], - "spaces": [], - "tags": [ - "gguf", - "cortex.cpp", - "text-generation", - "license:mit", - "endpoints_compatible", - "region:us", - "conversational" - ], - "usedStorage": 225982083296, - "widgetData": [ - { - "text": "Hi, what can you help me with?" - }, - { - "text": "What is 84 * 3 / 2?" - }, - { - "text": "Tell me an interesting fact about the universe!" - }, - { - "text": "Explain quantum computing in simple terms." - } - ] - }, - "models": [ - { - "id": "deepseek-r1-distill-qwen-32b:32b", - "size": 19851335520 - } - ] - }, - { - "author": "DeepSeek-AI", - "id": "cortexso/deepseek-r1-distill-llama-70b", - "metadata": { - "_id": "678fe1673b0a6384a4e1f887", - "author": "cortexso", - "cardData": { - "license": "mit", - "pipeline_tag": "text-generation", - "tags": ["cortex.cpp"] - }, - "createdAt": "2025-01-21T18:03:19.000Z", - "description": "---\nlicense: mit\npipeline_tag: text-generation\ntags:\n- cortex.cpp\n---\n\n## Overview\n\n**DeepSeek** developed and released the [DeepSeek R1 Distill Llama 70B](https://huggingface.co/deepseek-ai/DeepSeek-R1-Distill-Llama-70B) model, a distilled version of the Llama 70B language model. This model represents the pinnacle of the DeepSeek R1 Distill series, designed for exceptional performance in text generation, dialogue tasks, and advanced reasoning, offering unparalleled capabilities for large-scale AI applications.\n\nThe model is ideal for enterprise-grade applications, research, conversational AI, and large-scale knowledge systems, providing top-tier accuracy, safety, and efficiency.\n\n## Variants\n\n| No | Variant | Cortex CLI command |\n| --- | --- | --- |\n| 1 | [Deepseek-r1-distill-llama-70b-70b](https://huggingface.co/cortexso/deepseek-r1-distill-llama-70b/tree/70b) | `cortex run deepseek-r1-distill-llama-70b:70b` |\n\n## Use it with Jan (UI)\n\n1. Install **Jan** using [Quickstart](https://jan.ai/docs/quickstart)\n2. Use in Jan model Hub:\n ```bash\n cortexso/deepseek-r1-distill-llama-70b\n ```\n\n## Use it with Cortex (CLI)\n\n1. Install **Cortex** using [Quickstart](https://cortex.jan.ai/docs/quickstart)\n2. 
Run the model with command:\n ```bash\n cortex run deepseek-r1-distill-llama-70b\n ```\n\n## Credits\n\n- **Author:** DeepSeek\n- **Converter:** [Homebrew](https://www.homebrew.ltd/)\n- **Original License:** [License](https://huggingface.co/deepseek-ai/DeepSeek-R1-Distill-Llama-70B#7-license)\n- **Papers:** [DeepSeek-R1: Incentivizing Reasoning Capability in LLMs via Reinforcement Learning](https://arxiv.org/html/2501.12948v1)", - "disabled": false, - "downloads": 580, - "gated": false, - "gguf": { - "architecture": "llama", - "bos_token": "<|begin▁of▁sentence|>", - "chat_template": "{% if not add_generation_prompt is defined %}{% set add_generation_prompt = false %}{% endif %}{% set ns = namespace(is_first=false, is_tool=false, is_output_first=true, system_prompt='') %}{%- for message in messages %}{%- if message['role'] == 'system' %}{% set ns.system_prompt = message['content'] %}{%- endif %}{%- endfor %}{{bos_token}}{{ns.system_prompt}}{%- for message in messages %}{%- if message['role'] == 'user' %}{%- set ns.is_tool = false -%}{{'<|User|>' + message['content']}}{%- endif %}{%- if message['role'] == 'assistant' and message['content'] is none %}{%- set ns.is_tool = false -%}{%- for tool in message['tool_calls']%}{%- if not ns.is_first %}{{'<|Assistant|><|tool▁calls▁begin|><|tool▁call▁begin|>' + tool['type'] + '<|tool▁sep|>' + tool['function']['name'] + '\\n' + '```json' + '\\n' + tool['function']['arguments'] + '\\n' + '```' + '<|tool▁call▁end|>'}}{%- set ns.is_first = true -%}{%- else %}{{'\\n' + '<|tool▁call▁begin|>' + tool['type'] + '<|tool▁sep|>' + tool['function']['name'] + '\\n' + '```json' + '\\n' + tool['function']['arguments'] + '\\n' + '```' + '<|tool▁call▁end|>'}}{{'<|tool▁calls▁end|><|end▁of▁sentence|>'}}{%- endif %}{%- endfor %}{%- endif %}{%- if message['role'] == 'assistant' and message['content'] is not none %}{%- if ns.is_tool %}{{'<|tool▁outputs▁end|>' + message['content'] + '<|end▁of▁sentence|>'}}{%- set ns.is_tool = false -%}{%- else %}{% set content = message['content'] %}{% if '' in content %}{% set content = content.split('')[-1] %}{% endif %}{{'<|Assistant|>' + content + '<|end▁of▁sentence|>'}}{%- endif %}{%- endif %}{%- if message['role'] == 'tool' %}{%- set ns.is_tool = true -%}{%- if ns.is_output_first %}{{'<|tool▁outputs▁begin|><|tool▁output▁begin|>' + message['content'] + '<|tool▁output▁end|>'}}{%- set ns.is_output_first = false %}{%- else %}{{'\\n<|tool▁output▁begin|>' + message['content'] + '<|tool▁output▁end|>'}}{%- endif %}{%- endif %}{%- endfor -%}{% if ns.is_tool %}{{'<|tool▁outputs▁end|>'}}{% endif %}{% if add_generation_prompt and not ns.is_tool %}{{'<|Assistant|>\\n'}}{% endif %}", - "context_length": 131072, - "eos_token": "<|end▁of▁sentence|>", - "total": 70553706560 - }, - "id": "cortexso/deepseek-r1-distill-llama-70b", - "lastModified": "2025-03-03T06:42:21.000Z", - "likes": 0, - "model-index": null, - "modelId": "cortexso/deepseek-r1-distill-llama-70b", - "pipeline_tag": "text-generation", - "private": false, - "sha": "d03fa1c83966573864075845a4b493af9aa8ed53", - "siblings": [ - { - "rfilename": ".gitattributes" - }, - { - "rfilename": "README.md" - }, - { - "rfilename": "deepseek-r1-distill-llama-70b-q4_k_m.gguf" - }, - { - "rfilename": "metadata.yml" - }, - { - "rfilename": "model.yml" - } - ], - "spaces": [], - "tags": [ - "gguf", - "cortex.cpp", - "text-generation", - "license:mit", - "endpoints_compatible", - "region:us", - "conversational" - ], - "usedStorage": 85040791136, - "widgetData": [ - { - "text": "Hi, what can you help me with?" 
- }, - { - "text": "What is 84 * 3 / 2?" - }, - { - "text": "Tell me an interesting fact about the universe!" - }, - { - "text": "Explain quantum computing in simple terms." - } - ] - }, - "models": [ - { - "id": "deepseek-r1-distill-llama-70b:70b", - "size": 42520395584 - } - ] - }, - { - "author": "DeepSeek-AI", - "id": "cortexso/deepseek-r1-distill-llama-8b", - "metadata": { - "_id": "678f4b5625a9b93997f1f666", - "author": "cortexso", - "cardData": { - "license": "mit", - "pipeline_tag": "text-generation", - "tags": ["cortex.cpp"] - }, - "createdAt": "2025-01-21T07:23:02.000Z", - "description": "---\nlicense: mit\npipeline_tag: text-generation\ntags:\n- cortex.cpp\n---\n\n## Overview\n\n**DeepSeek** developed and released the [DeepSeek R1 Distill Llama 8B](https://huggingface.co/deepseek-ai/DeepSeek-R1-Distill-Llama-8B) model, a distilled version of the Llama 8B language model. This variant is fine-tuned for high-performance text generation, optimized for dialogue, and tailored for information-seeking tasks. It offers a robust balance between model size and performance, making it suitable for demanding conversational AI and research use cases.\n\nThe model is designed to deliver accurate, efficient, and safe responses in applications such as customer support, knowledge systems, and research environments.\n\n\n## Variants\n\n| No | Variant | Cortex CLI command |\n| --- | --- | --- |\n| 1 | [Deepseek-r1-distill-llama-8b-8b](https://huggingface.co/cortexso/deepseek-r1-distill-llama-8b/tree/8b) | `cortex run deepseek-r1-distill-llama-8b:8b` |\n\n## Use it with Jan (UI)\n\n1. Install **Jan** using [Quickstart](https://jan.ai/docs/quickstart)\n2. Use in Jan model Hub:\n ```bash\n cortexso/deepseek-r1-distill-llama-8b\n ```\n\n## Use it with Cortex (CLI)\n\n1. Install **Cortex** using [Quickstart](https://cortex.jan.ai/docs/quickstart)\n2. 
Run the model with command:\n ```bash\n cortex run deepseek-r1-distill-llama-8b\n ```\n\n## Credits\n\n- **Author:** DeepSeek\n- **Converter:** [Homebrew](https://www.homebrew.ltd/)\n- **Original License:** [License](https://huggingface.co/deepseek-ai/DeepSeek-R1-Distill-Llama-8B#7-license)\n- **Papers:** [DeepSeek-R1: Incentivizing Reasoning Capability in LLMs via Reinforcement Learning](https://arxiv.org/html/2501.12948v1)", - "disabled": false, - "downloads": 933, - "gated": false, - "gguf": { - "architecture": "llama", - "bos_token": "<|begin▁of▁sentence|>", - "chat_template": "{% if not add_generation_prompt is defined %}{% set add_generation_prompt = false %}{% endif %}{% set ns = namespace(is_first=false, is_tool=false, is_output_first=true, system_prompt='') %}{%- for message in messages %}{%- if message['role'] == 'system' %}{% set ns.system_prompt = message['content'] %}{%- endif %}{%- endfor %}{{bos_token}}{{ns.system_prompt}}{%- for message in messages %}{%- if message['role'] == 'user' %}{%- set ns.is_tool = false -%}{{'<|User|>' + message['content']}}{%- endif %}{%- if message['role'] == 'assistant' and message['content'] is none %}{%- set ns.is_tool = false -%}{%- for tool in message['tool_calls']%}{%- if not ns.is_first %}{{'<|Assistant|><|tool▁calls▁begin|><|tool▁call▁begin|>' + tool['type'] + '<|tool▁sep|>' + tool['function']['name'] + '\\n' + '```json' + '\\n' + tool['function']['arguments'] + '\\n' + '```' + '<|tool▁call▁end|>'}}{%- set ns.is_first = true -%}{%- else %}{{'\\n' + '<|tool▁call▁begin|>' + tool['type'] + '<|tool▁sep|>' + tool['function']['name'] + '\\n' + '```json' + '\\n' + tool['function']['arguments'] + '\\n' + '```' + '<|tool▁call▁end|>'}}{{'<|tool▁calls▁end|><|end▁of▁sentence|>'}}{%- endif %}{%- endfor %}{%- endif %}{%- if message['role'] == 'assistant' and message['content'] is not none %}{%- if ns.is_tool %}{{'<|tool▁outputs▁end|>' + message['content'] + '<|end▁of▁sentence|>'}}{%- set ns.is_tool = false -%}{%- else %}{% set content = message['content'] %}{% if '' in content %}{% set content = content.split('')[-1] %}{% endif %}{{'<|Assistant|>' + content + '<|end▁of▁sentence|>'}}{%- endif %}{%- endif %}{%- if message['role'] == 'tool' %}{%- set ns.is_tool = true -%}{%- if ns.is_output_first %}{{'<|tool▁outputs▁begin|><|tool▁output▁begin|>' + message['content'] + '<|tool▁output▁end|>'}}{%- set ns.is_output_first = false %}{%- else %}{{'\\n<|tool▁output▁begin|>' + message['content'] + '<|tool▁output▁end|>'}}{%- endif %}{%- endif %}{%- endfor -%}{% if ns.is_tool %}{{'<|tool▁outputs▁end|>'}}{% endif %}{% if add_generation_prompt and not ns.is_tool %}{{'<|Assistant|>\\n'}}{% endif %}", - "context_length": 131072, - "eos_token": "<|end▁of▁sentence|>", - "total": 8030261312 - }, - "id": "cortexso/deepseek-r1-distill-llama-8b", - "lastModified": "2025-03-03T06:33:03.000Z", - "likes": 0, - "model-index": null, - "modelId": "cortexso/deepseek-r1-distill-llama-8b", - "pipeline_tag": "text-generation", - "private": false, - "sha": "b3321ad8a700b3aa2c3fc44ac84a167bd11ecdb8", - "siblings": [ - { - "rfilename": ".gitattributes" - }, - { - "rfilename": "README.md" - }, - { - "rfilename": "deepseek-r1-distill-llama-8b-q2_k.gguf" - }, - { - "rfilename": "deepseek-r1-distill-llama-8b-q3_k_l.gguf" - }, - { - "rfilename": "deepseek-r1-distill-llama-8b-q3_k_m.gguf" - }, - { - "rfilename": "deepseek-r1-distill-llama-8b-q3_k_s.gguf" - }, - { - "rfilename": "deepseek-r1-distill-llama-8b-q4_k_m.gguf" - }, - { - "rfilename": "deepseek-r1-distill-llama-8b-q4_k_s.gguf" - }, - { 
- "rfilename": "deepseek-r1-distill-llama-8b-q5_k_m.gguf" - }, - { - "rfilename": "deepseek-r1-distill-llama-8b-q5_k_s.gguf" - }, - { - "rfilename": "deepseek-r1-distill-llama-8b-q6_k.gguf" - }, - { - "rfilename": "deepseek-r1-distill-llama-8b-q8_0.gguf" - }, - { - "rfilename": "metadata.yml" - }, - { - "rfilename": "model.yml" - } - ], - "spaces": [], - "tags": [ - "gguf", - "cortex.cpp", - "text-generation", - "license:mit", - "endpoints_compatible", - "region:us", - "conversational" - ], - "usedStorage": 56187723232, - "widgetData": [ - { - "text": "Hi, what can you help me with?" - }, - { - "text": "What is 84 * 3 / 2?" - }, - { - "text": "Tell me an interesting fact about the universe!" - }, - { - "text": "Explain quantum computing in simple terms." - } - ] - }, - "models": [ - { - "id": "deepseek-r1-distill-llama-8b:8b", - "size": 4920736256 - } - ] - }, - { - "author": "NovaSky-AI", - "id": "cortexso/sky-t1", - "metadata": { - "_id": "6782f82c860ee02fe01dbd60", - "author": "cortexso", - "cardData": { - "license": "apache-2.0", - "pipeline_tag": "text-generation", - "tags": ["cortex.cpp"] - }, - "createdAt": "2025-01-11T23:01:00.000Z", - "description": "---\nlicense: apache-2.0\npipeline_tag: text-generation\ntags:\n- cortex.cpp\n---\n## Overview\n\n**NovaSky Team** developed and released the [Sky-T1](https://huggingface.co/novasky-ai/Sky-T1-32B-Preview), a 32-billion parameter reasoning model adapted from Qwen2.5-32B-Instruct. This model is designed for advanced reasoning, coding, and mathematical tasks, achieving performance comparable to state-of-the-art models like o1-preview while being cost-efficient. Sky-T1 was trained on 17K verified responses from Qwen/QwQ-32B-Preview, with additional science data from the Still-2 dataset, ensuring high-quality and diverse learning sources.\n\nThe model supports complex reasoning via long chain-of-thought processes and excels in both coding and mathematical challenges. Utilizing Llama-Factory with DeepSpeed Zero-3 Offload, Sky-T1 training was completed in just 19 hours on 8 H100 GPUs, demonstrating efficient resource utilization. These capabilities make Sky-T1 an exceptional tool for applications in programming, academic research, and reasoning-intensive tasks.\n\n## Variants\n\n| No | Variant | Cortex CLI command |\n| --- | --- | --- |\n| 1 | [Sky-t1-32b](https://huggingface.co/cortexso/sky-t1/tree/32b) | `cortex run sky-t1:32b` |\n\n## Use it with Jan (UI)\n\n1. Install **Jan** using [Quickstart](https://jan.ai/docs/quickstart)\n2. Use in Jan model Hub:\n ```bash\n cortexso/sky-t1\n ```\n\n## Use it with Cortex (CLI)\n\n1. Install **Cortex** using [Quickstart](https://cortex.jan.ai/docs/quickstart)\n2. Run the model with command:\n ```bash\n cortex run sky-t1\n ```\n\n## Credits\n\n- **Author:** NovaSky Team\n- **Converter:** [Homebrew](https://www.homebrew.ltd/)\n- **Original License:** [License](https://choosealicense.com/licenses/apache-2.0/)\n- **Papers:** [Sky-T1: Fully Open-Source Reasoning Model](https://novasky-ai.github.io/posts/sky-t1/)", - "disabled": false, - "downloads": 116, - "gated": false, - "gguf": { - "architecture": "qwen2", - "bos_token": "<|endoftext|>", - "chat_template": "{%- if tools %}\n {{- '<|im_start|>system\\n' }}\n {%- if messages[0]['role'] == 'system' %}\n {{- messages[0]['content'] }}\n {%- else %}\n {{- 'You are Qwen, created by Alibaba Cloud. You are a helpful assistant.' 
}}\n {%- endif %}\n {{- \"\\n\\n# Tools\\n\\nYou may call one or more functions to assist with the user query.\\n\\nYou are provided with function signatures within XML tags:\\n\" }}\n {%- for tool in tools %}\n {{- \"\\n\" }}\n {{- tool | tojson }}\n {%- endfor %}\n {{- \"\\n\\n\\nFor each function call, return a json object with function name and arguments within XML tags:\\n\\n{\\\"name\\\": , \\\"arguments\\\": }\\n<|im_end|>\\n\" }}\n{%- else %}\n {%- if messages[0]['role'] == 'system' %}\n {{- '<|im_start|>system\\n' + messages[0]['content'] + '<|im_end|>\\n' }}\n {%- else %}\n {{- '<|im_start|>system\\nYou are Qwen, created by Alibaba Cloud. You are a helpful assistant.<|im_end|>\\n' }}\n {%- endif %}\n{%- endif %}\n{%- for message in messages %}\n {%- if (message.role == \"user\") or (message.role == \"system\" and not loop.first) or (message.role == \"assistant\" and not message.tool_calls) %}\n {{- '<|im_start|>' + message.role + '\\n' + message.content + '<|im_end|>' + '\\n' }}\n {%- elif message.role == \"assistant\" %}\n {{- '<|im_start|>' + message.role }}\n {%- if message.content %}\n {{- '\\n' + message.content }}\n {%- endif %}\n {%- for tool_call in message.tool_calls %}\n {%- if tool_call.function is defined %}\n {%- set tool_call = tool_call.function %}\n {%- endif %}\n {{- '\\n\\n{\"name\": \"' }}\n {{- tool_call.name }}\n {{- '\", \"arguments\": ' }}\n {{- tool_call.arguments | tojson }}\n {{- '}\\n' }}\n {%- endfor %}\n {{- '<|im_end|>\\n' }}\n {%- elif message.role == \"tool\" %}\n {%- if (loop.index0 == 0) or (messages[loop.index0 - 1].role != \"tool\") %}\n {{- '<|im_start|>user' }}\n {%- endif %}\n {{- '\\n\\n' }}\n {{- message.content }}\n {{- '\\n' }}\n {%- if loop.last or (messages[loop.index0 + 1].role != \"tool\") %}\n {{- '<|im_end|>\\n' }}\n {%- endif %}\n {%- endif %}\n{%- endfor %}\n{%- if add_generation_prompt %}\n {{- '<|im_start|>assistant\\n' }}\n{%- endif %}\n", - "context_length": 32768, - "eos_token": "<|im_end|>", - "total": 32763876352 - }, - "id": "cortexso/sky-t1", - "lastModified": "2025-03-03T05:51:45.000Z", - "likes": 0, - "model-index": null, - "modelId": "cortexso/sky-t1", - "pipeline_tag": "text-generation", - "private": false, - "sha": "05f06ab0191808f8eb21fa3c60c9ec4a6bef4978", - "siblings": [ - { - "rfilename": ".gitattributes" - }, - { - "rfilename": "README.md" - }, - { - "rfilename": "metadata.yml" - }, - { - "rfilename": "model.yml" - }, - { - "rfilename": "sky-t1-32b-preview-q2_k.gguf" - }, - { - "rfilename": "sky-t1-32b-preview-q3_k_l.gguf" - }, - { - "rfilename": "sky-t1-32b-preview-q3_k_m.gguf" - }, - { - "rfilename": "sky-t1-32b-preview-q3_k_s.gguf" - }, - { - "rfilename": "sky-t1-32b-preview-q4_k_m.gguf" - }, - { - "rfilename": "sky-t1-32b-preview-q4_k_s.gguf" - }, - { - "rfilename": "sky-t1-32b-preview-q5_k_m.gguf" - }, - { - "rfilename": "sky-t1-32b-preview-q5_k_s.gguf" - }, - { - "rfilename": "sky-t1-32b-preview-q6_k.gguf" - }, - { - "rfilename": "sky-t1-32b-preview-q8_0.gguf" - } - ], - "spaces": [], - "tags": [ - "gguf", - "cortex.cpp", - "text-generation", - "license:apache-2.0", - "endpoints_compatible", - "region:us", - "conversational" - ], - "usedStorage": 225982094944, - "widgetData": [ - { - "text": "Hi, what can you help me with?" - }, - { - "text": "What is 84 * 3 / 2?" - }, - { - "text": "Tell me an interesting fact about the universe!" - }, - { - "text": "Explain quantum computing in simple terms." 
- } - ] - }, - "models": [ - { - "id": "sky-t1:32b", - "size": 19851336576 - } - ] - }, - { - "author": "CohereForAI", - "id": "cortexso/aya", - "metadata": { - "_id": "672aa4167f36760042e632ed", - "author": "cortexso", - "cardData": { - "license": "cc-by-nc-4.0", - "pipeline_tag": "text-generation", - "tags": ["cortex.cpp"] - }, - "createdAt": "2024-11-05T23:02:46.000Z", - "description": "---\nlicense: cc-by-nc-4.0\npipeline_tag: text-generation\ntags:\n- cortex.cpp\n---\n## Overview\n\n**Cohere For AI** developed and released the [Aya 23](https://huggingface.co/CohereForAI/aya-23-35B), an open weights instruction fine-tuned model with advanced multilingual capabilities. Aya 23 is built upon the highly performant Command family of models and fine-tuned using the Aya Collection to deliver state-of-the-art performance across 23 languages. This multilingual large language model is designed to support a wide range of use cases, including multilingual text generation, understanding, and translation tasks.\n\nAya 23, balancing efficiency and performance. It offers robust multilingual support for languages such as Arabic, Chinese, English, Spanish, Hindi, Vietnamese, and more, making it a versatile tool for global applications. A 35-billion parameter version is also available [here](https://huggingface.co/CohereForAI/aya-23-35b).\n\n## Variants\n\n| No | Variant | Cortex CLI command |\n| --- | --- | --- |\n| 1 | [Aya-8b](https://huggingface.co/cortexso/aya/tree/8b) | `cortex run aya:8b` |\n| 2 | [Aya-35b](https://huggingface.co/cortexso/aya/tree/35b) | `cortex run aya:35b` |\n\n## Use it with Jan (UI)\n\n1. Install **Jan** using [Quickstart](https://jan.ai/docs/quickstart)\n2. Use in Jan model Hub:\n ```bash\n cortexso/aya\n ```\n\n## Use it with Cortex (CLI)\n\n1. Install **Cortex** using [Quickstart](https://cortex.jan.ai/docs/quickstart)\n2. Run the model with command:\n ```bash\n cortex run aya\n ```\n\n## Credits\n\n- **Author:** Cohere For AI\n- **Converter:** [Homebrew](https://www.homebrew.ltd/)\n- **Original License:** [License](https://spdx.org/licenses/CC-BY-NC-4.0)", - "disabled": false, - "downloads": 168, - "gated": false, - "gguf": { - "architecture": "command-r", - "bos_token": "", - "chat_template": "{{ bos_token }}{% if messages[0]['role'] == 'system' %}{% set loop_messages = messages[1:] %}{% set system_message = messages[0]['content'] %}{% elif false == true %}{% set loop_messages = messages %}{% set system_message = 'You are Aya, a brilliant, sophisticated, AI-assistant trained to assist human users by providing thorough responses. You are trained by Cohere.' 
%}{% else %}{% set loop_messages = messages %}{% set system_message = false %}{% endif %}{% if system_message != false %}{{ '<|START_OF_TURN_TOKEN|><|SYSTEM_TOKEN|>' + system_message + '<|END_OF_TURN_TOKEN|>' }}{% endif %}{% for message in loop_messages %}{% if (message['role'] == 'user') != (loop.index0 % 2 == 0) %}{{ raise_exception('Conversation roles must alternate user/assistant/user/assistant/...') }}{% endif %}{% set content = message['content'] %}{% if message['role'] == 'user' %}{{ '<|START_OF_TURN_TOKEN|><|USER_TOKEN|>' + content.strip() + '<|END_OF_TURN_TOKEN|>' }}{% elif message['role'] == 'assistant' %}{{ '<|START_OF_TURN_TOKEN|><|CHATBOT_TOKEN|>' + content.strip() + '<|END_OF_TURN_TOKEN|>' }}{% endif %}{% endfor %}{% if add_generation_prompt %}{{ '<|START_OF_TURN_TOKEN|><|CHATBOT_TOKEN|>' }}{% endif %}", - "context_length": 8192, - "eos_token": "<|END_OF_TURN_TOKEN|>", - "total": 34980831232 - }, - "id": "cortexso/aya", - "lastModified": "2025-03-02T14:58:34.000Z", - "likes": 0, - "model-index": null, - "modelId": "cortexso/aya", - "pipeline_tag": "text-generation", - "private": false, - "sha": "d97fef50adc54a22ec1e3133771f7cb17528742b", - "siblings": [ - { - "rfilename": ".gitattributes" - }, - { - "rfilename": "README.md" - }, - { - "rfilename": "aya-23-35b-q2_k.gguf" - }, - { - "rfilename": "aya-23-35b-q3_k_l.gguf" - }, - { - "rfilename": "aya-23-35b-q3_k_m.gguf" - }, - { - "rfilename": "aya-23-35b-q3_k_s.gguf" - }, - { - "rfilename": "aya-23-35b-q4_k_m.gguf" - }, - { - "rfilename": "aya-23-35b-q4_k_s.gguf" - }, - { - "rfilename": "aya-23-35b-q5_k_m.gguf" - }, - { - "rfilename": "aya-23-35b-q5_k_s.gguf" - }, - { - "rfilename": "aya-23-35b-q6_k.gguf" - }, - { - "rfilename": "aya-23-35b-q8_0.gguf" - }, - { - "rfilename": "aya-23-8b-q2_k.gguf" - }, - { - "rfilename": "aya-23-8b-q3_k_l.gguf" - }, - { - "rfilename": "aya-23-8b-q3_k_m.gguf" - }, - { - "rfilename": "aya-23-8b-q3_k_s.gguf" - }, - { - "rfilename": "aya-23-8b-q4_k_m.gguf" - }, - { - "rfilename": "aya-23-8b-q4_k_s.gguf" - }, - { - "rfilename": "aya-23-8b-q5_k_m.gguf" - }, - { - "rfilename": "aya-23-8b-q5_k_s.gguf" - }, - { - "rfilename": "aya-23-8b-q6_k.gguf" - }, - { - "rfilename": "aya-23-8b-q8_0.gguf" - }, - { - "rfilename": "metadata.yml" - }, - { - "rfilename": "model.yml" - } - ], - "spaces": [], - "tags": [ - "gguf", - "cortex.cpp", - "text-generation", - "license:cc-by-nc-4.0", - "endpoints_compatible", - "region:us", - "conversational" - ], - "usedStorage": 302730192928, - "widgetData": [ - { - "text": "Hi, what can you help me with?" - }, - { - "text": "What is 84 * 3 / 2?" - }, - { - "text": "Tell me an interesting fact about the universe!" - }, - { - "text": "Explain quantum computing in simple terms." - } - ] - }, - "models": [ - { - "id": "aya:35b", - "size": 21527043520 - }, - { - "id": "aya:8b", - "size": 5056974496 - } - ] - }, - { - "author": "PowerInfer", - "id": "cortexso/small-thinker", - "metadata": { - "_id": "6777192582e1ec3ecb79d1a4", - "author": "cortexso", - "cardData": { - "license": "mit", - "pipeline_tag": "text-generation", - "tags": ["cortex.cpp"] - }, - "createdAt": "2025-01-02T22:54:29.000Z", - "description": "---\nlicense: mit\npipeline_tag: text-generation\ntags:\n- cortex.cpp\n---\n## Overview\n\n**PowerInfer** developed and released the [SmallThinker-3B-preview](https://huggingface.co/PowerInfer/SmallThinker-3B-Preview), a fine-tuned version of the Qwen2.5-3B-Instruct model. 
SmallThinker is optimized for efficient deployment on resource-constrained devices while maintaining high performance in reasoning, coding, and general text generation tasks. It outperforms its base model on key benchmarks, including AIME24, AMC23, and GAOKAO2024, making it a robust tool for both edge deployment and as a draft model for larger systems like QwQ-32B-Preview.\n\nSmallThinker was fine-tuned in two phases using high-quality datasets, including PowerInfer/QWQ-LONGCOT-500K and PowerInfer/LONGCOT-Refine-500K. Its small size allows for up to 70% faster inference speeds compared to larger models, making it ideal for applications requiring quick responses and efficient computation.\n\n## Variants\n\n| No | Variant | Cortex CLI command |\n| --- | --- | --- |\n| 1 | [Small-thinker-3b](https://huggingface.co/cortexso/small-thinker/tree/3b) | `cortex run small-thinker:3b` |\n\n## Use it with Jan (UI)\n\n1. Install **Jan** using [Quickstart](https://jan.ai/docs/quickstart)\n2. Use in Jan model Hub:\n ```bash\n cortexso/small-thinker\n ```\n\n## Use it with Cortex (CLI)\n\n1. Install **Cortex** using [Quickstart](https://cortex.jan.ai/docs/quickstart)\n2. Run the model with command:\n ```bash\n cortex run small-thinker\n ```\n\n## Credits\n\n- **Author:** PowerInfer\n- **Converter:** [Homebrew](https://www.homebrew.ltd/)\n- **Original License:** [License](https://huggingface.co/PowerInfer/SmallThinker-3B-Preview/blob/main/LICENSE)", - "disabled": false, - "downloads": 273, - "gated": false, - "gguf": { - "architecture": "qwen2", - "bos_token": "<|endoftext|>", - "chat_template": "{% set system_message = 'You are a helpful assistant.' %}{% if messages[0]['role'] == 'system' %}{% set loop_messages = messages[1:] %}{% set system_message = messages[0]['content'] %}{% else %}{% set loop_messages = messages %}{% endif %}{% if system_message is defined %}{{ '<|im_start|>system\n' + system_message + '<|im_end|>\n' }}{% endif %}{% for message in loop_messages %}{% set content = message['content'] %}{% if message['role'] == 'user' %}{{ '<|im_start|>user\n' + content + '<|im_end|>\n<|im_start|>assistant\n' }}{% elif message['role'] == 'assistant' %}{{ content + '<|im_end|>' + '\n' }}{% endif %}{% endfor %}", - "context_length": 32768, - "eos_token": "<|im_end|>", - "total": 3397103616 - }, - "id": "cortexso/small-thinker", - "lastModified": "2025-03-03T06:05:50.000Z", - "likes": 0, - "model-index": null, - "modelId": "cortexso/small-thinker", - "pipeline_tag": "text-generation", - "private": false, - "sha": "f2746c69548d6ff92db6ec663400ad9a0dc51bbc", - "siblings": [ - { - "rfilename": ".gitattributes" - }, - { - "rfilename": "README.md" - }, - { - "rfilename": "metadata.yml" - }, - { - "rfilename": "model.yml" - }, - { - "rfilename": "smallthinker-3b-preview-q2_k.gguf" - }, - { - "rfilename": "smallthinker-3b-preview-q3_k_l.gguf" - }, - { - "rfilename": "smallthinker-3b-preview-q3_k_m.gguf" - }, - { - "rfilename": "smallthinker-3b-preview-q3_k_s.gguf" - }, - { - "rfilename": "smallthinker-3b-preview-q4_k_m.gguf" - }, - { - "rfilename": "smallthinker-3b-preview-q4_k_s.gguf" - }, - { - "rfilename": "smallthinker-3b-preview-q5_k_m.gguf" - }, - { - "rfilename": "smallthinker-3b-preview-q5_k_s.gguf" - }, - { - "rfilename": "smallthinker-3b-preview-q6_k.gguf" - }, - { - "rfilename": "smallthinker-3b-preview-q8_0.gguf" - } - ], - "spaces": [], - "tags": [ - "gguf", - "cortex.cpp", - "text-generation", - "license:mit", - "endpoints_compatible", - "region:us", - "conversational" - ], - "usedStorage": 
23981289568, - "widgetData": [ - { - "text": "Hi, what can you help me with?" - }, - { - "text": "What is 84 * 3 / 2?" - }, - { - "text": "Tell me an interesting fact about the universe!" - }, - { - "text": "Explain quantum computing in simple terms." - } - ] - }, - "models": [ - { - "id": "small-thinker:3b", - "size": 2104931616 - } - ] - }, - { - "author": "Google", - "id": "cortexso/gemma2", - "metadata": { - "_id": "66b06c37491b555fefe0a0bf", - "author": "cortexso", - "cardData": { - "license": "gemma", - "pipeline_tag": "text-generation", - "tags": ["cortex.cpp"] - }, - "createdAt": "2024-08-05T06:07:51.000Z", - "description": "---\nlicense: gemma\npipeline_tag: text-generation\ntags:\n- cortex.cpp\n---\n\n## Overview\n\nThe [Gemma](https://huggingface.co/google/gemma-2-2b-it), state-of-the-art open model trained with the Gemma datasets that includes both synthetic data and the filtered publicly available websites data with a focus on high-quality and reasoning dense properties. The model belongs to the Gemma family with the 4B, 7B version in two variants 8K and 128K which is the context length (in tokens) that it can support.\n\n## Variants\n\n| No | Variant | Cortex CLI command |\n| --- | --- | --- |\n| 1 | [Gemma2-2b](https://huggingface.co/cortexso/gemma2/tree/2b) | `cortex run gemma2:2b` |\n| 2 | [Gemma2-9b](https://huggingface.co/cortexso/gemma2/tree/9b) | `cortex run gemma2:9b` |\n| 3 | [Gemma2-27b](https://huggingface.co/cortexso/gemma2/tree/27b) | `cortex run gemma2:27b` |\n\n## Use it with Jan (UI)\n\n1. Install **Jan** using [Quickstart](https://jan.ai/docs/quickstart)\n2. Use in Jan model Hub:\n ```bash\n cortexso/gemma2\n ```\n \n## Use it with Cortex (CLI)\n\n1. Install **Cortex** using [Quickstart](https://cortex.jan.ai/docs/quickstart)\n2. 
Run the model with command:\n ```bash\n cortex run gemma2\n ```\n \n## Credits\n\n- **Author:** Go‌ogle\n- **Converter:** [Homebrew](https://www.homebrew.ltd/)\n- **Original License:** [License](https://ai.google.dev/gemma/terms)\n- **Papers:** [Gemma Technical Report](https://arxiv.org/abs/2403.08295)", - "disabled": false, - "downloads": 796, - "gated": false, - "gguf": { - "architecture": "gemma2", - "bos_token": "", - "chat_template": "{{ bos_token }}{% if messages[0]['role'] == 'system' %}{{ raise_exception('System role not supported') }}{% endif %}{% for message in messages %}{% if (message['role'] == 'user') != (loop.index0 % 2 == 0) %}{{ raise_exception('Conversation roles must alternate user/assistant/user/assistant/...') }}{% endif %}{% if (message['role'] == 'assistant') %}{% set role = 'model' %}{% else %}{% set role = message['role'] %}{% endif %}{{ '' + role + '\n' + message['content'] | trim + '\n' }}{% endfor %}{% if add_generation_prompt %}{{'model\n'}}{% endif %}", - "context_length": 8192, - "eos_token": "", - "total": 27227128320 - }, - "id": "cortexso/gemma2", - "lastModified": "2025-03-03T06:25:38.000Z", - "likes": 0, - "model-index": null, - "modelId": "cortexso/gemma2", - "pipeline_tag": "text-generation", - "private": false, - "sha": "36fdfde32513f2a0be9e1b166952d4cee227aaf6", - "siblings": [ - { - "rfilename": ".gitattributes" - }, - { - "rfilename": "README.md" - }, - { - "rfilename": "gemma-2-27b-it-q2_k.gguf" - }, - { - "rfilename": "gemma-2-27b-it-q3_k_l.gguf" - }, - { - "rfilename": "gemma-2-27b-it-q3_k_m.gguf" - }, - { - "rfilename": "gemma-2-27b-it-q3_k_s.gguf" - }, - { - "rfilename": "gemma-2-27b-it-q4_k_m.gguf" - }, - { - "rfilename": "gemma-2-27b-it-q4_k_s.gguf" - }, - { - "rfilename": "gemma-2-27b-it-q5_k_m.gguf" - }, - { - "rfilename": "gemma-2-27b-it-q5_k_s.gguf" - }, - { - "rfilename": "gemma-2-27b-it-q6_k.gguf" - }, - { - "rfilename": "gemma-2-27b-it-q8_0.gguf" - }, - { - "rfilename": "gemma-2-2b-it-q2_k.gguf" - }, - { - "rfilename": "gemma-2-2b-it-q3_k_l.gguf" - }, - { - "rfilename": "gemma-2-2b-it-q3_k_m.gguf" - }, - { - "rfilename": "gemma-2-2b-it-q3_k_s.gguf" - }, - { - "rfilename": "gemma-2-2b-it-q4_k_m.gguf" - }, - { - "rfilename": "gemma-2-2b-it-q4_k_s.gguf" - }, - { - "rfilename": "gemma-2-2b-it-q5_k_m.gguf" - }, - { - "rfilename": "gemma-2-2b-it-q5_k_s.gguf" - }, - { - "rfilename": "gemma-2-2b-it-q6_k.gguf" - }, - { - "rfilename": "gemma-2-2b-it-q8_0.gguf" - }, - { - "rfilename": "gemma-2-9b-it-q2_k.gguf" - }, - { - "rfilename": "gemma-2-9b-it-q3_k_l.gguf" - }, - { - "rfilename": "gemma-2-9b-it-q3_k_m.gguf" - }, - { - "rfilename": "gemma-2-9b-it-q3_k_s.gguf" - }, - { - "rfilename": "gemma-2-9b-it-q4_k_m.gguf" - }, - { - "rfilename": "gemma-2-9b-it-q4_k_s.gguf" - }, - { - "rfilename": "gemma-2-9b-it-q5_k_m.gguf" - }, - { - "rfilename": "gemma-2-9b-it-q5_k_s.gguf" - }, - { - "rfilename": "gemma-2-9b-it-q6_k.gguf" - }, - { - "rfilename": "gemma-2-9b-it-q8_0.gguf" - }, - { - "rfilename": "metadata.yml" - }, - { - "rfilename": "model.yml" - } - ], - "spaces": [], - "tags": [ - "gguf", - "cortex.cpp", - "text-generation", - "arxiv:2403.08295", - "license:gemma", - "endpoints_compatible", - "region:us", - "conversational" - ], - "usedStorage": 280987360512, - "widgetData": [ - { - "text": "Hi, what can you help me with?" - }, - { - "text": "What is 84 * 3 / 2?" - }, - { - "text": "Tell me an interesting fact about the universe!" - }, - { - "text": "Explain quantum computing in simple terms." 
- } - ] - }, - "models": [ - { - "id": "gemma2:9b", - "size": 5761057888 - }, - { - "id": "gemma2:27b", - "size": 16645381792 - }, - { - "id": "gemma2:2b", - "size": 1708582656 - } - ] - }, - { - "author": "agentica-org", - "id": "cortexso/deepscaler", - "metadata": { - "_id": "67aaa7a5a6e6b3d852e347b2", - "author": "cortexso", - "cardData": { - "license": "mit", - "pipeline_tag": "text-generation", - "tags": ["cortex.cpp"] - }, - "createdAt": "2025-02-11T01:28:05.000Z", - "description": "---\nlicense: mit\npipeline_tag: text-generation\ntags:\n- cortex.cpp\n---\n## Overview\nDeepscaler is an advanced AI model developed from the agentica-org's DeepScaleR-1.5B-Preview, designed to enhance the efficiency and scalability of various machine learning tasks. Its core purpose is to provide high-quality predictive analytics and data processing capabilities while optimizing resource usage. Deepscaler is particularly useful in scenarios such as natural language processing, computer vision, and more complex data interpretation tasks, making it suitable for applications in industries like finance, healthcare, and entertainment. Users can leverage its performance to achieve faster training times and improved accuracy in their models. Overall, Deepscaler's architecture allows it to deliver robust results with reduced computational overhead, making it an excellent choice for developers and organizations aiming to scale their AI solutions.\n## Variants\n| No | Variant | Cortex CLI command |\n| --- | --- | --- |\n| 1 | [Deepscaler-1.5b](https://huggingface.co/cortexso/deepscaler/tree/1.5b) | cortex run deepscaler:1.5b |\n## Use it with Jan (UI)\n1. Install **Jan** using [Quickstart](https://jan.ai/docs/quickstart)\n2. Use in Jan model Hub:\n ```bash\n cortexso/deepscaler\n ```\n \n## Use it with Cortex (CLI)\n1. Install **Cortex** using [Quickstart](https://cortex.jan.ai/docs/quickstart)\n2. 
Run the model with command:\n ```bash\n cortex run deepscaler\n ```\n## Credits\n- **Author:** agentica-org\n- **Converter:** [Homebrew](https://www.homebrew.ltd/)\n- **Original License:** [LICENSE](https://huggingface.co/agentica-org/DeepScaleR-1.5B-Preview/blob/main/LICENSE)", - "disabled": false, - "downloads": 404, - "gated": false, - "gguf": { - "architecture": "qwen2", - "bos_token": "<|begin▁of▁sentence|>", - "chat_template": "{% if not add_generation_prompt is defined %}{% set add_generation_prompt = false %}{% endif %}{% set ns = namespace(is_first=false, is_tool=false, is_output_first=true, system_prompt='') %}{%- for message in messages %}{%- if message['role'] == 'system' %}{% set ns.system_prompt = message['content'] %}{%- endif %}{%- endfor %}{{bos_token}}{{ns.system_prompt}}{%- for message in messages %}{%- if message['role'] == 'user' %}{%- set ns.is_tool = false -%}{{'<|User|>' + message['content']}}{%- endif %}{%- if message['role'] == 'assistant' and message['content'] is none %}{%- set ns.is_tool = false -%}{%- for tool in message['tool_calls']%}{%- if not ns.is_first %}{{'<|Assistant|><|tool▁calls▁begin|><|tool▁call▁begin|>' + tool['type'] + '<|tool▁sep|>' + tool['function']['name'] + '\\n' + '```json' + '\\n' + tool['function']['arguments'] + '\\n' + '```' + '<|tool▁call▁end|>'}}{%- set ns.is_first = true -%}{%- else %}{{'\\n' + '<|tool▁call▁begin|>' + tool['type'] + '<|tool▁sep|>' + tool['function']['name'] + '\\n' + '```json' + '\\n' + tool['function']['arguments'] + '\\n' + '```' + '<|tool▁call▁end|>'}}{{'<|tool▁calls▁end|><|end▁of▁sentence|>'}}{%- endif %}{%- endfor %}{%- endif %}{%- if message['role'] == 'assistant' and message['content'] is not none %}{%- if ns.is_tool %}{{'<|tool▁outputs▁end|>' + message['content'] + '<|end▁of▁sentence|>'}}{%- set ns.is_tool = false -%}{%- else %}{% set content = message['content'] %}{% if '' in content %}{% set content = content.split('')[-1] %}{% endif %}{{'<|Assistant|>' + content + '<|end▁of▁sentence|>'}}{%- endif %}{%- endif %}{%- if message['role'] == 'tool' %}{%- set ns.is_tool = true -%}{%- if ns.is_output_first %}{{'<|tool▁outputs▁begin|><|tool▁output▁begin|>' + message['content'] + '<|tool▁output▁end|>'}}{%- set ns.is_output_first = false %}{%- else %}{{'\\n<|tool▁output▁begin|>' + message['content'] + '<|tool▁output▁end|>'}}{%- endif %}{%- endif %}{%- endfor -%}{% if ns.is_tool %}{{'<|tool▁outputs▁end|>'}}{% endif %}{% if add_generation_prompt and not ns.is_tool %}{{'<|Assistant|>'}}{% endif %}", - "context_length": 131072, - "eos_token": "<|end▁of▁sentence|>", - "total": 1777088000 - }, - "id": "cortexso/deepscaler", - "lastModified": "2025-03-03T06:07:30.000Z", - "likes": 1, - "model-index": null, - "modelId": "cortexso/deepscaler", - "pipeline_tag": "text-generation", - "private": false, - "sha": "f2ac6bdbe311a9dbaf2bc4d77baa460b06b169e6", - "siblings": [ - { - "rfilename": ".gitattributes" - }, - { - "rfilename": "README.md" - }, - { - "rfilename": "deepscaler-1.5b-preview-q2_k.gguf" - }, - { - "rfilename": "deepscaler-1.5b-preview-q3_k_l.gguf" - }, - { - "rfilename": "deepscaler-1.5b-preview-q3_k_m.gguf" - }, - { - "rfilename": "deepscaler-1.5b-preview-q3_k_s.gguf" - }, - { - "rfilename": "deepscaler-1.5b-preview-q4_k_m.gguf" - }, - { - "rfilename": "deepscaler-1.5b-preview-q4_k_s.gguf" - }, - { - "rfilename": "deepscaler-1.5b-preview-q5_k_m.gguf" - }, - { - "rfilename": "deepscaler-1.5b-preview-q5_k_s.gguf" - }, - { - "rfilename": "deepscaler-1.5b-preview-q6_k.gguf" - }, - { - "rfilename": 
"deepscaler-1.5b-preview-q8_0.gguf" - }, - { - "rfilename": "metadata.yml" - }, - { - "rfilename": "model.yml" - } - ], - "spaces": [], - "tags": [ - "gguf", - "cortex.cpp", - "text-generation", - "license:mit", - "endpoints_compatible", - "region:us", - "conversational" - ], - "usedStorage": 12728615584, - "widgetData": [ - { - "text": "Hi, what can you help me with?" - }, - { - "text": "What is 84 * 3 / 2?" - }, - { - "text": "Tell me an interesting fact about the universe!" - }, - { - "text": "Explain quantum computing in simple terms." - } - ] - }, - "models": [ - { - "id": "deepscaler:1.5b", - "size": 1117321888 - } - ] - }, - { - "author": "Falcon LLM TII UAE", - "id": "cortexso/falcon3", - "metadata": { - "_id": "6761d4519d9bc9c3b6e25ad4", - "author": "cortexso", - "cardData": { - "license": "apache-2.0", - "pipeline_tag": "text-generation", - "tags": ["cortex.cpp"] - }, - "createdAt": "2024-12-17T19:43:13.000Z", - "description": "---\nlicense: apache-2.0\npipeline_tag: text-generation\ntags:\n- cortex.cpp\n---\n\n\n## Overview\n\nFalcon3-10B-Instruct is part of the Falcon3 family of Open Foundation Models, offering state-of-the-art performance in reasoning, language understanding, instruction following, code, and mathematics. With 10 billion parameters, Falcon3-10B-Instruct is optimized for high-quality instruction-following tasks and supports multilingual capabilities in English, French, Spanish, and Portuguese. It provides a long context length of up to 32K tokens, making it suitable for extended document understanding and processing.\n\n## Variants\n\n| No | Variant | Cortex CLI command |\n| --- | --- | --- |\n| 1 | [Falcon3-10b](https://huggingface.co/cortexso/falcon3/tree/10b) | `cortex run falcon3:10b` |\n\n## Use it with Jan (UI)\n\n1. Install **Jan** using [Quickstart](https://jan.ai/docs/quickstart)\n2. Use in Jan model Hub:\n ```bash\n cortexhub/falcon3\n ```\n\n## Use it with Cortex (CLI)\n\n1. Install **Cortex** using [Quickstart](https://cortex.jan.ai/docs/quickstart)\n2. Run the model with command:\n ```bash\n cortex run falcon3\n ```\n \n## Credits\n\n- **Author:** Falcon3 Team\n- **Converter:** [Homebrew](https://www.homebrew.ltd/)\n- **Original License:** [License](https://falconllm.tii.ae/falcon-terms-and-conditions.html)\n- **Papers:** [Paper](https://arxiv.org/abs/2311.16867)", - "disabled": false, - "downloads": 276, - "gated": false, - "gguf": { - "architecture": "llama", - "bos_token": "<|endoftext|>", - "chat_template": "{%- if tools %}\n{{- '<|system|>\\n' }}\n{%- if messages[0]['role'] == 'system' %}\n{{- messages[0]['content'] }}\n{%- set remaining_messages = messages[1:] %}\n{%- else %}\n{%- set remaining_messages = messages %}\n{%- endif %}\n{{- 'You are a Falcon assistant skilled in function calling. You are helpful, respectful, and concise.\\n\\n# Tools\\n\\nYou have access to the following functions. You MUST use them to answer questions when needed. 
For each function call, you MUST return a JSON object inside tags.\\n\\n' + tools|tojson(indent=2) + '\\n\\n# Output Format\\n\\nYour response MUST follow this format when making function calls:\\n\\n[\\n {\"name\": \"function_name\", \"arguments\": {\"arg1\": \"value1\", \"arg2\": \"value2\"}},\\n {\"name\": \"another_function\", \"arguments\": {\"arg\": \"value\"}}\\n]\\n\\nIf no function calls are needed, respond normally without the tool_call tags.\\n' }}\n{%- for message in remaining_messages %}\n{%- if message['role'] == 'user' %}\n{{- '<|user|>\\n' + message['content'] + '\\n' }}\n{%- elif message['role'] == 'assistant' %}\n{%- if message.content %}\n{{- '<|assistant|>\\n' + message['content'] }}\n{%- endif %}\n{%- if message.tool_calls %}\n{{- '\\n\\n' }}\n{{- message.tool_calls|tojson(indent=2) }}\n{{- '\\n' }}\n{%- endif %}\n{{- eos_token + '\\n' }}\n{%- elif message['role'] == 'tool' %}\n{{- '<|assistant|>\\n\\n' + message['content'] + '\\n\\n' }}\n{%- endif %}\n{%- endfor %}\n{{- '<|assistant|>\\n' if add_generation_prompt }}\n{%- else %}\n{%- for message in messages %}\n{%- if message['role'] == 'system' %}\n{{- '<|system|>\\n' + message['content'] + '\\n' }}\n{%- elif message['role'] == 'user' %}\n{{- '<|user|>\\n' + message['content'] + '\\n' }}\n{%- elif message['role'] == 'assistant' %}\n{%- if not loop.last %}\n{{- '<|assistant|>\\n' + message['content'] + eos_token + '\\n' }}\n{%- else %}\n{{- '<|assistant|>\\n' + message['content'] + eos_token }}\n{%- endif %}\n{%- endif %}\n{%- if loop.last and add_generation_prompt %}\n{{- '<|assistant|>\\n' }}\n{%- endif %}\n{%- endfor %}\n{%- endif %}", - "context_length": 32768, - "eos_token": "<|endoftext|>", - "total": 10305653760 - }, - "id": "cortexso/falcon3", - "lastModified": "2025-03-03T03:54:15.000Z", - "likes": 1, - "model-index": null, - "modelId": "cortexso/falcon3", - "pipeline_tag": "text-generation", - "private": false, - "sha": "60030375504feacf3ba4205e8b9809e3dffc2ef7", - "siblings": [ - { - "rfilename": ".gitattributes" - }, - { - "rfilename": "README.md" - }, - { - "rfilename": "falcon3-10b-instruct-q2_k.gguf" - }, - { - "rfilename": "falcon3-10b-instruct-q3_k_l.gguf" - }, - { - "rfilename": "falcon3-10b-instruct-q3_k_m.gguf" - }, - { - "rfilename": "falcon3-10b-instruct-q3_k_s.gguf" - }, - { - "rfilename": "falcon3-10b-instruct-q4_k_m.gguf" - }, - { - "rfilename": "falcon3-10b-instruct-q4_k_s.gguf" - }, - { - "rfilename": "falcon3-10b-instruct-q5_k_m.gguf" - }, - { - "rfilename": "falcon3-10b-instruct-q5_k_s.gguf" - }, - { - "rfilename": "falcon3-10b-instruct-q6_k.gguf" - }, - { - "rfilename": "falcon3-10b-instruct-q8_0.gguf" - }, - { - "rfilename": "metadata.yml" - }, - { - "rfilename": "model.yml" - } - ], - "spaces": [], - "tags": [ - "gguf", - "cortex.cpp", - "text-generation", - "arxiv:2311.16867", - "license:apache-2.0", - "endpoints_compatible", - "region:us", - "conversational" - ], - "usedStorage": 65157537088, - "widgetData": [ - { - "text": "Hi, what can you help me with?" - }, - { - "text": "What is 84 * 3 / 2?" - }, - { - "text": "Tell me an interesting fact about the universe!" - }, - { - "text": "Explain quantum computing in simple terms." 
- } - ] - }, - "models": [ - { - "id": "falcon3:10b", - "size": 6287521312 - } - ] - }, - { - "author": "Qwen", - "id": "cortexso/qwen2", - "metadata": { - "_id": "667917d974da9f6bfc120671", - "author": "cortexso", - "cardData": { - "license": "other", - "license_link": "https://huggingface.co/Qwen/Qwen2-72B-Instruct/blob/main/LICENSE", - "license_name": "tongyi-qianwen", - "pipeline_tag": "text-generation", - "tags": ["cortex.cpp"] - }, - "createdAt": "2024-06-24T06:53:13.000Z", - "description": "---\nlicense: other\nlicense_name: tongyi-qianwen\nlicense_link: https://huggingface.co/Qwen/Qwen2-72B-Instruct/blob/main/LICENSE\npipeline_tag: text-generation\ntags:\n- cortex.cpp\n---\n\n## Overview\n\nQwen2 is the new series of Qwen large language models. For Qwen2, we release a number of base language models and instruction-tuned language models ranging from 0.5 to 72 billion parameters, including a Mixture-of-Experts model. This repo contains the instruction-tuned 72B Qwen2 model.\n\n## Variants\n\n| No | Variant | Cortex CLI command |\n| --- | --- | --- |\n| 1 | [Qwen2-7b](https://huggingface.co/cortexso/qwen2/tree/7b) | `cortex run qwen2:7b` |\n\n## Use it with Jan (UI)\n\n1. Install **Jan** using [Quickstart](https://jan.ai/docs/quickstart)\n2. Use in Jan model Hub:\n ```bash\n cortexhub/qwen2\n ```\n \n## Use it with Cortex (CLI)\n\n1. Install **Cortex** using [Quickstart](https://cortex.jan.ai/docs/quickstart)\n2. Run the model with command:\n ```bash\n cortex run qwen2\n ```\n \n## Credits\n\n- **Author:** Qwen\n- **Converter:** [Homebrew](https://www.homebrew.ltd/)\n- **Original License:** [Licence](https://huggingface.co/Qwen/Qwen2-72B-Instruct/blob/main/LICENSE)", - "disabled": false, - "downloads": 130, - "gated": false, - "gguf": { - "architecture": "qwen2", - "bos_token": "<|endoftext|>", - "chat_template": "{% for message in messages %}{% if loop.first and messages[0]['role'] != 'system' %}{{ '<|im_start|>system\nYou are a helpful assistant.<|im_end|>\n' }}{% endif %}{{'<|im_start|>' + message['role'] + '\n' + message['content'] + '<|im_end|>' + '\n'}}{% endfor %}{% if add_generation_prompt %}{{ '<|im_start|>assistant\n' }}{% endif %}", - "context_length": 32768, - "eos_token": "<|im_end|>", - "total": 7615616512 - }, - "id": "cortexso/qwen2", - "lastModified": "2025-03-02T15:15:09.000Z", - "likes": 0, - "model-index": null, - "modelId": "cortexso/qwen2", - "pipeline_tag": "text-generation", - "private": false, - "sha": "e2c6376ad87c7b2da92bc2a2b63ba168d85b1c6d", - "siblings": [ - { - "rfilename": ".gitattributes" - }, - { - "rfilename": "README.md" - }, - { - "rfilename": "metadata.yml" - }, - { - "rfilename": "model.yml" - }, - { - "rfilename": "qwen2-7b-instruct-q2_k.gguf" - }, - { - "rfilename": "qwen2-7b-instruct-q3_k_l.gguf" - }, - { - "rfilename": "qwen2-7b-instruct-q3_k_m.gguf" - }, - { - "rfilename": "qwen2-7b-instruct-q3_k_s.gguf" - }, - { - "rfilename": "qwen2-7b-instruct-q4_k_m.gguf" - }, - { - "rfilename": "qwen2-7b-instruct-q4_k_s.gguf" - }, - { - "rfilename": "qwen2-7b-instruct-q5_k_m.gguf" - }, - { - "rfilename": "qwen2-7b-instruct-q5_k_s.gguf" - }, - { - "rfilename": "qwen2-7b-instruct-q6_k.gguf" - }, - { - "rfilename": "qwen2-7b-instruct-q8_0.gguf" - } - ], - "spaces": [], - "tags": [ - "gguf", - "cortex.cpp", - "text-generation", - "license:other", - "endpoints_compatible", - "region:us", - "conversational" - ], - "usedStorage": 53341783520, - "widgetData": [ - { - "text": "Hi, what can you help me with?" - }, - { - "text": "What is 84 * 3 / 2?" 
- }, - { - "text": "Tell me an interesting fact about the universe!" - }, - { - "text": "Explain quantum computing in simple terms." - } - ] - }, - "models": [ - { - "id": "qwen2:7b", - "size": 4683071456 - } - ] - }, - { - "author": "Nous Research", - "id": "cortexso/hermes3", - "metadata": { - "_id": "675a4743cb0f75e1a3a19ae5", - "author": "cortexso", - "cardData": { - "license": "llama3", - "pipeline_tag": "text-generation", - "tags": ["cortex.cpp"] - }, - "createdAt": "2024-12-12T02:15:31.000Z", - "description": "---\nlicense: llama3\npipeline_tag: text-generation\ntags:\n- cortex.cpp\n---\n## Overview\n\n**Nous Research** developed and released the [Hermes 3](https://huggingface.co/NousResearch/Hermes-3-Llama-3.2-3B), a state-of-the-art instruction-tuned language model built on Llama-3.2-3B. This 3-billion parameter model is a fine-tuned version of Llama-3.2 and represents a leap forward in reasoning, multi-turn conversation, and structured outputs. It incorporates advanced role-playing capabilities, reliable function calling, and improved coherence over long contexts, making it a versatile assistant for various applications.\n\nHermes 3 was trained with high-quality data, leveraging fine-tuning techniques on H100 GPUs via LambdaLabs GPU Cloud. The model excels in both general-purpose and specialized tasks, including code generation, reasoning, and advanced conversational abilities. With support for ChatML prompt formatting, Hermes 3 ensures compatibility with OpenAI endpoints and facilitates structured, steerable interactions for end-users.\n\n## Variants\n\n| No | Variant | Cortex CLI command |\n| --- | --- | --- |\n| 1 | [Hermes3-3b](https://huggingface.co/cortexso/hermes3/tree/main) | `cortex run hermes3:3b` |\n\n## Use it with Jan (UI)\n\n1. Install **Jan** using [Quickstart](https://jan.ai/docs/quickstart)\n2. Use in Jan model Hub:\n ```bash\n cortexso/hermes3\n ```\n\n## Use it with Cortex (CLI)\n\n1. Install **Cortex** using [Quickstart](https://cortex.jan.ai/docs/quickstart)\n2. 
Run the model with command:\n ```bash\n cortex run hermes3\n ```\n\n## Credits\n\n- **Author:** Nous Research\n- **Converter:** [Homebrew](https://www.homebrew.ltd/)\n- **Original License:** [License](https://huggingface.co/meta-llama/Meta-Llama-3-8B/blob/main/LICENSE)\n- **Papers:** [Hermes 3 Technical Report](https://arxiv.org/pdf/2408.11857)", - "disabled": false, - "downloads": 421, - "gated": false, - "gguf": { - "architecture": "llama", - "bos_token": "<|begin_of_text|>", - "chat_template": "{% if not add_generation_prompt is defined %}{% set add_generation_prompt = false %}{% endif %}{% for message in messages %}{{'<|im_start|>' + message['role'] + '\n' + message['content'] + '<|im_end|>' + '\n'}}{% endfor %}{% if add_generation_prompt %}{{ '<|im_start|>assistant\n' }}{% endif %}", - "context_length": 131072, - "eos_token": "<|im_end|>", - "total": 3212749888 - }, - "id": "cortexso/hermes3", - "lastModified": "2025-03-03T02:36:41.000Z", - "likes": 0, - "model-index": null, - "modelId": "cortexso/hermes3", - "pipeline_tag": "text-generation", - "private": false, - "sha": "b987bf2aa863d1c3590e242aaf5b81a5dc3ea8f3", - "siblings": [ - { - "rfilename": ".gitattributes" - }, - { - "rfilename": "README.md" - }, - { - "rfilename": "hermes-3-llama-3.2-3b-q2_k.gguf" - }, - { - "rfilename": "hermes-3-llama-3.2-3b-q3_k_l.gguf" - }, - { - "rfilename": "hermes-3-llama-3.2-3b-q3_k_m.gguf" - }, - { - "rfilename": "hermes-3-llama-3.2-3b-q3_k_s.gguf" - }, - { - "rfilename": "hermes-3-llama-3.2-3b-q4_k_m.gguf" - }, - { - "rfilename": "hermes-3-llama-3.2-3b-q4_k_s.gguf" - }, - { - "rfilename": "hermes-3-llama-3.2-3b-q5_k_m.gguf" - }, - { - "rfilename": "hermes-3-llama-3.2-3b-q5_k_s.gguf" - }, - { - "rfilename": "hermes-3-llama-3.2-3b-q6_k.gguf" - }, - { - "rfilename": "hermes-3-llama-3.2-3b-q8_0.gguf" - }, - { - "rfilename": "metadata.yml" - }, - { - "rfilename": "model.yml" - } - ], - "spaces": [], - "tags": [ - "gguf", - "cortex.cpp", - "text-generation", - "arxiv:2408.11857", - "license:llama3", - "endpoints_compatible", - "region:us", - "conversational" - ], - "usedStorage": 23033625536, - "widgetData": [ - { - "text": "Hi, what can you help me with?" - }, - { - "text": "What is 84 * 3 / 2?" - }, - { - "text": "Tell me an interesting fact about the universe!" - }, - { - "text": "Explain quantum computing in simple terms." - } - ] - }, - "models": [ - { - "id": "hermes3:3b", - "size": 2019373888 - } - ] - }, - { - "author": "Qwen", - "id": "cortexso/qwen2.5-coder", - "metadata": { - "_id": "6732691d254c0b2144f11764", - "author": "cortexso", - "cardData": { - "license": "mit", - "pipeline_tag": "text-generation", - "tags": ["cortex.cpp"] - }, - "createdAt": "2024-11-11T20:29:17.000Z", - "description": "---\nlicense: mit\npipeline_tag: text-generation\ntags:\n- cortex.cpp\n---\n## Overview\n\n**Qwen Labs** developed and released the [Qwen2.5-Coder](https://huggingface.co/Qwen) model, a state-of-the-art language model tailored for code generation, understanding, and completion tasks. Featuring a 2.5B parameter dense Transformer architecture, Qwen2.5-Coder is designed to assist developers and researchers by generating high-quality code snippets, providing algorithm explanations, and completing coding prompts with accuracy. The model was trained on a diverse blend of programming languages and frameworks using carefully filtered code datasets to ensure precision and relevance. 
It leverages advanced fine-tuning techniques and rigorous safety measures to optimize instruction adherence and deliver reliable, contextually aware outputs. Released in November 2024, Qwen2.5-Coder offers an effective tool for software development, academic research, and programming education.\n\n## Variants\n\n| No | Variant | Cortex CLI command |\n| --- | --- | --- |\n| 1 | [Qwen2.5-coder-14b](https://huggingface.co/cortexso/qwen2.5-coder/tree/14b) | `cortex run qwen2.5-coder:14b` |\n| 1 | [Qwen2.5-coder-32b](https://huggingface.co/cortexso/qwen2.5-coder/tree/32b) | `cortex run qwen2.5-coder:32b` |\n\n## Use it with Jan (UI)\n\n1. Install **Jan** using [Quickstart](https://jan.ai/docs/quickstart)\n2. Use in Jan model Hub:\n ```bash\n cortexso/qwen2.5-coder\n ```\n\n## Use it with Cortex (CLI)\n\n1. Install **Cortex** using [Quickstart](https://cortex.jan.ai/docs/quickstart)\n2. Run the model with command:\n ```bash\n cortex run qwen2.5-coder\n ```\n\n## Credits\n\n- **Author:** Qwen Labs\n- **Converter:** [Homebrew](https://www.homebrew.ltd/)\n- **Original License:** [License](https://huggingface.co/Qwen/Qwen2.5-Coder-32B-Instruct/blob/main/LICENSE)\n- **Papers:** [Qwen2.5-Coder Technical Report](https://arxiv.org/abs/2409.12186)", - "disabled": false, - "downloads": 1369, - "gated": false, - "gguf": { - "architecture": "qwen2", - "bos_token": "<|endoftext|>", - "chat_template": "{%- if tools %}\n {{- '<|im_start|>system\\n' }}\n {%- if messages[0]['role'] == 'system' %}\n {{- messages[0]['content'] }}\n {%- else %}\n {{- 'You are Qwen, created by Alibaba Cloud. You are a helpful assistant.' }}\n {%- endif %}\n {{- \"\\n\\n# Tools\\n\\nYou may call one or more functions to assist with the user query.\\n\\nYou are provided with function signatures within XML tags:\\n\" }}\n {%- for tool in tools %}\n {{- \"\\n\" }}\n {{- tool | tojson }}\n {%- endfor %}\n {{- \"\\n\\n\\nFor each function call, return a json object with function name and arguments within XML tags:\\n\\n{\\\"name\\\": , \\\"arguments\\\": }\\n<|im_end|>\\n\" }}\n{%- else %}\n {%- if messages[0]['role'] == 'system' %}\n {{- '<|im_start|>system\\n' + messages[0]['content'] + '<|im_end|>\\n' }}\n {%- else %}\n {{- '<|im_start|>system\\nYou are Qwen, created by Alibaba Cloud. 
You are a helpful assistant.<|im_end|>\\n' }}\n {%- endif %}\n{%- endif %}\n{%- for message in messages %}\n {%- if (message.role == \"user\") or (message.role == \"system\" and not loop.first) or (message.role == \"assistant\" and not message.tool_calls) %}\n {{- '<|im_start|>' + message.role + '\\n' + message.content + '<|im_end|>' + '\\n' }}\n {%- elif message.role == \"assistant\" %}\n {{- '<|im_start|>' + message.role }}\n {%- if message.content %}\n {{- '\\n' + message.content }}\n {%- endif %}\n {%- for tool_call in message.tool_calls %}\n {%- if tool_call.function is defined %}\n {%- set tool_call = tool_call.function %}\n {%- endif %}\n {{- '\\n\\n{\"name\": \"' }}\n {{- tool_call.name }}\n {{- '\", \"arguments\": ' }}\n {{- tool_call.arguments | tojson }}\n {{- '}\\n' }}\n {%- endfor %}\n {{- '<|im_end|>\\n' }}\n {%- elif message.role == \"tool\" %}\n {%- if (loop.index0 == 0) or (messages[loop.index0 - 1].role != \"tool\") %}\n {{- '<|im_start|>user' }}\n {%- endif %}\n {{- '\\n\\n' }}\n {{- message.content }}\n {{- '\\n' }}\n {%- if loop.last or (messages[loop.index0 + 1].role != \"tool\") %}\n {{- '<|im_end|>\\n' }}\n {%- endif %}\n {%- endif %}\n{%- endfor %}\n{%- if add_generation_prompt %}\n {{- '<|im_start|>assistant\\n' }}\n{%- endif %}\n", - "context_length": 32768, - "eos_token": "<|im_end|>", - "total": 14770033664 - }, - "id": "cortexso/qwen2.5-coder", - "lastModified": "2025-03-03T04:26:33.000Z", - "likes": 0, - "model-index": null, - "modelId": "cortexso/qwen2.5-coder", - "pipeline_tag": "text-generation", - "private": false, - "sha": "b472c129cc68732d81e50ce48e621fe1861e8d1c", - "siblings": [ - { - "rfilename": ".gitattributes" - }, - { - "rfilename": "README.md" - }, - { - "rfilename": "metadata.yml" - }, - { - "rfilename": "model.yml" - }, - { - "rfilename": "qwen2.5-coder-14b-instruct-q2_k.gguf" - }, - { - "rfilename": "qwen2.5-coder-14b-instruct-q3_k_l.gguf" - }, - { - "rfilename": "qwen2.5-coder-14b-instruct-q3_k_m.gguf" - }, - { - "rfilename": "qwen2.5-coder-14b-instruct-q3_k_s.gguf" - }, - { - "rfilename": "qwen2.5-coder-14b-instruct-q4_k_m.gguf" - }, - { - "rfilename": "qwen2.5-coder-14b-instruct-q4_k_s.gguf" - }, - { - "rfilename": "qwen2.5-coder-14b-instruct-q5_k_m.gguf" - }, - { - "rfilename": "qwen2.5-coder-14b-instruct-q5_k_s.gguf" - }, - { - "rfilename": "qwen2.5-coder-14b-instruct-q6_k.gguf" - }, - { - "rfilename": "qwen2.5-coder-14b-instruct-q8_0.gguf" - }, - { - "rfilename": "qwen2.5-coder-32b-instruct-q2_k.gguf" - }, - { - "rfilename": "qwen2.5-coder-32b-instruct-q3_k_l.gguf" - }, - { - "rfilename": "qwen2.5-coder-32b-instruct-q3_k_m.gguf" - }, - { - "rfilename": "qwen2.5-coder-32b-instruct-q3_k_s.gguf" - }, - { - "rfilename": "qwen2.5-coder-32b-instruct-q4_k_m.gguf" - }, - { - "rfilename": "qwen2.5-coder-32b-instruct-q4_k_s.gguf" - }, - { - "rfilename": "qwen2.5-coder-32b-instruct-q5_k_m.gguf" - }, - { - "rfilename": "qwen2.5-coder-32b-instruct-q5_k_s.gguf" - }, - { - "rfilename": "qwen2.5-coder-32b-instruct-q6_k.gguf" - }, - { - "rfilename": "qwen2.5-coder-32b-instruct-q8_0.gguf" - } - ], - "spaces": [], - "tags": [ - "gguf", - "cortex.cpp", - "text-generation", - "arxiv:2409.12186", - "license:mit", - "endpoints_compatible", - "region:us", - "conversational" - ], - "usedStorage": 328827521152, - "widgetData": [ - { - "text": "Hi, what can you help me with?" - }, - { - "text": "What is 84 * 3 / 2?" - }, - { - "text": "Tell me an interesting fact about the universe!" - }, - { - "text": "Explain quantum computing in simple terms." 
- } - ] - }, - "models": [ - { - "id": "qwen2.5-coder:32b", - "size": 19851336256 - }, - { - "id": "qwen2.5-coder:14b", - "size": 8988110656 - } - ] - }, - { - "author": "Microsoft", - "id": "cortexso/phi-3.5", - "metadata": { - "_id": "67211d1b527f6fcd90b9dca3", - "author": "cortexso", - "cardData": { - "license": "mit", - "pipeline_tag": "text-generation", - "tags": ["cortex.cpp"] - }, - "createdAt": "2024-10-29T17:36:27.000Z", - "description": "---\nlicense: mit\npipeline_tag: text-generation\ntags:\n- cortex.cpp\n---\n\n## Overview\n**Microsoft** developed and released the [Phi-3.5](https://huggingface.co/microsoft/Phi-3.5-mini-instruct) model, a state-of-the-art large language model built upon the Phi-3 architecture. With its focus on high-quality, reasoning-dense data, this model represents a significant advancement in instruction-tuned language models. Phi-3.5 has been fine-tuned through supervised learning, proximal policy optimization (PPO), and direct preference optimization (DPO) to ensure precise instruction following and robust safety measures. Supporting a 128K token context length, the model demonstrates exceptional performance in tasks requiring extended context understanding and complex reasoning. The model's training data consists of synthetic datasets and carefully filtered publicly available web content, inheriting the high-quality foundation established in the Phi-3 series.\n\n## Variants\n| No | Variant | Cortex CLI command |\n| --- | --- | --- |\n| 1 | [Phi-3.5-3b](https://huggingface.co/cortexso/phi-3.5/tree/3b) | `cortex run phi-3.5:3b` |\n\n## Use it with Jan (UI)\n1. Install **Jan** using [Quickstart](https://jan.ai/docs/quickstart)\n2. Use in Jan model Hub:\n ```bash\n cortexso/phi-3.5\n ```\n\n## Use it with Cortex (CLI)\n1. Install **Cortex** using [Quickstart](https://cortex.jan.ai/docs/quickstart)\n2. 
Run the model with command:\n ```bash\n cortex run phi-3.5\n ```\n\n## Credits\n- **Author:** Microsoft\n- **Converter:** [Homebrew](https://www.homebrew.ltd/)\n- **Original License:** [License](https://choosealicense.com/licenses/mit/)\n- **Papers:** [Phi-3.5 Paper](https://arxiv.org/abs/2404.14219)", - "disabled": false, - "downloads": 299, - "gated": false, - "gguf": { - "architecture": "phi3", - "bos_token": "", - "chat_template": "{% for message in messages %}{% if message['role'] == 'system' and message['content'] %}{{'<|system|>\n' + message['content'] + '<|end|>\n'}}{% elif message['role'] == 'user' %}{{'<|user|>\n' + message['content'] + '<|end|>\n'}}{% elif message['role'] == 'assistant' %}{{'<|assistant|>\n' + message['content'] + '<|end|>\n'}}{% endif %}{% endfor %}{% if add_generation_prompt %}{{ '<|assistant|>\n' }}{% else %}{{ eos_token }}{% endif %}", - "context_length": 131072, - "eos_token": "<|endoftext|>", - "total": 3821079648 - }, - "id": "cortexso/phi-3.5", - "lastModified": "2025-03-03T05:42:47.000Z", - "likes": 0, - "model-index": null, - "modelId": "cortexso/phi-3.5", - "pipeline_tag": "text-generation", - "private": false, - "sha": "7fd139ae9bdff00feae40ad3e4d7ce6dc0c48a91", - "siblings": [ - { - "rfilename": ".gitattributes" - }, - { - "rfilename": "README.md" - }, - { - "rfilename": "metadata.yml" - }, - { - "rfilename": "model.yml" - }, - { - "rfilename": "phi-3.5-mini-instruct-q2_k.gguf" - }, - { - "rfilename": "phi-3.5-mini-instruct-q3_k_l.gguf" - }, - { - "rfilename": "phi-3.5-mini-instruct-q3_k_m.gguf" - }, - { - "rfilename": "phi-3.5-mini-instruct-q3_k_s.gguf" - }, - { - "rfilename": "phi-3.5-mini-instruct-q4_k_m.gguf" - }, - { - "rfilename": "phi-3.5-mini-instruct-q4_k_s.gguf" - }, - { - "rfilename": "phi-3.5-mini-instruct-q5_k_m.gguf" - }, - { - "rfilename": "phi-3.5-mini-instruct-q5_k_s.gguf" - }, - { - "rfilename": "phi-3.5-mini-instruct-q6_k.gguf" - }, - { - "rfilename": "phi-3.5-mini-instruct-q8_0.gguf" - } - ], - "spaces": [], - "tags": [ - "gguf", - "cortex.cpp", - "text-generation", - "arxiv:2404.14219", - "license:mit", - "endpoints_compatible", - "region:us", - "conversational" - ], - "usedStorage": 26770128384, - "widgetData": [ - { - "text": "Hi, what can you help me with?" - }, - { - "text": "What is 84 * 3 / 2?" - }, - { - "text": "Tell me an interesting fact about the universe!" - }, - { - "text": "Explain quantum computing in simple terms." - } - ] - }, - "models": [ - { - "id": "phi-3.5:3b", - "size": 2393232384 - } - ] - }, - { - "author": "meta-llama", - "id": "cortexso/llama3.3", - "metadata": { - "_id": "67568c9b6ac1ee73523d7623", - "author": "cortexso", - "cardData": { - "license": "mit", - "pipeline_tag": "text-generation", - "tags": ["cortex.cpp"] - }, - "createdAt": "2024-12-09T06:22:19.000Z", - "description": "---\nlicense: mit\npipeline_tag: text-generation\ntags:\n- cortex.cpp\n---\n## Overview\n\n**Meta** developed and released the [Llama3.3](https://huggingface.co/meta-llama/Llama-3.3-70B-Instruct) model, a state-of-the-art multilingual large language model designed for instruction-tuned generative tasks. With 70 billion parameters, this model is optimized for multilingual dialogue use cases, providing high-quality text input and output. Llama3.3 has been fine-tuned through supervised learning and reinforcement learning with human feedback (RLHF) to align with human preferences for helpfulness and safety. 
It sets a new standard in performance, outperforming many open-source and closed-source chat models on common industry benchmarks. The model’s capabilities make it a powerful tool for applications requiring conversational AI, multilingual support, and instruction adherence.\n\n## Variants\n\n| No | Variant | Cortex CLI command |\n| --- | --- | --- |\n| 1 | [Llama3.3-70b](https://huggingface.co/cortexso/llama3.3/tree/70b) | `cortex run llama3.3:70b` |\n\n## Use it with Jan (UI)\n\n1. Install **Jan** using [Quickstart](https://jan.ai/docs/quickstart)\n2. Use in Jan model Hub:\n ```bash\n cortexso/llama3.3\n ```\n\n## Use it with Cortex (CLI)\n\n1. Install **Cortex** using [Quickstart](https://cortex.jan.ai/docs/quickstart)\n2. Run the model with command:\n ```bash\n cortex run llama3.3\n ```\n\n## Credits\n\n- **Author:** Meta\n- **Converter:** [Homebrew](https://www.homebrew.ltd/)\n- **Original License:** [License](https://llama.meta.com/llama3/license/)\n- **Papers:** [Llama-3 Blog](https://llama.meta.com/llama3/)", - "disabled": false, - "downloads": 964, - "gated": false, - "gguf": { - "architecture": "llama", - "bos_token": "<|begin_of_text|>", - "chat_template": "{{- bos_token }}\n{%- if custom_tools is defined %}\n {%- set tools = custom_tools %}\n{%- endif %}\n{%- if not tools_in_user_message is defined %}\n {%- set tools_in_user_message = true %}\n{%- endif %}\n{%- if not date_string is defined %}\n {%- set date_string = \"26 Jul 2024\" %}\n{%- endif %}\n{%- if not tools is defined %}\n {%- set tools = none %}\n{%- endif %}\n\n{#- This block extracts the system message, so we can slot it into the right place. #}\n{%- if messages[0]['role'] == 'system' %}\n {%- set system_message = messages[0]['content']|trim %}\n {%- set messages = messages[1:] %}\n{%- else %}\n {%- set system_message = \"\" %}\n{%- endif %}\n\n{#- System message + builtin tools #}\n{{- \"<|start_header_id|>system<|end_header_id|>\\n\\n\" }}\n{%- if builtin_tools is defined or tools is not none %}\n {{- \"Environment: ipython\\n\" }}\n{%- endif %}\n{%- if builtin_tools is defined %}\n {{- \"Tools: \" + builtin_tools | reject('equalto', 'code_interpreter') | join(\", \") + \"\\n\\n\"}}\n{%- endif %}\n{{- \"Cutting Knowledge Date: December 2023\\n\" }}\n{{- \"Today Date: \" + date_string + \"\\n\\n\" }}\n{%- if tools is not none and not tools_in_user_message %}\n {{- \"You have access to the following functions. To call a function, please respond with JSON for a function call.\" }}\n {{- 'Respond in the format {\"name\": function name, \"parameters\": dictionary of argument name and its value}.' 
}}\n {{- \"Do not use variables.\\n\\n\" }}\n {%- for t in tools %}\n {{- t | tojson(indent=4) }}\n {{- \"\\n\\n\" }}\n {%- endfor %}\n{%- endif %}\n{{- system_message }}\n{{- \"<|eot_id|>\" }}\n\n{#- Custom tools are passed in a user message with some extra guidance #}\n{%- if tools_in_user_message and not tools is none %}\n {#- Extract the first user message so we can plug it in here #}\n {%- if messages | length != 0 %}\n {%- set first_user_message = messages[0]['content']|trim %}\n {%- set messages = messages[1:] %}\n {%- else %}\n {{- raise_exception(\"Cannot put tools in the first user message when there's no first user message!\") }}\n{%- endif %}\n {{- '<|start_header_id|>user<|end_header_id|>\\n\\n' -}}\n {{- \"Given the following functions, please respond with a JSON for a function call \" }}\n {{- \"with its proper arguments that best answers the given prompt.\\n\\n\" }}\n {{- 'Respond in the format {\"name\": function name, \"parameters\": dictionary of argument name and its value}.' }}\n {{- \"Do not use variables.\\n\\n\" }}\n {%- for t in tools %}\n {{- t | tojson(indent=4) }}\n {{- \"\\n\\n\" }}\n {%- endfor %}\n {{- first_user_message + \"<|eot_id|>\"}}\n{%- endif %}\n\n{%- for message in messages %}\n {%- if not (message.role == 'ipython' or message.role == 'tool' or 'tool_calls' in message) %}\n {{- '<|start_header_id|>' + message['role'] + '<|end_header_id|>\\n\\n'+ message['content'] | trim + '<|eot_id|>' }}\n {%- elif 'tool_calls' in message %}\n {%- if not message.tool_calls|length == 1 %}\n {{- raise_exception(\"This model only supports single tool-calls at once!\") }}\n {%- endif %}\n {%- set tool_call = message.tool_calls[0].function %}\n {%- if builtin_tools is defined and tool_call.name in builtin_tools %}\n {{- '<|start_header_id|>assistant<|end_header_id|>\\n\\n' -}}\n {{- \"<|python_tag|>\" + tool_call.name + \".call(\" }}\n {%- for arg_name, arg_val in tool_call.arguments | items %}\n {{- arg_name + '=\"' + arg_val + '\"' }}\n {%- if not loop.last %}\n {{- \", \" }}\n {%- endif %}\n {%- endfor %}\n {{- \")\" }}\n {%- else %}\n {{- '<|start_header_id|>assistant<|end_header_id|>\\n\\n' -}}\n {{- '{\"name\": \"' + tool_call.name + '\", ' }}\n {{- '\"parameters\": ' }}\n {{- tool_call.arguments | tojson }}\n {{- \"}\" }}\n {%- endif %}\n {%- if builtin_tools is defined %}\n {#- This means we're in ipython mode #}\n {{- \"<|eom_id|>\" }}\n {%- else %}\n {{- \"<|eot_id|>\" }}\n {%- endif %}\n {%- elif message.role == \"tool\" or message.role == \"ipython\" %}\n {{- \"<|start_header_id|>ipython<|end_header_id|>\\n\\n\" }}\n {%- if message.content is mapping or message.content is iterable %}\n {{- message.content | tojson }}\n {%- else %}\n {{- message.content }}\n {%- endif %}\n {{- \"<|eot_id|>\" }}\n {%- endif %}\n{%- endfor %}\n{%- if add_generation_prompt %}\n {{- '<|start_header_id|>assistant<|end_header_id|>\\n\\n' }}\n{%- endif %}\n", - "context_length": 131072, - "eos_token": "<|eot_id|>", - "total": 70553706560 - }, - "id": "cortexso/llama3.3", - "lastModified": "2025-03-03T03:59:38.000Z", - "likes": 0, - "model-index": null, - "modelId": "cortexso/llama3.3", - "pipeline_tag": "text-generation", - "private": false, - "sha": "9cc0507ae02f03cf59c630c1ffa5d369441e27eb", - "siblings": [ - { - "rfilename": ".gitattributes" - }, - { - "rfilename": "README.md" - }, - { - "rfilename": "llama-3.3-70b-instruct-q4_k_m.gguf" - }, - { - "rfilename": "metadata.yml" - }, - { - "rfilename": "model.yml" - } - ], - "spaces": [], - "tags": [ - "gguf", - "cortex.cpp", - 
"text-generation", - "license:mit", - "endpoints_compatible", - "region:us", - "conversational" - ], - "usedStorage": 42520398432, - "widgetData": [ - { - "text": "Hi, what can you help me with?" - }, - { - "text": "What is 84 * 3 / 2?" - }, - { - "text": "Tell me an interesting fact about the universe!" - }, - { - "text": "Explain quantum computing in simple terms." - } - ] - }, - "models": [ - { - "id": "llama3.3:70b", - "size": 42520398432 - } - ] - }, - { - "author": "inftech.ai", - "id": "cortexso/opencoder", - "metadata": { - "_id": "672fb2f43db04d9bf3f4c393", - "author": "cortexso", - "cardData": { - "license": "other", - "pipeline_tag": "text-generation", - "tags": ["cortex.cpp"] - }, - "createdAt": "2024-11-09T19:07:32.000Z", - "description": "---\nlicense: other\npipeline_tag: text-generation\ntags:\n- cortex.cpp\n---\n\n## Overview\n\nOpenCoder is an open and reproducible code LLM family, featuring 1.5B and 8B base and chat models that support both English and Chinese languages. Built from scratch, OpenCoder is pretrained on 2.5 trillion tokens, composed of 90% raw code and 10% code-related web data. It undergoes supervised fine-tuning (SFT) with over 4.5 million high-quality examples, achieving performance on par with top-tier code LLMs\n\n## Variants\n\n| No | Variant | Cortex CLI command |\n| --- | --- | --- |\n| 1 | [Opencoder-8b](https://huggingface.co/cortexso/opencoder/tree/8b) | `cortex run opencoder:8b` |\n\n## Use it with Jan (UI)\n\n1. Install **Jan** using [Quickstart](https://jan.ai/docs/quickstart)\n2. Use in Jan model Hub:\n ```bash\n cortexhub/opencoder\n ```\n\n## Use it with Cortex (CLI)\n\n1. Install **Cortex** using [Quickstart](https://cortex.jan.ai/docs/quickstart)\n2. Run the model with command:\n ```bash\n cortex run opencoder\n ```\n \n## Credits\n\n- **Author:** inftech.ai\n- **Converter:** [Homebrew](https://homebrew.ltd/)\n- **Original License:** [Licence](https://huggingface.co/infly/OpenCoder-8B-Instruct/blob/main/LICENSE)\n- **Papers:** [Paper](https://arxiv.org/abs/2411.04905)", - "disabled": false, - "downloads": 650, - "gated": false, - "gguf": { - "architecture": "llama", - "bos_token": "<|im_start|>", - "chat_template": "{% for message in messages %}{% if loop.first and messages[0]['role'] != 'system' %}{{ '<|im_start|>system\nYou are OpenCoder, created by OpenCoder Team.<|im_end|>\n' }}{% endif %}{{'<|im_start|>' + message['role'] + '\n' + message['content'] + '<|im_end|>' + '\n'}}{% endfor %}{% if add_generation_prompt %}{{ '<|im_start|>assistant\n' }}{% endif %}", - "context_length": 8192, - "eos_token": "<|im_end|>", - "total": 7771262976 - }, - "id": "cortexso/opencoder", - "lastModified": "2025-03-03T02:25:59.000Z", - "likes": 0, - "model-index": null, - "modelId": "cortexso/opencoder", - "pipeline_tag": "text-generation", - "private": false, - "sha": "2b98756c8b01811470941deb8a0259de3dd4018c", - "siblings": [ - { - "rfilename": ".gitattributes" - }, - { - "rfilename": "README.md" - }, - { - "rfilename": "metadata.yml" - }, - { - "rfilename": "model.yml" - }, - { - "rfilename": "opencoder-8b-instruct-q2_k.gguf" - }, - { - "rfilename": "opencoder-8b-instruct-q3_k_l.gguf" - }, - { - "rfilename": "opencoder-8b-instruct-q3_k_m.gguf" - }, - { - "rfilename": "opencoder-8b-instruct-q3_k_s.gguf" - }, - { - "rfilename": "opencoder-8b-instruct-q4_k_m.gguf" - }, - { - "rfilename": "opencoder-8b-instruct-q4_k_s.gguf" - }, - { - "rfilename": "opencoder-8b-instruct-q5_k_m.gguf" - }, - { - "rfilename": "opencoder-8b-instruct-q5_k_s.gguf" - }, - { - 
"rfilename": "opencoder-8b-instruct-q6_k.gguf" - }, - { - "rfilename": "opencoder-8b-instruct-q8_0.gguf" - } - ], - "spaces": [], - "tags": [ - "gguf", - "cortex.cpp", - "text-generation", - "arxiv:2411.04905", - "license:other", - "endpoints_compatible", - "region:us", - "conversational" - ], - "usedStorage": 54076349664, - "widgetData": [ - { - "text": "Hi, what can you help me with?" - }, - { - "text": "What is 84 * 3 / 2?" - }, - { - "text": "Tell me an interesting fact about the universe!" - }, - { - "text": "Explain quantum computing in simple terms." - } - ] - }, - "models": [ - { - "id": "opencoder:8b", - "size": 4736059168 - } - ] - }, - { - "author": "Google", - "id": "cortexso/gemma", - "metadata": { - "_id": "6667b642f760460127737cc6", - "author": "cortexso", - "cardData": { - "license": "gemma", - "pipeline_tag": "text-generation", - "tags": ["cortex.cpp"] - }, - "createdAt": "2024-06-11T02:28:18.000Z", - "description": "---\nlicense: gemma\npipeline_tag: text-generation\ntags:\n- cortex.cpp\n---\n\n## Overview\n\nThe [Gemma](https://huggingface.co/google/gemma-7b), state-of-the-art open model trained with the Gemma datasets that includes both synthetic data and the filtered publicly available websites data with a focus on high-quality and reasoning dense properties. The model belongs to the Gemma family with the 4B, 7B version in two variants 8K and 128K which is the context length (in tokens) that it can support.\n\n## Variants\n\n| No | Variant | Cortex CLI command |\n| --- | --- | --- |\n| 1 | [Gemma-7b](https://huggingface.co/cortexso/gemma/tree/7b) | `cortex run gemma:7b` |\n\n## Use it with Jan (UI)\n\n1. Install **Jan** using [Quickstart](https://jan.ai/docs/quickstart)\n2. Use in Jan model Hub:\n ```bash\n cortexso/gemma\n ```\n \n## Use it with Cortex (CLI)\n\n1. Install **Cortex** using [Quickstart](https://cortex.jan.ai/docs/quickstart)\n2. 
Run the model with command:\n ```bash\n cortex run gemma\n ```\n \n## Credits\n\n- **Author:** Go‌ogle\n- **Converter:** [Homebrew](https://www.homebrew.ltd/)\n- **Original License:** [License](https://ai.google.dev/gemma/terms)\n- **Papers:** [Gemma Technical Report](https://arxiv.org/abs/2403.08295)", - "disabled": false, - "downloads": 280, - "gated": false, - "gguf": { - "architecture": "gemma", - "bos_token": "", - "chat_template": "{{ bos_token }}{% if messages[0]['role'] == 'system' %}{{ raise_exception('System role not supported') }}{% endif %}{% for message in messages %}{% if (message['role'] == 'user') != (loop.index0 % 2 == 0) %}{{ raise_exception('Conversation roles must alternate user/assistant/user/assistant/...') }}{% endif %}{% if (message['role'] == 'assistant') %}{% set role = 'model' %}{% else %}{% set role = message['role'] %}{% endif %}{{ '' + role + '\n' + message['content'] | trim + '\n' }}{% endfor %}{% if add_generation_prompt %}{{'model\n'}}{% endif %}", - "context_length": 8192, - "eos_token": "", - "total": 8537680896 - }, - "id": "cortexso/gemma", - "lastModified": "2025-03-03T06:14:39.000Z", - "likes": 0, - "model-index": null, - "modelId": "cortexso/gemma", - "pipeline_tag": "text-generation", - "private": false, - "sha": "801b78a606397281d5953e5e8f2a64b6158e2db2", - "siblings": [ - { - "rfilename": ".gitattributes" - }, - { - "rfilename": "README.md" - }, - { - "rfilename": "gemma-7b-it-q2_k.gguf" - }, - { - "rfilename": "gemma-7b-it-q3_k_l.gguf" - }, - { - "rfilename": "gemma-7b-it-q3_k_m.gguf" - }, - { - "rfilename": "gemma-7b-it-q3_k_s.gguf" - }, - { - "rfilename": "gemma-7b-it-q4_k_m.gguf" - }, - { - "rfilename": "gemma-7b-it-q4_k_s.gguf" - }, - { - "rfilename": "gemma-7b-it-q5_k_m.gguf" - }, - { - "rfilename": "gemma-7b-it-q5_k_s.gguf" - }, - { - "rfilename": "gemma-7b-it-q6_k.gguf" - }, - { - "rfilename": "gemma-7b-it-q8_0.gguf" - }, - { - "rfilename": "metadata.yml" - }, - { - "rfilename": "model.yml" - } - ], - "spaces": [], - "tags": [ - "gguf", - "cortex.cpp", - "text-generation", - "arxiv:2403.08295", - "license:gemma", - "endpoints_compatible", - "region:us", - "conversational" - ], - "usedStorage": 60258935328, - "widgetData": [ - { - "text": "Hi, what can you help me with?" - }, - { - "text": "What is 84 * 3 / 2?" - }, - { - "text": "Tell me an interesting fact about the universe!" - }, - { - "text": "Explain quantum computing in simple terms." - } - ] - }, - "models": [ - { - "id": "gemma:7b", - "size": 5329759680 - } - ] - }, - { - "author": "MistralAI", - "id": "cortexso/mistral-nemo", - "metadata": { - "_id": "66f4e292515759ca6d5287bd", - "author": "cortexso", - "cardData": { - "license": "apache-2.0", - "pipeline_tag": "text-generation", - "tags": ["cortex.cpp"] - }, - "createdAt": "2024-09-26T04:26:58.000Z", - "description": "---\nlicense: apache-2.0\npipeline_tag: text-generation\ntags:\n- cortex.cpp\n---\n\n## Overview\n\nMistralai developed and released the [Mistral-Nemo](https://huggingface.co/mistralai/Mistral-Nemo-Instruct-2407) family of large language models (LLMs).\n\n## Variants\n\n| No | Variant | Cortex CLI command |\n| --- | --- | --- |\n| 1 | [Mistral-nemo-12b](https://huggingface.co/cortexso/mistral-nemo/tree/12b) | `cortex run mistral-nemo:12b` ||\n\n## Use it with Jan (UI)\n\n1. Install **Jan** using [Quickstart](https://jan.ai/docs/quickstart)\n2. Use in Jan model Hub:\n ```bash\n cortexso/mistral-nemo\n ```\n\n## Use it with Cortex (CLI)\n\n1. 
Install **Cortex** using [Quickstart](https://cortex.jan.ai/docs/quickstart)\n2. Run the model with command:\n ```bash\n cortex run mistral-nemo\n ```\n\n## Credits\n\n- **Author:** MistralAI\n- **Converter:** [Homebrew](https://www.homebrew.ltd/)\n- **Original License:** [Apache 2 License](https://choosealicense.com/licenses/apache-2.0/)\n- **Papers:** [Mistral Nemo Blog](https://mistral.ai/news/mistral-nemo/)", - "disabled": false, - "downloads": 546, - "gated": false, - "gguf": { - "architecture": "llama", - "bos_token": "", - "chat_template": "{%- if messages[0][\"role\"] == \"system\" %}\n {%- set system_message = messages[0][\"content\"] %}\n {%- set loop_messages = messages[1:] %}\n{%- else %}\n {%- set loop_messages = messages %}\n{%- endif %}\n{%- if not tools is defined %}\n {%- set tools = none %}\n{%- endif %}\n{%- set user_messages = loop_messages | selectattr(\"role\", \"equalto\", \"user\") | list %}\n\n{#- This block checks for alternating user/assistant messages, skipping tool calling messages #}\n{%- set ns = namespace() %}\n{%- set ns.index = 0 %}\n{%- for message in loop_messages %}\n {%- if not (message.role == \"tool\" or message.role == \"tool_results\" or (message.tool_calls is defined and message.tool_calls is not none)) %}\n {%- if (message[\"role\"] == \"user\") != (ns.index % 2 == 0) %}\n {{- raise_exception(\"After the optional system message, conversation roles must alternate user/assistant/user/assistant/...\") }}\n {%- endif %}\n {%- set ns.index = ns.index + 1 %}\n {%- endif %}\n{%- endfor %}\n\n{{- bos_token }}\n{%- for message in loop_messages %}\n {%- if message[\"role\"] == \"user\" %}\n {%- if tools is not none and (message == user_messages[-1]) %}\n {{- \"[AVAILABLE_TOOLS][\" }}\n {%- for tool in tools %}\n {%- set tool = tool.function %}\n {{- '{\"type\": \"function\", \"function\": {' }}\n {%- for key, val in tool.items() if key != \"return\" %}\n {%- if val is string %}\n {{- '\"' + key + '\": \"' + val + '\"' }}\n {%- else %}\n {{- '\"' + key + '\": ' + val|tojson }}\n {%- endif %}\n {%- if not loop.last %}\n {{- \", \" }}\n {%- endif %}\n {%- endfor %}\n {{- \"}}\" }}\n {%- if not loop.last %}\n {{- \", \" }}\n {%- else %}\n {{- \"]\" }}\n {%- endif %}\n {%- endfor %}\n {{- \"[/AVAILABLE_TOOLS]\" }}\n {%- endif %}\n {%- if loop.last and system_message is defined %}\n {{- \"[INST]\" + system_message + \"\\n\\n\" + message[\"content\"] + \"[/INST]\" }}\n {%- else %}\n {{- \"[INST]\" + message[\"content\"] + \"[/INST]\" }}\n {%- endif %}\n {%- elif (message.tool_calls is defined and message.tool_calls is not none) %}\n {{- \"[TOOL_CALLS][\" }}\n {%- for tool_call in message.tool_calls %}\n {%- set out = tool_call.function|tojson %}\n {{- out[:-1] }}\n {%- if not tool_call.id is defined or tool_call.id|length != 9 %}\n {{- raise_exception(\"Tool call IDs should be alphanumeric strings with length 9!\") }}\n {%- endif %}\n {{- ', \"id\": \"' + tool_call.id + '\"}' }}\n {%- if not loop.last %}\n {{- \", \" }}\n {%- else %}\n {{- \"]\" + eos_token }}\n {%- endif %}\n {%- endfor %}\n {%- elif message[\"role\"] == \"assistant\" %}\n {{- message[\"content\"] + eos_token}}\n {%- elif message[\"role\"] == \"tool_results\" or message[\"role\"] == \"tool\" %}\n {%- if message.content is defined and message.content.content is defined %}\n {%- set content = message.content.content %}\n {%- else %}\n {%- set content = message.content %}\n {%- endif %}\n {{- '[TOOL_RESULTS]{\"content\": ' + content|string + \", \" }}\n {%- if not message.tool_call_id is defined or 
message.tool_call_id|length != 9 %}\n {{- raise_exception(\"Tool call IDs should be alphanumeric strings with length 9!\") }}\n {%- endif %}\n {{- '\"call_id\": \"' + message.tool_call_id + '\"}[/TOOL_RESULTS]' }}\n {%- else %}\n {{- raise_exception(\"Only user and assistant roles are supported, with the exception of an initial optional system message!\") }}\n {%- endif %}\n{%- endfor %}\n", - "context_length": 131072, - "eos_token": "", - "total": 12247782400 - }, - "id": "cortexso/mistral-nemo", - "lastModified": "2025-03-03T02:42:16.000Z", - "likes": 0, - "model-index": null, - "modelId": "cortexso/mistral-nemo", - "pipeline_tag": "text-generation", - "private": false, - "sha": "487a202e44ea08566ab73ed16b5f7f685d12cf6b", - "siblings": [ - { - "rfilename": ".gitattributes" - }, - { - "rfilename": "README.md" - }, - { - "rfilename": "metadata.yml" - }, - { - "rfilename": "mistral-nemo-instruct-2407-q2_k.gguf" - }, - { - "rfilename": "mistral-nemo-instruct-2407-q3_k_l.gguf" - }, - { - "rfilename": "mistral-nemo-instruct-2407-q3_k_m.gguf" - }, - { - "rfilename": "mistral-nemo-instruct-2407-q3_k_s.gguf" - }, - { - "rfilename": "mistral-nemo-instruct-2407-q4_k_m.gguf" - }, - { - "rfilename": "mistral-nemo-instruct-2407-q4_k_s.gguf" - }, - { - "rfilename": "mistral-nemo-instruct-2407-q5_k_m.gguf" - }, - { - "rfilename": "mistral-nemo-instruct-2407-q5_k_s.gguf" - }, - { - "rfilename": "mistral-nemo-instruct-2407-q6_k.gguf" - }, - { - "rfilename": "mistral-nemo-instruct-2407-q8_0.gguf" - }, - { - "rfilename": "model.yml" - } - ], - "spaces": [], - "tags": [ - "gguf", - "cortex.cpp", - "text-generation", - "license:apache-2.0", - "endpoints_compatible", - "region:us", - "conversational" - ], - "usedStorage": 85369454144, - "widgetData": [ - { - "text": "Hi, what can you help me with?" - }, - { - "text": "What is 84 * 3 / 2?" - }, - { - "text": "Tell me an interesting fact about the universe!" - }, - { - "text": "Explain quantum computing in simple terms." - } - ] - }, - "models": [ - { - "id": "mistral-nemo:12b", - "size": 7477207744 - } - ] - }, - { - "author": "meta-llama", - "id": "cortexso/llama3.2", - "metadata": { - "_id": "66f63309ba963b1db95deaa4", - "author": "cortexso", - "cardData": { - "license": "llama3.2", - "pipeline_tag": "text-generation", - "tags": ["cortex.cpp", "featured"] - }, - "createdAt": "2024-09-27T04:22:33.000Z", - "description": "---\nlicense: llama3.2\npipeline_tag: text-generation\ntags:\n- cortex.cpp\n- featured\n---\n\n## Overview\n\nMeta developed and released the [Meta Llama 3.2](https://huggingface.co/meta-llama/Llama-3.2-3B-Instruct) family of large language models (LLMs), a collection of pretrained and instruction tuned generative text models in 1B and 3B sizes (text in/text out). The Llama 3.2 instruction-tuned text only models are optimized for multilingual dialogue use cases, including agentic retrieval and summarization tasks. They outperform many of the available open source and closed chat models on common industry benchmarks.\n\n## Variants\n\n| No | Variant | Cortex CLI command |\n| --- | --- | --- |\n| 1 | [LLama3.2-1b](https://huggingface.co/cortexso/llama3.2/tree/1b) | `cortex run llama3.2:1b` |\n| 2 | [LLama3.2-3b](https://huggingface.co/cortexso/llama3.2/tree/3b) | `cortex run llama3.2:3b` |\n\n## Use it with Jan (UI)\n\n1. Install **Jan** using [Quickstart](https://jan.ai/docs/quickstart)\n2. Use in Jan model Hub:\n ```bash\n cortexso/llama3.2\n ```\n\n## Use it with Cortex (CLI)\n\n1. 
Install **Cortex** using [Quickstart](https://cortex.jan.ai/docs/quickstart)\n2. Run the model with command:\n ```bash\n cortex run llama3.2\n ```\n\n## Credits\n\n- **Author:** Meta\n- **Converter:** [Homebrew](https://www.homebrew.ltd/)\n- **Original License:** [License](https://huggingface.co/meta-llama/Llama-3.2-3B-Instruct/blob/main/LICENSE.txt)\n- **Papers:** [Llama-3.2 Blog](https://ai.meta.com/blog/llama-3-2-connect-2024-vision-edge-mobile-devices/)", - "disabled": false, - "downloads": 11227, - "gated": false, - "gguf": { - "architecture": "llama", - "bos_token": "<|begin_of_text|>", - "chat_template": "{{- bos_token }}\n{%- if custom_tools is defined %}\n {%- set tools = custom_tools %}\n{%- endif %}\n{%- if not tools_in_user_message is defined %}\n {%- set tools_in_user_message = true %}\n{%- endif %}\n{%- if not date_string is defined %}\n {%- if strftime_now is defined %}\n {%- set date_string = strftime_now(\"%d %b %Y\") %}\n {%- else %}\n {%- set date_string = \"26 Jul 2024\" %}\n {%- endif %}\n{%- endif %}\n{%- if not tools is defined %}\n {%- set tools = none %}\n{%- endif %}\n\n{#- This block extracts the system message, so we can slot it into the right place. #}\n{%- if messages[0]['role'] == 'system' %}\n {%- set system_message = messages[0]['content']|trim %}\n {%- set messages = messages[1:] %}\n{%- else %}\n {%- set system_message = \"\" %}\n{%- endif %}\n\n{#- System message #}\n{{- \"<|start_header_id|>system<|end_header_id|>\\n\\n\" }}\n{%- if tools is not none %}\n {{- \"Environment: ipython\\n\" }}\n{%- endif %}\n{{- \"Cutting Knowledge Date: December 2023\\n\" }}\n{{- \"Today Date: \" + date_string + \"\\n\\n\" }}\n{%- if tools is not none and not tools_in_user_message %}\n {{- \"You have access to the following functions. To call a function, please respond with JSON for a function call.\" }}\n {{- 'Respond in the format {\"name\": function name, \"parameters\": dictionary of argument name and its value}.' }}\n {{- \"Do not use variables.\\n\\n\" }}\n {%- for t in tools %}\n {{- t | tojson(indent=4) }}\n {{- \"\\n\\n\" }}\n {%- endfor %}\n{%- endif %}\n{{- system_message }}\n{{- \"<|eot_id|>\" }}\n\n{#- Custom tools are passed in a user message with some extra guidance #}\n{%- if tools_in_user_message and not tools is none %}\n {#- Extract the first user message so we can plug it in here #}\n {%- if messages | length != 0 %}\n {%- set first_user_message = messages[0]['content']|trim %}\n {%- set messages = messages[1:] %}\n {%- else %}\n {{- raise_exception(\"Cannot put tools in the first user message when there's no first user message!\") }}\n{%- endif %}\n {{- '<|start_header_id|>user<|end_header_id|>\\n\\n' -}}\n {{- \"Given the following functions, please respond with a JSON for a function call \" }}\n {{- \"with its proper arguments that best answers the given prompt.\\n\\n\" }}\n {{- 'Respond in the format {\"name\": function name, \"parameters\": dictionary of argument name and its value}.' 
}}\n {{- \"Do not use variables.\\n\\n\" }}\n {%- for t in tools %}\n {{- t | tojson(indent=4) }}\n {{- \"\\n\\n\" }}\n {%- endfor %}\n {{- first_user_message + \"<|eot_id|>\"}}\n{%- endif %}\n\n{%- for message in messages %}\n {%- if not (message.role == 'ipython' or message.role == 'tool' or 'tool_calls' in message) %}\n {{- '<|start_header_id|>' + message['role'] + '<|end_header_id|>\\n\\n'+ message['content'] | trim + '<|eot_id|>' }}\n {%- elif 'tool_calls' in message %}\n {%- if not message.tool_calls|length == 1 %}\n {{- raise_exception(\"This model only supports single tool-calls at once!\") }}\n {%- endif %}\n {%- set tool_call = message.tool_calls[0].function %}\n {{- '<|start_header_id|>assistant<|end_header_id|>\\n\\n' -}}\n {{- '{\"name\": \"' + tool_call.name + '\", ' }}\n {{- '\"parameters\": ' }}\n {{- tool_call.arguments | tojson }}\n {{- \"}\" }}\n {{- \"<|eot_id|>\" }}\n {%- elif message.role == \"tool\" or message.role == \"ipython\" %}\n {{- \"<|start_header_id|>ipython<|end_header_id|>\\n\\n\" }}\n {%- if message.content is mapping or message.content is iterable %}\n {{- message.content | tojson }}\n {%- else %}\n {{- message.content }}\n {%- endif %}\n {{- \"<|eot_id|>\" }}\n {%- endif %}\n{%- endfor %}\n{%- if add_generation_prompt %}\n {{- '<|start_header_id|>assistant<|end_header_id|>\\n\\n' }}\n{%- endif %}\n", - "context_length": 131072, - "eos_token": "<|eot_id|>", - "total": 1235814432 - }, - "id": "cortexso/llama3.2", - "lastModified": "2025-03-03T06:22:08.000Z", - "likes": 0, - "model-index": null, - "modelId": "cortexso/llama3.2", - "pipeline_tag": "text-generation", - "private": false, - "sha": "73313225fbeff0cebf5ccf48121cba6ca1a80e7d", - "siblings": [ - { - "rfilename": ".gitattributes" - }, - { - "rfilename": "README.md" - }, - { - "rfilename": "llama-3.2-1b-instruct-q2_k.gguf" - }, - { - "rfilename": "llama-3.2-1b-instruct-q3_k_l.gguf" - }, - { - "rfilename": "llama-3.2-1b-instruct-q3_k_m.gguf" - }, - { - "rfilename": "llama-3.2-1b-instruct-q3_k_s.gguf" - }, - { - "rfilename": "llama-3.2-1b-instruct-q4_k_m.gguf" - }, - { - "rfilename": "llama-3.2-1b-instruct-q4_k_s.gguf" - }, - { - "rfilename": "llama-3.2-1b-instruct-q5_k_m.gguf" - }, - { - "rfilename": "llama-3.2-1b-instruct-q5_k_s.gguf" - }, - { - "rfilename": "llama-3.2-1b-instruct-q6_k.gguf" - }, - { - "rfilename": "llama-3.2-1b-instruct-q8_0.gguf" - }, - { - "rfilename": "llama-3.2-3b-instruct-q2_k.gguf" - }, - { - "rfilename": "llama-3.2-3b-instruct-q3_k_l.gguf" - }, - { - "rfilename": "llama-3.2-3b-instruct-q3_k_m.gguf" - }, - { - "rfilename": "llama-3.2-3b-instruct-q3_k_s.gguf" - }, - { - "rfilename": "llama-3.2-3b-instruct-q4_k_m.gguf" - }, - { - "rfilename": "llama-3.2-3b-instruct-q4_k_s.gguf" - }, - { - "rfilename": "llama-3.2-3b-instruct-q5_k_m.gguf" - }, - { - "rfilename": "llama-3.2-3b-instruct-q5_k_s.gguf" - }, - { - "rfilename": "llama-3.2-3b-instruct-q6_k.gguf" - }, - { - "rfilename": "llama-3.2-3b-instruct-q8_0.gguf" - }, - { - "rfilename": "metadata.yml" - }, - { - "rfilename": "model.yml" - } - ], - "spaces": [], - "tags": [ - "gguf", - "cortex.cpp", - "featured", - "text-generation", - "license:llama3.2", - "endpoints_compatible", - "region:us", - "conversational" - ], - "usedStorage": 31409886432, - "widgetData": [ - { - "text": "Hi, what can you help me with?" - }, - { - "text": "What is 84 * 3 / 2?" - }, - { - "text": "Tell me an interesting fact about the universe!" - }, - { - "text": "Explain quantum computing in simple terms." 
- } - ] - }, - "models": [ - { - "id": "llama3.2:3b", - "size": 2019377312 - }, - { - "id": "llama3.2:1b", - "size": 911503104 - } - ] - }, - { - "author": "Qwen", - "id": "cortexso/qwen2.5", - "metadata": { - "_id": "671d0d55748faf685e6450a3", - "author": "cortexso", - "cardData": { - "license": "apache-2.0", - "pipeline_tag": "text-generation", - "tags": ["cortex.cpp"] - }, - "createdAt": "2024-10-26T15:40:05.000Z", - "description": "---\nlicense: apache-2.0\npipeline_tag: text-generation\ntags:\n- cortex.cpp\n---\n\n## Overview\n\nQwen2.5 by Qwen is a family of model include various specialized models for coding and mathematics available in multiple sizes from 0.5B to 72B parameters\n\n## Variants\n\n| No | Variant | Cortex CLI command |\n| --- | --- | --- |\n| 1 | [Qwen-2.5-0.5b](https://huggingface.co/cortexso/qwen2.5/tree/0.5b) | `cortex run qwen2.5:0.5b` |\n| 2 | [Qwen-2.5-1.5b](https://huggingface.co/cortexso/qwen2.5/tree/1.5b) | `cortex run qwen2.5:1.5b` |\n| 3 | [Qwen-2.5-3b](https://huggingface.co/cortexso/qwen2.5/tree/3b) | `cortex run qwen2.5:3b` |\n| 4 | [Qwen-2.5-7b](https://huggingface.co/cortexso/qwen2.5/tree/7b) | `cortex run qwen2.5:7b` |\n| 5 | [Qwen-2.5-14b](https://huggingface.co/cortexso/qwen2.5/tree/14b) | `cortex run qwen2.5:14b` |\n| 6 | [Qwen-2.5-32b](https://huggingface.co/cortexso/qwen2.5/tree/32b) | `cortex run qwen2.5:32b` |\n| 7 | [Qwen-2.5-72b](https://huggingface.co/cortexso/qwen2.5/tree/72b) | `cortex run qwen2.5:72b` |\n| 8 | [Qwen-2.5-coder-1.5b](https://huggingface.co/cortexso/qwen2.5/tree/coder-1.5b) | `cortex run qwen2.5:coder-1.5b` |\n| 9 | [Qwen-2.5-coder-7b](https://huggingface.co/cortexso/qwen2.5/tree/coder-7b) | `cortex run qwen2.5:coder-7b` |\n| 10 | [Qwen-2.5-math-1.5b](https://huggingface.co/cortexso/qwen2.5/tree/math-1.5b) | `cortex run qwen2.5:math-1.5b` |\n| 11 | [Qwen-2.5-math-7b](https://huggingface.co/cortexso/qwen2.5/tree/math-7b) | `cortex run qwen2.5:math-7b` |\n\n## Use it with Jan (UI)\n\n1. Install **Jan** using [Quickstart](https://jan.ai/docs/quickstart)\n2. Use in Jan model Hub:\n ```\n cortexso/qwen2.5\n ```\n\n## Use it with Cortex (CLI)\n\n1. Install **Cortex** using [Quickstart](https://cortex.jan.ai/docs/quickstart)\n2. Run the model with command:\n ```\n cortex run qwen2.5\n ```\n\n## Credits\n\n- **Author:** Qwen\n- **Converter:** [Homebrew](https://www.homebrew.ltd/)\n- **Original License:** [License Apache 2.0](https://choosealicense.com/licenses/apache-2.0/)\n- **Papers:** [Qwen2.5 Blog](https://qwenlm.github.io/blog/qwen2.5/)", - "disabled": false, - "downloads": 3608, - "gated": false, - "gguf": { - "architecture": "qwen2", - "bos_token": "<|endoftext|>", - "chat_template": "{%- if tools %}\n {{- '<|im_start|>system\\n' }}\n {%- if messages[0]['role'] == 'system' %}\n {{- messages[0]['content'] }}\n {%- else %}\n {{- 'You are Qwen, created by Alibaba Cloud. You are a helpful assistant.' 
}}\n {%- endif %}\n {{- \"\\n\\n# Tools\\n\\nYou may call one or more functions to assist with the user query.\\n\\nYou are provided with function signatures within XML tags:\\n\" }}\n {%- for tool in tools %}\n {{- \"\\n\" }}\n {{- tool | tojson }}\n {%- endfor %}\n {{- \"\\n\\n\\nFor each function call, return a json object with function name and arguments within XML tags:\\n\\n{\\\"name\\\": , \\\"arguments\\\": }\\n<|im_end|>\\n\" }}\n{%- else %}\n {%- if messages[0]['role'] == 'system' %}\n {{- '<|im_start|>system\\n' + messages[0]['content'] + '<|im_end|>\\n' }}\n {%- else %}\n {{- '<|im_start|>system\\nYou are Qwen, created by Alibaba Cloud. You are a helpful assistant.<|im_end|>\\n' }}\n {%- endif %}\n{%- endif %}\n{%- for message in messages %}\n {%- if (message.role == \"user\") or (message.role == \"system\" and not loop.first) or (message.role == \"assistant\" and not message.tool_calls) %}\n {{- '<|im_start|>' + message.role + '\\n' + message.content + '<|im_end|>' + '\\n' }}\n {%- elif message.role == \"assistant\" %}\n {{- '<|im_start|>' + message.role }}\n {%- if message.content %}\n {{- '\\n' + message.content }}\n {%- endif %}\n {%- for tool_call in message.tool_calls %}\n {%- if tool_call.function is defined %}\n {%- set tool_call = tool_call.function %}\n {%- endif %}\n {{- '\\n\\n{\"name\": \"' }}\n {{- tool_call.name }}\n {{- '\", \"arguments\": ' }}\n {{- tool_call.arguments | tojson }}\n {{- '}\\n' }}\n {%- endfor %}\n {{- '<|im_end|>\\n' }}\n {%- elif message.role == \"tool\" %}\n {%- if (loop.index0 == 0) or (messages[loop.index0 - 1].role != \"tool\") %}\n {{- '<|im_start|>user' }}\n {%- endif %}\n {{- '\\n\\n' }}\n {{- message.content }}\n {{- '\\n' }}\n {%- if loop.last or (messages[loop.index0 + 1].role != \"tool\") %}\n {{- '<|im_end|>\\n' }}\n {%- endif %}\n {%- endif %}\n{%- endfor %}\n{%- if add_generation_prompt %}\n {{- '<|im_start|>assistant\\n' }}\n{%- endif %}\n", - "context_length": 32768, - "eos_token": "<|im_end|>", - "total": 494032768 - }, - "id": "cortexso/qwen2.5", - "lastModified": "2025-03-03T04:07:15.000Z", - "likes": 1, - "model-index": null, - "modelId": "cortexso/qwen2.5", - "pipeline_tag": "text-generation", - "private": false, - "sha": "d801e60d205491ab449425f3779b13bedbbe463d", - "siblings": [ - { - "rfilename": ".gitattributes" - }, - { - "rfilename": "README.md" - }, - { - "rfilename": "metadata.yml" - }, - { - "rfilename": "model.yml" - }, - { - "rfilename": "qwen2.5-0.5b-instruct-q2_k.gguf" - }, - { - "rfilename": "qwen2.5-0.5b-instruct-q3_k_l.gguf" - }, - { - "rfilename": "qwen2.5-0.5b-instruct-q3_k_m.gguf" - }, - { - "rfilename": "qwen2.5-0.5b-instruct-q3_k_s.gguf" - }, - { - "rfilename": "qwen2.5-0.5b-instruct-q4_k_m.gguf" - }, - { - "rfilename": "qwen2.5-0.5b-instruct-q4_k_s.gguf" - }, - { - "rfilename": "qwen2.5-0.5b-instruct-q5_k_m.gguf" - }, - { - "rfilename": "qwen2.5-0.5b-instruct-q5_k_s.gguf" - }, - { - "rfilename": "qwen2.5-0.5b-instruct-q6_k.gguf" - }, - { - "rfilename": "qwen2.5-0.5b-instruct-q8_0.gguf" - }, - { - "rfilename": "qwen2.5-1.5b-instruct-q2_k.gguf" - }, - { - "rfilename": "qwen2.5-1.5b-instruct-q3_k_l.gguf" - }, - { - "rfilename": "qwen2.5-1.5b-instruct-q3_k_m.gguf" - }, - { - "rfilename": "qwen2.5-1.5b-instruct-q3_k_s.gguf" - }, - { - "rfilename": "qwen2.5-1.5b-instruct-q4_k_m.gguf" - }, - { - "rfilename": "qwen2.5-1.5b-instruct-q4_k_s.gguf" - }, - { - "rfilename": "qwen2.5-1.5b-instruct-q5_k_m.gguf" - }, - { - "rfilename": "qwen2.5-1.5b-instruct-q5_k_s.gguf" - }, - { - "rfilename": 
"qwen2.5-1.5b-instruct-q6_k.gguf" - }, - { - "rfilename": "qwen2.5-1.5b-instruct-q8_0.gguf" - }, - { - "rfilename": "qwen2.5-14b-instruct-q2_k.gguf" - }, - { - "rfilename": "qwen2.5-14b-instruct-q3_k_l.gguf" - }, - { - "rfilename": "qwen2.5-14b-instruct-q3_k_m.gguf" - }, - { - "rfilename": "qwen2.5-14b-instruct-q3_k_s.gguf" - }, - { - "rfilename": "qwen2.5-14b-instruct-q4_k_m.gguf" - }, - { - "rfilename": "qwen2.5-14b-instruct-q4_k_s.gguf" - }, - { - "rfilename": "qwen2.5-14b-instruct-q5_k_m.gguf" - }, - { - "rfilename": "qwen2.5-14b-instruct-q5_k_s.gguf" - }, - { - "rfilename": "qwen2.5-14b-instruct-q6_k.gguf" - }, - { - "rfilename": "qwen2.5-14b-instruct-q8_0.gguf" - }, - { - "rfilename": "qwen2.5-32b-instruct-q2_k.gguf" - }, - { - "rfilename": "qwen2.5-32b-instruct-q3_k_l.gguf" - }, - { - "rfilename": "qwen2.5-32b-instruct-q3_k_m.gguf" - }, - { - "rfilename": "qwen2.5-32b-instruct-q3_k_s.gguf" - }, - { - "rfilename": "qwen2.5-32b-instruct-q4_k_m.gguf" - }, - { - "rfilename": "qwen2.5-32b-instruct-q4_k_s.gguf" - }, - { - "rfilename": "qwen2.5-32b-instruct-q5_k_m.gguf" - }, - { - "rfilename": "qwen2.5-32b-instruct-q5_k_s.gguf" - }, - { - "rfilename": "qwen2.5-32b-instruct-q6_k.gguf" - }, - { - "rfilename": "qwen2.5-32b-instruct-q8_0.gguf" - }, - { - "rfilename": "qwen2.5-3b-instruct-q2_k.gguf" - }, - { - "rfilename": "qwen2.5-3b-instruct-q3_k_l.gguf" - }, - { - "rfilename": "qwen2.5-3b-instruct-q3_k_m.gguf" - }, - { - "rfilename": "qwen2.5-3b-instruct-q3_k_s.gguf" - }, - { - "rfilename": "qwen2.5-3b-instruct-q4_k_m.gguf" - }, - { - "rfilename": "qwen2.5-3b-instruct-q4_k_s.gguf" - }, - { - "rfilename": "qwen2.5-3b-instruct-q5_k_m.gguf" - }, - { - "rfilename": "qwen2.5-3b-instruct-q5_k_s.gguf" - }, - { - "rfilename": "qwen2.5-3b-instruct-q6_k.gguf" - }, - { - "rfilename": "qwen2.5-3b-instruct-q8_0.gguf" - }, - { - "rfilename": "qwen2.5-72b-instruct-q4_k_m.gguf" - }, - { - "rfilename": "qwen2.5-7b-instruct-q2_k.gguf" - }, - { - "rfilename": "qwen2.5-7b-instruct-q3_k_l.gguf" - }, - { - "rfilename": "qwen2.5-7b-instruct-q3_k_m.gguf" - }, - { - "rfilename": "qwen2.5-7b-instruct-q3_k_s.gguf" - }, - { - "rfilename": "qwen2.5-7b-instruct-q4_k_m.gguf" - }, - { - "rfilename": "qwen2.5-7b-instruct-q4_k_s.gguf" - }, - { - "rfilename": "qwen2.5-7b-instruct-q5_k_m.gguf" - }, - { - "rfilename": "qwen2.5-7b-instruct-q5_k_s.gguf" - }, - { - "rfilename": "qwen2.5-7b-instruct-q6_k.gguf" - }, - { - "rfilename": "qwen2.5-7b-instruct-q8_0.gguf" - }, - { - "rfilename": "qwen2.5-coder-1.5b-instruct-q2_k.gguf" - }, - { - "rfilename": "qwen2.5-coder-1.5b-instruct-q3_k_l.gguf" - }, - { - "rfilename": "qwen2.5-coder-1.5b-instruct-q3_k_m.gguf" - }, - { - "rfilename": "qwen2.5-coder-1.5b-instruct-q3_k_s.gguf" - }, - { - "rfilename": "qwen2.5-coder-1.5b-instruct-q4_k_m.gguf" - }, - { - "rfilename": "qwen2.5-coder-1.5b-instruct-q4_k_s.gguf" - }, - { - "rfilename": "qwen2.5-coder-1.5b-instruct-q5_k_m.gguf" - }, - { - "rfilename": "qwen2.5-coder-1.5b-instruct-q5_k_s.gguf" - }, - { - "rfilename": "qwen2.5-coder-1.5b-instruct-q6_k.gguf" - }, - { - "rfilename": "qwen2.5-coder-1.5b-instruct-q8_0.gguf" - }, - { - "rfilename": "qwen2.5-coder-7b-instruct-q2_k.gguf" - }, - { - "rfilename": "qwen2.5-coder-7b-instruct-q3_k_l.gguf" - }, - { - "rfilename": "qwen2.5-coder-7b-instruct-q3_k_m.gguf" - }, - { - "rfilename": "qwen2.5-coder-7b-instruct-q3_k_s.gguf" - }, - { - "rfilename": "qwen2.5-coder-7b-instruct-q4_k_m.gguf" - }, - { - "rfilename": "qwen2.5-coder-7b-instruct-q4_k_s.gguf" - }, - { - "rfilename": 
"qwen2.5-coder-7b-instruct-q5_k_m.gguf" - }, - { - "rfilename": "qwen2.5-coder-7b-instruct-q5_k_s.gguf" - }, - { - "rfilename": "qwen2.5-coder-7b-instruct-q6_k.gguf" - }, - { - "rfilename": "qwen2.5-coder-7b-instruct-q8_0.gguf" - }, - { - "rfilename": "qwen2.5-math-1.5b-instruct-q2_k.gguf" - }, - { - "rfilename": "qwen2.5-math-1.5b-instruct-q3_k_l.gguf" - }, - { - "rfilename": "qwen2.5-math-1.5b-instruct-q3_k_m.gguf" - }, - { - "rfilename": "qwen2.5-math-1.5b-instruct-q3_k_s.gguf" - }, - { - "rfilename": "qwen2.5-math-1.5b-instruct-q4_k_m.gguf" - }, - { - "rfilename": "qwen2.5-math-1.5b-instruct-q4_k_s.gguf" - }, - { - "rfilename": "qwen2.5-math-1.5b-instruct-q5_k_m.gguf" - }, - { - "rfilename": "qwen2.5-math-1.5b-instruct-q5_k_s.gguf" - }, - { - "rfilename": "qwen2.5-math-1.5b-instruct-q6_k.gguf" - }, - { - "rfilename": "qwen2.5-math-1.5b-instruct-q8_0.gguf" - }, - { - "rfilename": "qwen2.5-math-7b-instruct-q2_k.gguf" - }, - { - "rfilename": "qwen2.5-math-7b-instruct-q3_k_l.gguf" - }, - { - "rfilename": "qwen2.5-math-7b-instruct-q3_k_m.gguf" - }, - { - "rfilename": "qwen2.5-math-7b-instruct-q3_k_s.gguf" - }, - { - "rfilename": "qwen2.5-math-7b-instruct-q4_k_m.gguf" - }, - { - "rfilename": "qwen2.5-math-7b-instruct-q4_k_s.gguf" - }, - { - "rfilename": "qwen2.5-math-7b-instruct-q5_k_m.gguf" - }, - { - "rfilename": "qwen2.5-math-7b-instruct-q5_k_s.gguf" - }, - { - "rfilename": "qwen2.5-math-7b-instruct-q6_k.gguf" - }, - { - "rfilename": "qwen2.5-math-7b-instruct-q8_0.gguf" - } - ], - "spaces": [], - "tags": [ - "gguf", - "cortex.cpp", - "text-generation", - "license:apache-2.0", - "endpoints_compatible", - "region:us", - "conversational" - ], - "usedStorage": 596251612960, - "widgetData": [ - { - "text": "Hi, what can you help me with?" - }, - { - "text": "What is 84 * 3 / 2?" - }, - { - "text": "Tell me an interesting fact about the universe!" - }, - { - "text": "Explain quantum computing in simple terms." - } - ] - }, - "models": [ - { - "id": "qwen2.5:1.5b", - "size": 986048384 - }, - { - "id": "qwen2.5:math-1.5b", - "size": 986048416 - }, - { - "id": "qwen2.5:3b", - "size": 1929902912 - }, - { - "id": "qwen2.5:14b", - "size": 8988110592 - }, - { - "id": "qwen2.5:0.5b", - "size": 397807808 - }, - { - "id": "qwen2.5:72b", - "size": 47415715104 - }, - { - "id": "qwen2.5:coder-1.5b", - "size": 986048480 - }, - { - "id": "qwen2.5:32b", - "size": 19851336192 - }, - { - "id": "qwen2.5:math-7b", - "size": 4683073856 - }, - { - "id": "qwen2.5:7b", - "size": 4683073856 - }, - { - "id": "qwen2.5:coder-7b", - "size": 4683073920 - } - ] - }, - { - "author": "MistralAI", - "id": "cortexso/codestral", - "metadata": { - "_id": "66724fb044ee478111905260", - "author": "cortexso", - "cardData": { - "license": "other", - "pipeline_tag": "text-generation", - "tags": ["cortex.cpp"] - }, - "createdAt": "2024-06-19T03:25:36.000Z", - "description": "---\nlicense: other\npipeline_tag: text-generation\ntags:\n- cortex.cpp\n---\n\n## Overview\n\nCodestral-22B-v0.1 is trained on a diverse dataset of 80+ programming languages, including the most popular ones, such as Python, Java, C, C++, JavaScript, and Bash\n\n## Variants\n\n| No | Variant | Cortex CLI command |\n| --- | --- | --- |\n| 1 | [Codestral-22b](https://huggingface.co/cortexso/codestral/tree/22b) | `cortex run codestral:22b` |\n\n\n## Use it with Jan (UI)\n\n1. Install **Jan** using [Quickstart](https://jan.ai/docs/quickstart)\n2. Use in Jan model Hub:\n ```bash\n cortexhub/codestral\n ```\n \n## Use it with Cortex (CLI)\n\n1. 
Install **Cortex** using [Quickstart](https://cortex.jan.ai/docs/quickstart)\n2. Run the model with command:\n ```bash\n cortex run codestral\n ```\n \n## Credits\n\n- **Author:** Mistral AI\n- **Converter:** [Homebrew](https://www.homebrew.ltd/)\n- **Original License:** [Licence](https://mistral.ai/licenses/MNPL-0.1.md)\n- **Papers:** [Codestral Blog](https://mistral.ai/news/codestral/)", - "disabled": false, - "downloads": 517, - "gated": false, - "gguf": { - "architecture": "llama", - "bos_token": "", - "chat_template": "{%- if messages[0]['role'] == 'system' %}\n {%- set system_message = messages[0]['content'] %}\n {%- set loop_messages = messages[1:] %}\n{%- else %}\n {%- set loop_messages = messages %}\n{%- endif %}\n\n{{- bos_token }}\n{%- for message in loop_messages %}\n {%- if (message['role'] == 'user') != (loop.index0 % 2 == 0) %}\n {{- raise_exception('After the optional system message, conversation roles must alternate user/assistant/user/assistant/...') }}\n {%- endif %}\n {%- if message['role'] == 'user' %}\n {%- if loop.last and system_message is defined %}\n {{- '[INST] ' + system_message + '\\n\\n' + message['content'] + '[/INST]' }}\n {%- else %}\n {{- '[INST] ' + message['content'] + '[/INST]' }}\n {%- endif %}\n {%- elif message['role'] == 'assistant' %}\n {{- ' ' + message['content'] + eos_token}}\n {%- else %}\n {{- raise_exception('Only user and assistant roles are supported, with the exception of an initial optional system message!') }}\n {%- endif %}\n{%- endfor %}\n", - "context_length": 32768, - "eos_token": "", - "total": 22247282688 - }, - "id": "cortexso/codestral", - "lastModified": "2025-03-02T15:11:11.000Z", - "likes": 0, - "model-index": null, - "modelId": "cortexso/codestral", - "pipeline_tag": "text-generation", - "private": false, - "sha": "6b522a6f0ce9c94a2f317c3802180aca4f526a30", - "siblings": [ - { - "rfilename": ".gitattributes" - }, - { - "rfilename": "README.md" - }, - { - "rfilename": "codestral-22b-v0.1-q2_k.gguf" - }, - { - "rfilename": "codestral-22b-v0.1-q3_k_l.gguf" - }, - { - "rfilename": "codestral-22b-v0.1-q3_k_m.gguf" - }, - { - "rfilename": "codestral-22b-v0.1-q3_k_s.gguf" - }, - { - "rfilename": "codestral-22b-v0.1-q4_k_m.gguf" - }, - { - "rfilename": "codestral-22b-v0.1-q4_k_s.gguf" - }, - { - "rfilename": "codestral-22b-v0.1-q5_k_m.gguf" - }, - { - "rfilename": "codestral-22b-v0.1-q5_k_s.gguf" - }, - { - "rfilename": "codestral-22b-v0.1-q6_k.gguf" - }, - { - "rfilename": "codestral-22b-v0.1-q8_0.gguf" - }, - { - "rfilename": "metadata.yml" - }, - { - "rfilename": "model.yml" - } - ], - "spaces": [], - "tags": [ - "gguf", - "cortex.cpp", - "text-generation", - "license:other", - "endpoints_compatible", - "region:us", - "conversational" - ], - "usedStorage": 166025350400, - "widgetData": [ - { - "text": "Hi, what can you help me with?" - }, - { - "text": "What is 84 * 3 / 2?" - }, - { - "text": "Tell me an interesting fact about the universe!" - }, - { - "text": "Explain quantum computing in simple terms." 
- } - ] - }, - "models": [ - { - "id": "codestral:22b", - "size": 13341239008 - } - ] - }, - { - "author": "Nous Research", - "id": "cortexso/openhermes-2.5", - "metadata": { - "_id": "6669ee8d6993100c6f8befa7", - "author": "cortexso", - "cardData": { - "license": "apache-2.0", - "pipeline_tag": "text-generation", - "tags": ["cortex.cpp"] - }, - "createdAt": "2024-06-12T18:53:01.000Z", - "description": "---\nlicense: apache-2.0\npipeline_tag: text-generation\ntags:\n- cortex.cpp\n---\n\n## Overview\n\nOpenHermes 2.5 Mistral 7B is a state of the art Mistral Fine-tune, a continuation of OpenHermes 2 model, which trained on additional code datasets.\n\n## Variants\n\n| No | Variant | Cortex CLI command |\n| --- | --- | --- |\n| 1 | [OpenHermes 2.5-7b](https://huggingface.co/cortexso/openhermes-2.5/tree/7b) | `cortex run openhermes-2.5:7b` |\n\n\n## Use it with Jan (UI)\n\n1. Install **Jan** using [Quickstart](https://jan.ai/docs/quickstart)\n2. Use in Jan model Hub:\n ```bash\n cortexhub/openhermes-2.5\n ```\n \n## Use it with Cortex (CLI)\n\n1. Install **Cortex** using [Quickstart](https://cortex.jan.ai/docs/quickstart)\n2. Run the model with command:\n ```bash\n cortex run openhermes-2.5\n ```\n \n## Credits\n\n- **Author:** Nous Research\n- **Converter:** [Homebrew](https://www.homebrew.ltd/)\n- **Original License:** [Licence](https://huggingface.co/datasets/choosealicense/licenses/blob/main/markdown/apache-2.0.md)\n- **Papers:** [Openhermes 2.5](https://huggingface.co/teknium/OpenHermes-2.5-Mistral-7B)", - "disabled": false, - "downloads": 230, - "gated": false, - "gguf": { - "architecture": "llama", - "bos_token": "", - "chat_template": "{% for message in messages %}{{'<|im_start|>' + message['role'] + '\n' + message['content'] + '<|im_end|>' + '\n'}}{% endfor %}{% if add_generation_prompt %}{{ '<|im_start|>assistant\n' }}{% endif %}", - "context_length": 32768, - "eos_token": "<|im_end|>", - "total": 7241748480 - }, - "id": "cortexso/openhermes-2.5", - "lastModified": "2025-03-02T14:54:17.000Z", - "likes": 0, - "model-index": null, - "modelId": "cortexso/openhermes-2.5", - "pipeline_tag": "text-generation", - "private": false, - "sha": "e4ef98ea46b61d21e434a79704717f7065c306a9", - "siblings": [ - { - "rfilename": ".gitattributes" - }, - { - "rfilename": "README.md" - }, - { - "rfilename": "metadata.yml" - }, - { - "rfilename": "model.yml" - }, - { - "rfilename": "openhermes-2.5-mistral-7b-q2_k.gguf" - }, - { - "rfilename": "openhermes-2.5-mistral-7b-q3_k_l.gguf" - }, - { - "rfilename": "openhermes-2.5-mistral-7b-q3_k_m.gguf" - }, - { - "rfilename": "openhermes-2.5-mistral-7b-q3_k_s.gguf" - }, - { - "rfilename": "openhermes-2.5-mistral-7b-q4_k_m.gguf" - }, - { - "rfilename": "openhermes-2.5-mistral-7b-q4_k_s.gguf" - }, - { - "rfilename": "openhermes-2.5-mistral-7b-q5_k_m.gguf" - }, - { - "rfilename": "openhermes-2.5-mistral-7b-q5_k_s.gguf" - }, - { - "rfilename": "openhermes-2.5-mistral-7b-q6_k.gguf" - }, - { - "rfilename": "openhermes-2.5-mistral-7b-q8_0.gguf" - } - ], - "spaces": [], - "tags": [ - "gguf", - "cortex.cpp", - "text-generation", - "license:apache-2.0", - "endpoints_compatible", - "region:us", - "conversational" - ], - "usedStorage": 122667617430, - "widgetData": [ - { - "text": "Hi, what can you help me with?" - }, - { - "text": "What is 84 * 3 / 2?" - }, - { - "text": "Tell me an interesting fact about the universe!" - }, - { - "text": "Explain quantum computing in simple terms." 
- } - ] - }, - "models": [ - { - "id": "openhermes-2.5:7b", - "size": 4368451712 - } - ] - }, - { - "author": "sail", - "id": "cortexso/sailor-2", - "metadata": { - "_id": "674f5d998f1ed02584bf68d8", - "author": "cortexso", - "cardData": { - "license": "apache-2.0", - "pipeline_tag": "text-generation", - "tags": ["cortex.cpp"] - }, - "createdAt": "2024-12-03T19:35:53.000Z", - "description": "---\nlicense: apache-2.0\npipeline_tag: text-generation\ntags:\n- cortex.cpp\n---\n\n## Overview\n\nSailor2 is a community-driven initiative that brings cutting-edge multilingual language models to South-East Asia (SEA). It is designed to address the growing demand for diverse, robust, and accessible language technologies in the region. Built upon the foundation of Qwen 2.5, Sailor2 is continuously pre-trained on 500B tokens, significantly improving its support for 15 languages with a unified model. These languages include English, Chinese, Burmese, Cebuano, Ilocano, Indonesian, Javanese, Khmer, Lao, Malay, Sundanese, Tagalog, Thai, Vietnamese, and Waray.\n\nSailor2 is available in three sizes: 1B, 8B, and 20B, which are expansions from the Qwen2.5 base models of 0.5B, 7B, and 14B, respectively. These models serve a wide range of applications, from production use to research and speculative decoding, ensuring accessibility to advanced language technologies across SEA.\n\n## Variants\n\n| No | Variant | Cortex CLI command |\n| --- | --- | --- |\n| 1 | [Sailor-2-1b](https://huggingface.co/cortexso/sailor-2/tree/1b) | `cortex run sailor-2:1b` |\n| 2 | [Sailor-2-8b](https://huggingface.co/cortexso/sailor-2/tree/8b) | `cortex run sailor-2:8b` |\n| 3 | [Sailor-2-20b](https://huggingface.co/cortexso/sailor-2/tree/20b) | `cortex run sailor-2:20b` |\n\n## Use it with Jan (UI)\n\n1. Install **Jan** using [Quickstart](https://jan.ai/docs/quickstart)\n2. Use in Jan model Hub:\n ```bash\n cortexhub/sailor-2\n ```\n\n## Use it with Cortex (CLI)\n\n1. Install **Cortex** using [Quickstart](https://cortex.jan.ai/docs/quickstart)\n2. Run the model with command:\n ```bash\n cortex run sailor-2\n ```\n \n## Credits\n\n- **Author:** Community-driven (Sailor2 Initiative)\n- **Converter:** [Homebrew](https://homebrew.ltd/)\n- **Original License:** [Licence](https://choosealicense.com/licenses/apache-2.0/)\n- **Papers:** [Technical Paper](https://arxiv.org/pdf/2502.12982)", - "disabled": false, - "downloads": 178, - "gated": false, - "gguf": { - "architecture": "qwen2", - "bos_token": "<|endoftext|>", - "chat_template": "{% for message in messages %}{% if loop.first and messages[0]['role'] != 'system' %}{{ '<|im_start|>system\nYou are an AI assistant named Sailor2, created by Sea AI Lab. As an AI assistant, you can answer questions in English, Chinese, and Southeast Asian languages such as Burmese, Cebuano, Ilocano, Indonesian, Javanese, Khmer, Lao, Malay, Sundanese, Tagalog, Thai, Vietnamese, and Waray. 
Your responses should be friendly, unbiased, informative, detailed, and faithful.<|im_end|>\n' }}{% endif %}{{'<|im_start|>' + message['role'] + '\n' + message['content'] + '<|im_end|>' + '\n'}}{% endfor %}{% if add_generation_prompt %}{{ '<|im_start|>assistant\n' }}{% endif %}", - "context_length": 4096, - "eos_token": "<|im_end|>", - "total": 988064640 - }, - "id": "cortexso/sailor-2", - "lastModified": "2025-03-03T02:58:28.000Z", - "likes": 0, - "model-index": null, - "modelId": "cortexso/sailor-2", - "pipeline_tag": "text-generation", - "private": false, - "sha": "89b3079762dedf6ff4fbc94545632b3554c16420", - "siblings": [ - { - "rfilename": ".gitattributes" - }, - { - "rfilename": "README.md" - }, - { - "rfilename": "metadata.yml" - }, - { - "rfilename": "model.yml" - }, - { - "rfilename": "sailor2-1b-chat-q2_k.gguf" - }, - { - "rfilename": "sailor2-1b-chat-q3_k_l.gguf" - }, - { - "rfilename": "sailor2-1b-chat-q3_k_m.gguf" - }, - { - "rfilename": "sailor2-1b-chat-q3_k_s.gguf" - }, - { - "rfilename": "sailor2-1b-chat-q4_k_m.gguf" - }, - { - "rfilename": "sailor2-1b-chat-q4_k_s.gguf" - }, - { - "rfilename": "sailor2-1b-chat-q5_k_m.gguf" - }, - { - "rfilename": "sailor2-1b-chat-q5_k_s.gguf" - }, - { - "rfilename": "sailor2-1b-chat-q6_k.gguf" - }, - { - "rfilename": "sailor2-1b-chat-q8_0.gguf" - }, - { - "rfilename": "sailor2-20b-chat-q2_k.gguf" - }, - { - "rfilename": "sailor2-20b-chat-q3_k_l.gguf" - }, - { - "rfilename": "sailor2-20b-chat-q3_k_m.gguf" - }, - { - "rfilename": "sailor2-20b-chat-q3_k_s.gguf" - }, - { - "rfilename": "sailor2-20b-chat-q4_k_m.gguf" - }, - { - "rfilename": "sailor2-20b-chat-q4_k_s.gguf" - }, - { - "rfilename": "sailor2-20b-chat-q5_k_m.gguf" - }, - { - "rfilename": "sailor2-20b-chat-q5_k_s.gguf" - }, - { - "rfilename": "sailor2-20b-chat-q6_k.gguf" - }, - { - "rfilename": "sailor2-20b-chat-q8_0.gguf" - }, - { - "rfilename": "sailor2-8b-chat-q2_k.gguf" - }, - { - "rfilename": "sailor2-8b-chat-q3_k_l.gguf" - }, - { - "rfilename": "sailor2-8b-chat-q3_k_m.gguf" - }, - { - "rfilename": "sailor2-8b-chat-q3_k_s.gguf" - }, - { - "rfilename": "sailor2-8b-chat-q4_k_m.gguf" - }, - { - "rfilename": "sailor2-8b-chat-q4_k_s.gguf" - }, - { - "rfilename": "sailor2-8b-chat-q5_k_m.gguf" - }, - { - "rfilename": "sailor2-8b-chat-q5_k_s.gguf" - }, - { - "rfilename": "sailor2-8b-chat-q6_k.gguf" - }, - { - "rfilename": "sailor2-8b-chat-q8_0.gguf" - } - ], - "spaces": [], - "tags": [ - "gguf", - "cortex.cpp", - "text-generation", - "arxiv:2502.12982", - "license:apache-2.0", - "endpoints_compatible", - "region:us", - "conversational" - ], - "usedStorage": 201040376768, - "widgetData": [ - { - "text": "Hi, what can you help me with?" - }, - { - "text": "What is 84 * 3 / 2?" - }, - { - "text": "Tell me an interesting fact about the universe!" - }, - { - "text": "Explain quantum computing in simple terms." 
- } - ] - }, - "models": [ - { - "id": "sailor-2:1b", - "size": 738628256 - }, - { - "id": "sailor-2:20b", - "size": 11622380384 - }, - { - "id": "sailor-2:8b", - "size": 5242934176 - } - ] - }, - { - "author": "CohereForAI", - "id": "cortexso/aya-expanse", - "metadata": { - "_id": "671ac0aee98f80735b80ce0d", - "author": "cortexso", - "cardData": { - "license": "cc-by-sa-4.0", - "pipeline_tag": "text-generation", - "tags": ["cortex.cpp"] - }, - "createdAt": "2024-10-24T21:48:30.000Z", - "description": "---\nlicense: cc-by-sa-4.0\npipeline_tag: text-generation\ntags:\n- cortex.cpp\n---\n\n## Overview\n\nAya Expanse is an open-weight research release of a model with highly advanced multilingual capabilities. It focuses on pairing a highly performant pre-trained Command family of models with the result of a year’s dedicated research from Cohere For AI, including data arbitrage, multilingual preference training, safety tuning, and model merging. The result is a powerful multilingual large language model serving 23 languages.\n\n## Variants\n\n| No | Variant | Cortex CLI command |\n| --- | --- | --- |\n| 1 | [Aya-expanse-8b](https://huggingface.co/cortexso/aya-expanse/tree/8b) | `cortex run aya-expanse:8b` |\n| 2 | [Aya-expanse-32b](https://huggingface.co/cortexso/aya-expanse/tree/32b) | `cortex run aya-expanse:32b` |\n\n## Use it with Jan (UI)\n\n1. Install **Jan** using [Quickstart](https://jan.ai/docs/quickstart)\n2. Use in Jan model Hub:\n ```bash\n cortexso/aya-expanse\n ```\n\n## Use it with Cortex (CLI)\n\n1. Install **Cortex** using [Quickstart](https://cortex.jan.ai/docs/quickstart)\n2. Run the model with command:\n ```bash\n cortex run aya-expanse\n ```\n\n## Credits\n\n- **Author:** CohereAI\n- **Converter:** [Homebrew](https://www.homebrew.ltd/)\n- **Original License:** [License](https://cohere.com/c4ai-cc-by-nc-license)\n- **Papers:** [Aya Expanse Blog](https://cohere.com/blog/aya-expanse-connecting-our-world)", - "disabled": false, - "downloads": 219, - "gated": false, - "gguf": { - "architecture": "command-r", - "bos_token": "", - "chat_template": "{{ bos_token }}{% if messages[0]['role'] == 'system' %}{% set loop_messages = messages[1:] %}{% set system_message = messages[0]['content'] %}{% elif false == true %}{% set loop_messages = messages %}{% set system_message = 'You are Aya, a brilliant, sophisticated, multilingual AI-assistant trained to assist human users by providing thorough responses. You are able to interact and respond to questions in 23 languages and you are powered by a multilingual model built by Cohere For AI.' 
%}{% else %}{% set loop_messages = messages %}{% set system_message = false %}{% endif %}{% if system_message != false %}{{ '<|START_OF_TURN_TOKEN|><|SYSTEM_TOKEN|>' + system_message + '<|END_OF_TURN_TOKEN|>' }}{% endif %}{% for message in loop_messages %}{% if (message['role'] == 'user') != (loop.index0 % 2 == 0) %}{{ raise_exception('Conversation roles must alternate user/assistant/user/assistant/...') }}{% endif %}{% set content = message['content'] %}{% if message['role'] == 'user' %}{{ '<|START_OF_TURN_TOKEN|><|USER_TOKEN|>' + content.strip() + '<|END_OF_TURN_TOKEN|>' }}{% elif message['role'] == 'assistant' %}{{ '<|START_OF_TURN_TOKEN|><|CHATBOT_TOKEN|>' + content.strip() + '<|END_OF_TURN_TOKEN|>' }}{% endif %}{% endfor %}{% if add_generation_prompt %}{{ '<|START_OF_TURN_TOKEN|><|CHATBOT_TOKEN|>' }}{% endif %}", - "context_length": 8192, - "eos_token": "<|END_OF_TURN_TOKEN|>", - "total": 32296476672 - }, - "id": "cortexso/aya-expanse", - "lastModified": "2025-03-03T05:45:56.000Z", - "likes": 1, - "model-index": null, - "modelId": "cortexso/aya-expanse", - "pipeline_tag": "text-generation", - "private": false, - "sha": "d3de661105fcf536bac3f1ec747a2d39d25fe08f", - "siblings": [ - { - "rfilename": ".gitattributes" - }, - { - "rfilename": "README.md" - }, - { - "rfilename": "aya-expanse-32b-q2_k.gguf" - }, - { - "rfilename": "aya-expanse-32b-q3_k_l.gguf" - }, - { - "rfilename": "aya-expanse-32b-q3_k_m.gguf" - }, - { - "rfilename": "aya-expanse-32b-q3_k_s.gguf" - }, - { - "rfilename": "aya-expanse-32b-q4_k_m.gguf" - }, - { - "rfilename": "aya-expanse-32b-q4_k_s.gguf" - }, - { - "rfilename": "aya-expanse-32b-q5_k_m.gguf" - }, - { - "rfilename": "aya-expanse-32b-q5_k_s.gguf" - }, - { - "rfilename": "aya-expanse-32b-q6_k.gguf" - }, - { - "rfilename": "aya-expanse-32b-q8_0.gguf" - }, - { - "rfilename": "aya-expanse-8b-q2_k.gguf" - }, - { - "rfilename": "aya-expanse-8b-q3_k_l.gguf" - }, - { - "rfilename": "aya-expanse-8b-q3_k_m.gguf" - }, - { - "rfilename": "aya-expanse-8b-q3_k_s.gguf" - }, - { - "rfilename": "aya-expanse-8b-q4_k_m.gguf" - }, - { - "rfilename": "aya-expanse-8b-q4_k_s.gguf" - }, - { - "rfilename": "aya-expanse-8b-q5_k_m.gguf" - }, - { - "rfilename": "aya-expanse-8b-q5_k_s.gguf" - }, - { - "rfilename": "aya-expanse-8b-q6_k.gguf" - }, - { - "rfilename": "aya-expanse-8b-q8_0.gguf" - }, - { - "rfilename": "metadata.yml" - }, - { - "rfilename": "model.yml" - } - ], - "spaces": [], - "tags": [ - "gguf", - "cortex.cpp", - "text-generation", - "license:cc-by-sa-4.0", - "endpoints_compatible", - "region:us", - "conversational" - ], - "usedStorage": 283759636448, - "widgetData": [ - { - "text": "Hi, what can you help me with?" - }, - { - "text": "What is 84 * 3 / 2?" - }, - { - "text": "Tell me an interesting fact about the universe!" - }, - { - "text": "Explain quantum computing in simple terms." - } - ] - }, - "models": [ - { - "id": "aya-expanse:8b", - "size": 5056974624 - }, - { - "id": "aya-expanse:32b", - "size": 19800825408 - } - ] - }, - { - "author": "CohereForAI", - "id": "cortexso/command-r", - "metadata": { - "_id": "66751b98585f2bf57092b2ae", - "author": "cortexso", - "cardData": { - "license": "cc-by-nc-4.0", - "pipeline_tag": "text-generation", - "tags": ["cortex.cpp"] - }, - "createdAt": "2024-06-21T06:20:08.000Z", - "description": "---\nlicense: cc-by-nc-4.0\npipeline_tag: text-generation\ntags:\n- cortex.cpp\n---\n\n## Overview\n\nC4AI Command-R is a research release of a 35 billion parameter highly performant generative model. 
Command-R is a large language model with open weights optimized for a variety of use cases including reasoning, summarization, and question answering. Command-R has the capability for multilingual generation evaluated in 10 languages and highly performant RAG capabilities.\n\n## Variants\n\n| No | Variant | Cortex CLI command |\n| --- | --- | --- |\n| 1 | [Command-r-32b](https://huggingface.co/cortexhub/command-r/tree/32b) | `cortex run command-r:32b` |\n| 1 | [Command-r-35b](https://huggingface.co/cortexhub/command-r/tree/35b) | `cortex run command-r:35b` |\n\n## Use it with Jan (UI)\n\n1. Install **Jan** using [Quickstart](https://jan.ai/docs/quickstart)\n2. Use in Jan model Hub:\n ```bash\n cortexhub/command-r\n ```\n \n## Use it with Cortex (CLI)\n\n1. Install **Cortex** using [Quickstart](https://cortex.jan.ai/docs/quickstart)\n2. Run the model with command:\n ```bash\n cortex run command-r\n ```\n \n## Credits\n\n- **Author:** Cohere For AI\n- **Converter:** [Homebrew](https://www.homebrew.ltd/)\n- **Original License:** [Licence](https://cohere.com/c4ai-cc-by-nc-license)", - "disabled": false, - "downloads": 613, - "gated": false, - "gguf": { - "architecture": "command-r", - "bos_token": "", - "chat_template": "{{ bos_token }}{% if messages[0]['role'] == 'system' %}{% set loop_messages = messages[1:] %}{% set system_message = messages[0]['content'] %}{% elif false == true %}{% set loop_messages = messages %}{% set system_message = 'You are a large language model called Command R built by the company Cohere. You act as a brilliant, sophisticated, AI-assistant chatbot trained to assist human users by providing thorough responses.' %}{% else %}{% set loop_messages = messages %}{% set system_message = false %}{% endif %}{% if system_message != false %}{{ '<|START_OF_TURN_TOKEN|><|SYSTEM_TOKEN|>' + system_message + '<|END_OF_TURN_TOKEN|>' }}{% endif %}{% for message in loop_messages %}{% if (message['role'] == 'user') != (loop.index0 % 2 == 0) %}{{ raise_exception('Conversation roles must alternate user/assistant/user/assistant/...') }}{% endif %}{% set content = message['content'] %}{% if message['role'] == 'user' %}{{ '<|START_OF_TURN_TOKEN|><|USER_TOKEN|>' + content.strip() + '<|END_OF_TURN_TOKEN|>' }}{% elif message['role'] == 'assistant' %}{{ '<|START_OF_TURN_TOKEN|><|CHATBOT_TOKEN|>' + content.strip() + '<|END_OF_TURN_TOKEN|>' }}{% endif %}{% endfor %}{% if add_generation_prompt %}{{ '<|START_OF_TURN_TOKEN|><|CHATBOT_TOKEN|>' }}{% endif %}", - "context_length": 131072, - "eos_token": "<|END_OF_TURN_TOKEN|>", - "total": 32296476672 - }, - "id": "cortexso/command-r", - "lastModified": "2025-03-03T05:55:03.000Z", - "likes": 1, - "model-index": null, - "modelId": "cortexso/command-r", - "pipeline_tag": "text-generation", - "private": false, - "sha": "829fc0c4d726206187684dcbaf2a53c658d5d34a", - "siblings": [ - { - "rfilename": ".gitattributes" - }, - { - "rfilename": "README.md" - }, - { - "rfilename": "c4ai-command-r-08-2024-q2_k.gguf" - }, - { - "rfilename": "c4ai-command-r-08-2024-q3_k_l.gguf" - }, - { - "rfilename": "c4ai-command-r-08-2024-q3_k_m.gguf" - }, - { - "rfilename": "c4ai-command-r-08-2024-q3_k_s.gguf" - }, - { - "rfilename": "c4ai-command-r-08-2024-q4_k_m.gguf" - }, - { - "rfilename": "c4ai-command-r-08-2024-q4_k_s.gguf" - }, - { - "rfilename": "c4ai-command-r-08-2024-q5_k_m.gguf" - }, - { - "rfilename": "c4ai-command-r-08-2024-q5_k_s.gguf" - }, - { - "rfilename": "c4ai-command-r-08-2024-q6_k.gguf" - }, - { - "rfilename": "c4ai-command-r-08-2024-q8_0.gguf" - }, - { - 
"rfilename": "c4ai-command-r-v01-q2_k.gguf" - }, - { - "rfilename": "c4ai-command-r-v01-q3_k_l.gguf" - }, - { - "rfilename": "c4ai-command-r-v01-q3_k_m.gguf" - }, - { - "rfilename": "c4ai-command-r-v01-q3_k_s.gguf" - }, - { - "rfilename": "c4ai-command-r-v01-q4_k_m.gguf" - }, - { - "rfilename": "c4ai-command-r-v01-q4_k_s.gguf" - }, - { - "rfilename": "c4ai-command-r-v01-q5_k_m.gguf" - }, - { - "rfilename": "c4ai-command-r-v01-q5_k_s.gguf" - }, - { - "rfilename": "c4ai-command-r-v01-q6_k.gguf" - }, - { - "rfilename": "c4ai-command-r-v01-q8_0.gguf" - }, - { - "rfilename": "metadata.yml" - }, - { - "rfilename": "model.yml" - } - ], - "spaces": [], - "tags": [ - "gguf", - "cortex.cpp", - "text-generation", - "license:cc-by-nc-4.0", - "endpoints_compatible", - "region:us", - "conversational" - ], - "usedStorage": 471257928608, - "widgetData": [ - { - "text": "Hi, what can you help me with?" - }, - { - "text": "What is 84 * 3 / 2?" - }, - { - "text": "Tell me an interesting fact about the universe!" - }, - { - "text": "Explain quantum computing in simple terms." - } - ] - }, - "models": [ - { - "id": "command-r:32b", - "size": 19800837184 - }, - { - "id": "command-r:35b", - "size": 21527055296 - } - ] - }, - { - "author": "simplescaling", - "id": "cortexso/simplescaling-s1", - "metadata": { - "_id": "67a4e03a6f317f30b9a285b0", - "author": "cortexso", - "cardData": { - "license": "mit", - "pipeline_tag": "text-generation", - "tags": ["cortex.cpp"] - }, - "createdAt": "2025-02-06T16:15:54.000Z", - "description": "---\nlicense: mit\npipeline_tag: text-generation\ntags:\n- cortex.cpp\n---\n## Overview\nThe 'simplescaling-s1' model is a refined version of 'simplescaling/s1-32B,' designed to enhance scalability and streamline tasks in AI applications. It focuses on efficiently managing resource allocation while maintaining high performance across various workloads. This model is particularly effective for text generation, summarization, and conversational AI, as it balances speed and accuracy. Users can leverage 'simplescaling-s1' for building scalable applications that require processing large datasets or generating content quickly. Overall, the model achieves impressive results with reduced computational overhead, making it suitable for both research and practical deployments.\n## Variants\n| No | Variant | Cortex CLI command |\n| --- | --- | --- |\n| 1 | [Simplescaling-s1-32b](https://huggingface.co/cortexso/simplescaling-s1/tree/32b) | cortex run simplescaling-s1:32b |\n## Use it with Jan (UI)\n1. Install **Jan** using [Quickstart](https://jan.ai/docs/quickstart)\n2. Use in Jan model Hub:\n ```bash\n cortexso/simplescaling-s1\n ```\n \n## Use it with Cortex (CLI)\n1. Install **Cortex** using [Quickstart](https://cortex.jan.ai/docs/quickstart)\n2. Run the model with command:\n ```bash\n cortex run simplescaling-s1\n ```\n## Credits\n- **Author:** simplescaling\n- **Converter:** [Homebrew](https://www.homebrew.ltd/)\n- **Original License:** [License](https://choosealicense.com/licenses/apache-2.0/)\n- **Paper**: [Paper](https://arxiv.org/abs/2501.19393)", - "disabled": false, - "downloads": 104, - "gated": false, - "gguf": { - "architecture": "qwen2", - "bos_token": "<|endoftext|>", - "chat_template": "{%- if tools %}\n {{- '<|im_start|>system\\n' }}\n {%- if messages[0]['role'] == 'system' %}\n {{- messages[0]['content'] }}\n {%- else %}\n {{- 'You are Qwen, created by Alibaba Cloud. You are a helpful assistant.' 
}}\n {%- endif %}\n {{- \"\\n\\n# Tools\\n\\nYou may call one or more functions to assist with the user query.\\n\\nYou are provided with function signatures within XML tags:\\n\" }}\n {%- for tool in tools %}\n {{- \"\\n\" }}\n {{- tool | tojson }}\n {%- endfor %}\n {{- \"\\n\\n\\nFor each function call, return a json object with function name and arguments within XML tags:\\n\\n{\\\"name\\\": , \\\"arguments\\\": }\\n<|im_end|>\\n\" }}\n{%- else %}\n {%- if messages[0]['role'] == 'system' %}\n {{- '<|im_start|>system\\n' + messages[0]['content'] + '<|im_end|>\\n' }}\n {%- else %}\n {{- '<|im_start|>system\\nYou are Qwen, created by Alibaba Cloud. You are a helpful assistant.<|im_end|>\\n' }}\n {%- endif %}\n{%- endif %}\n{%- for message in messages %}\n {%- if (message.role == \"user\") or (message.role == \"system\" and not loop.first) or (message.role == \"assistant\" and not message.tool_calls) %}\n {{- '<|im_start|>' + message.role + '\\n' + message.content + '<|im_end|>' + '\\n' }}\n {%- elif message.role == \"assistant\" %}\n {{- '<|im_start|>' + message.role }}\n {%- if message.content %}\n {{- '\\n' + message.content }}\n {%- endif %}\n {%- for tool_call in message.tool_calls %}\n {%- if tool_call.function is defined %}\n {%- set tool_call = tool_call.function %}\n {%- endif %}\n {{- '\\n\\n{\"name\": \"' }}\n {{- tool_call.name }}\n {{- '\", \"arguments\": ' }}\n {{- tool_call.arguments | tojson }}\n {{- '}\\n' }}\n {%- endfor %}\n {{- '<|im_end|>\\n' }}\n {%- elif message.role == \"tool\" %}\n {%- if (loop.index0 == 0) or (messages[loop.index0 - 1].role != \"tool\") %}\n {{- '<|im_start|>user' }}\n {%- endif %}\n {{- '\\n\\n' }}\n {{- message.content }}\n {{- '\\n' }}\n {%- if loop.last or (messages[loop.index0 + 1].role != \"tool\") %}\n {{- '<|im_end|>\\n' }}\n {%- endif %}\n {%- endif %}\n{%- endfor %}\n{%- if add_generation_prompt %}\n {{- '<|im_start|>assistant\\n' }}\n{%- endif %}\n", - "context_length": 32768, - "eos_token": "<|im_end|>", - "total": 32763876352 - }, - "id": "cortexso/simplescaling-s1", - "lastModified": "2025-03-03T03:46:24.000Z", - "likes": 0, - "model-index": null, - "modelId": "cortexso/simplescaling-s1", - "pipeline_tag": "text-generation", - "private": false, - "sha": "5755e76ec22a9ca9d0271ce16f5287bb9ad3c1a6", - "siblings": [ - { - "rfilename": ".gitattributes" - }, - { - "rfilename": "README.md" - }, - { - "rfilename": "metadata.yml" - }, - { - "rfilename": "model.yml" - }, - { - "rfilename": "s1-32b-q2_k.gguf" - }, - { - "rfilename": "s1-32b-q3_k_l.gguf" - }, - { - "rfilename": "s1-32b-q3_k_m.gguf" - }, - { - "rfilename": "s1-32b-q3_k_s.gguf" - }, - { - "rfilename": "s1-32b-q4_k_m.gguf" - }, - { - "rfilename": "s1-32b-q4_k_s.gguf" - }, - { - "rfilename": "s1-32b-q5_k_m.gguf" - }, - { - "rfilename": "s1-32b-q5_k_s.gguf" - }, - { - "rfilename": "s1-32b-q6_k.gguf" - }, - { - "rfilename": "s1-32b-q8_0.gguf" - } - ], - "spaces": [], - "tags": [ - "gguf", - "cortex.cpp", - "text-generation", - "arxiv:2501.19393", - "license:mit", - "endpoints_compatible", - "region:us", - "conversational" - ], - "usedStorage": 206130756480, - "widgetData": [ - { - "text": "Hi, what can you help me with?" - }, - { - "text": "What is 84 * 3 / 2?" - }, - { - "text": "Tell me an interesting fact about the universe!" - }, - { - "text": "Explain quantum computing in simple terms." 
- } - ] - }, - "models": [ - { - "id": "simplescaling-s1:32b", - "size": 19851336384 - } - ] - }, - { - "author": "Qwen", - "id": "cortexso/qwq", - "metadata": { - "_id": "67497b496615e96c7c8d6b05", - "author": "cortexso", - "cardData": { - "license": "other", - "pipeline_tag": "text-generation", - "tags": ["cortex.cpp"] - }, - "createdAt": "2024-11-29T08:28:57.000Z", - "description": "---\nlicense: other\npipeline_tag: text-generation\ntags:\n- cortex.cpp\n---\n\n## Overview\n\nQwQ-32B-Preview is an experimental large-scale research model by the Qwen Team, focusing on advanced AI reasoning. While it demonstrates strong analytical capabilities, it also presents notable limitations:\n\n## Variants\n\n| No | Variant | Cortex CLI command |\n| --- | --- | --- |\n| 1 | [Qwq-32b](https://huggingface.co/cortexso/qwq/tree/32b) | `cortex run qwq:32b` |\n\n## Use it with Jan (UI)\n\n1. Install **Jan** using [Quickstart](https://jan.ai/docs/quickstart)\n2. Use in Jan model Hub:\n ```bash\n cortexhub/qwq\n ```\n\n## Use it with Cortex (CLI)\n\n1. Install **Cortex** using [Quickstart](https://cortex.jan.ai/docs/quickstart)\n2. Run the model with command:\n ```bash\n cortex run qwq\n ```\n \n## Credits\n\n- **Author:** Qwen\n- **Converter:** [Homebrew](https://homebrew.ltd/)\n- **Original License:** [Licence](https://huggingface.co/Qwen/QwQ-32B-Preview/blob/main/LICENSE)\n- **Papers:** [QwQ Blog](https://qwenlm.github.io/blog/qwq-32b-preview/)", - "disabled": false, - "downloads": 101, - "gated": false, - "gguf": { - "architecture": "qwen2", - "bos_token": "<|endoftext|>", - "chat_template": "{%- if tools %}\n {{- '<|im_start|>system\\n' }}\n {%- if messages[0]['role'] == 'system' %}\n {{- messages[0]['content'] }}\n {%- else %}\n {{- 'You are a helpful and harmless assistant. You are Qwen developed by Alibaba. You should think step-by-step.' }}\n {%- endif %}\n {{- \"\\n\\n# Tools\\n\\nYou may call one or more functions to assist with the user query.\\n\\nYou are provided with function signatures within XML tags:\\n\" }}\n {%- for tool in tools %}\n {{- \"\\n\" }}\n {{- tool | tojson }}\n {%- endfor %}\n {{- \"\\n\\n\\nFor each function call, return a json object with function name and arguments within XML tags:\\n\\n{\\\"name\\\": , \\\"arguments\\\": }\\n<|im_end|>\\n\" }}\n{%- else %}\n {%- if messages[0]['role'] == 'system' %}\n {{- '<|im_start|>system\\n' + messages[0]['content'] + '<|im_end|>\\n' }}\n {%- else %}\n {{- '<|im_start|>system\\nYou are a helpful and harmless assistant. You are Qwen developed by Alibaba. 
You should think step-by-step.<|im_end|>\\n' }}\n {%- endif %}\n{%- endif %}\n{%- for message in messages %}\n {%- if (message.role == \"user\") or (message.role == \"system\" and not loop.first) or (message.role == \"assistant\" and not message.tool_calls) %}\n {{- '<|im_start|>' + message.role + '\\n' + message.content + '<|im_end|>' + '\\n' }}\n {%- elif message.role == \"assistant\" %}\n {{- '<|im_start|>' + message.role }}\n {%- if message.content %}\n {{- '\\n' + message.content }}\n {%- endif %}\n {%- for tool_call in message.tool_calls %}\n {%- if tool_call.function is defined %}\n {%- set tool_call = tool_call.function %}\n {%- endif %}\n {{- '\\n\\n{\"name\": \"' }}\n {{- tool_call.name }}\n {{- '\", \"arguments\": ' }}\n {{- tool_call.arguments | tojson }}\n {{- '}\\n' }}\n {%- endfor %}\n {{- '<|im_end|>\\n' }}\n {%- elif message.role == \"tool\" %}\n {%- if (loop.index0 == 0) or (messages[loop.index0 - 1].role != \"tool\") %}\n {{- '<|im_start|>user' }}\n {%- endif %}\n {{- '\\n\\n' }}\n {{- message.content }}\n {{- '\\n' }}\n {%- if loop.last or (messages[loop.index0 + 1].role != \"tool\") %}\n {{- '<|im_end|>\\n' }}\n {%- endif %}\n {%- endif %}\n{%- endfor %}\n{%- if add_generation_prompt %}\n {{- '<|im_start|>assistant\\n' }}\n{%- endif %}\n", - "context_length": 32768, - "eos_token": "<|im_end|>", - "total": 32763876352 - }, - "id": "cortexso/qwq", - "lastModified": "2025-03-03T02:23:40.000Z", - "likes": 0, - "model-index": null, - "modelId": "cortexso/qwq", - "pipeline_tag": "text-generation", - "private": false, - "sha": "fc6f23c0d5c8faf8b79b11e03aaa7c656fed8dfd", - "siblings": [ - { - "rfilename": ".gitattributes" - }, - { - "rfilename": "README.md" - }, - { - "rfilename": "metadata.yml" - }, - { - "rfilename": "model.yml" - }, - { - "rfilename": "qwq-32b-preview-q2_k.gguf" - }, - { - "rfilename": "qwq-32b-preview-q3_k_l.gguf" - }, - { - "rfilename": "qwq-32b-preview-q3_k_m.gguf" - }, - { - "rfilename": "qwq-32b-preview-q3_k_s.gguf" - }, - { - "rfilename": "qwq-32b-preview-q4_k_m.gguf" - }, - { - "rfilename": "qwq-32b-preview-q4_k_s.gguf" - }, - { - "rfilename": "qwq-32b-preview-q5_k_m.gguf" - }, - { - "rfilename": "qwq-32b-preview-q5_k_s.gguf" - }, - { - "rfilename": "qwq-32b-preview-q6_k.gguf" - }, - { - "rfilename": "qwq-32b-preview-q8_0.gguf" - } - ], - "spaces": [], - "tags": [ - "gguf", - "cortex.cpp", - "text-generation", - "license:other", - "endpoints_compatible", - "region:us", - "conversational" - ], - "usedStorage": 206130755200, - "widgetData": [ - { - "text": "Hi, what can you help me with?" - }, - { - "text": "What is 84 * 3 / 2?" - }, - { - "text": "Tell me an interesting fact about the universe!" - }, - { - "text": "Explain quantum computing in simple terms." - } - ] - }, - "models": [ - { - "id": "qwq:32b", - "size": 19851336256 - } - ] - }, - { - "author": "Nexusflow", - "id": "cortexso/athene", - "metadata": { - "_id": "6737ae7de6b1d15ff54d0a08", - "author": "cortexso", - "cardData": { - "license": "other", - "pipeline_tag": "text-generation", - "tags": ["cortex.cpp"] - }, - "createdAt": "2024-11-15T20:26:37.000Z", - "description": "---\nlicense: other\npipeline_tag: text-generation\ntags:\n- cortex.cpp\n---\n\n## Overview\n\nAthene-V2-Chat-72B is an open-weight LLM that competes on par with GPT-4o across various benchmarks. It is currently ranked as the best open model on Chatbot Arena, where it outperforms GPT-4o-0513 (the highest-ranked GPT-4o model on Arena) in hard and math categories. 
It also matches GPT-4o-0513 in coding, instruction following, longer queries, and multi-turn conversations.\n\nTrained through RLHF with Qwen-2.5-72B-Instruct as the base model, Athene-V2-Chat-72B excels in chat, math, and coding. Additionally, its sister model, Athene-V2-Agent-72B, surpasses GPT-4o in complex function calling and agentic applications, further extending its capabilities.\n## Variants\n\n| No | Variant | Cortex CLI command |\n| --- | --- | --- |\n| 1 | [Athene-72b](https://huggingface.co/cortexso/athene/tree/72b) | `cortex run athene:72b` |\n\n## Use it with Jan (UI)\n\n1. Install **Jan** using [Quickstart](https://jan.ai/docs/quickstart)\n2. Use in Jan model Hub:\n ```bash\n cortexhub/athene\n ```\n\n## Use it with Cortex (CLI)\n\n1. Install **Cortex** using [Quickstart](https://cortex.jan.ai/docs/quickstart)\n2. Run the model with command:\n ```bash\n cortex run athene\n ```\n \n## Credits\n\n- **Author:** Nexusflow\n- **Converter:** [Homebrew](https://homebrew.ltd/)\n- **Original License:** [Licence](https://huggingface.co/Nexusflow/Athene-V2-Chat/blob/main/Nexusflow_Research_License_.pdf)\n- **Papers:** [Athene V2 Blog](https://nexusflow.ai/blogs/athene-v2)", - "disabled": false, - "downloads": 13, - "gated": false, - "gguf": { - "architecture": "qwen2", - "bos_token": "<|endoftext|>", - "chat_template": "{%- if tools %}\n {{- '<|im_start|>system\\n' }}\n {%- if messages[0]['role'] == 'system' %}\n {{- messages[0]['content'] }}\n {%- else %}\n {{- 'You are Qwen, created by Alibaba Cloud. You are a helpful assistant.' }}\n {%- endif %}\n {{- \"\\n\\n# Tools\\n\\nYou may call one or more functions to assist with the user query.\\n\\nYou are provided with function signatures within XML tags:\\n\" }}\n {%- for tool in tools %}\n {{- \"\\n\" }}\n {{- tool | tojson }}\n {%- endfor %}\n {{- \"\\n\\n\\nFor each function call, return a json object with function name and arguments within XML tags:\\n\\n{\\\"name\\\": , \\\"arguments\\\": }\\n<|im_end|>\\n\" }}\n{%- else %}\n {%- if messages[0]['role'] == 'system' %}\n {{- '<|im_start|>system\\n' + messages[0]['content'] + '<|im_end|>\\n' }}\n {%- else %}\n {{- '<|im_start|>system\\nYou are Qwen, created by Alibaba Cloud. 
You are a helpful assistant.<|im_end|>\\n' }}\n {%- endif %}\n{%- endif %}\n{%- for message in messages %}\n {%- if (message.role == \"user\") or (message.role == \"system\" and not loop.first) or (message.role == \"assistant\" and not message.tool_calls) %}\n {{- '<|im_start|>' + message.role + '\\n' + message.content + '<|im_end|>' + '\\n' }}\n {%- elif message.role == \"assistant\" %}\n {{- '<|im_start|>' + message.role }}\n {%- if message.content %}\n {{- '\\n' + message.content }}\n {%- endif %}\n {%- for tool_call in message.tool_calls %}\n {%- if tool_call.function is defined %}\n {%- set tool_call = tool_call.function %}\n {%- endif %}\n {{- '\\n\\n{\"name\": \"' }}\n {{- tool_call.name }}\n {{- '\", \"arguments\": ' }}\n {{- tool_call.arguments | tojson }}\n {{- '}\\n' }}\n {%- endfor %}\n {{- '<|im_end|>\\n' }}\n {%- elif message.role == \"tool\" %}\n {%- if (loop.index0 == 0) or (messages[loop.index0 - 1].role != \"tool\") %}\n {{- '<|im_start|>user' }}\n {%- endif %}\n {{- '\\n\\n' }}\n {{- message.content }}\n {{- '\\n' }}\n {%- if loop.last or (messages[loop.index0 + 1].role != \"tool\") %}\n {{- '<|im_end|>\\n' }}\n {%- endif %}\n {%- endif %}\n{%- endfor %}\n{%- if add_generation_prompt %}\n {{- '<|im_start|>assistant\\n' }}\n{%- endif %}\n", - "context_length": 32768, - "eos_token": "<|im_end|>", - "total": 72706203648 - }, - "id": "cortexso/athene", - "lastModified": "2025-03-03T06:04:09.000Z", - "likes": 0, - "model-index": null, - "modelId": "cortexso/athene", - "pipeline_tag": "text-generation", - "private": false, - "sha": "a92447ca675e741541855ac03b8f144dee1067c4", - "siblings": [ - { - "rfilename": ".gitattributes" - }, - { - "rfilename": "README.md" - }, - { - "rfilename": "athene-v2-chat-q4_k_m.gguf" - }, - { - "rfilename": "metadata.yml" - }, - { - "rfilename": "model.yml" - } - ], - "spaces": [], - "tags": [ - "gguf", - "cortex.cpp", - "text-generation", - "license:other", - "endpoints_compatible", - "region:us", - "conversational" - ], - "usedStorage": 47415715136, - "widgetData": [ - { - "text": "Hi, what can you help me with?" - }, - { - "text": "What is 84 * 3 / 2?" - }, - { - "text": "Tell me an interesting fact about the universe!" - }, - { - "text": "Explain quantum computing in simple terms." - } - ] - }, - "models": [ - { - "id": "athene:72b", - "size": 47415715136 - } - ] - }, - { - "author": "MistralAI", - "id": "cortexso/mistral", - "metadata": { - "_id": "6667b1796e382e809d62b9fc", - "author": "cortexso", - "cardData": { - "license": "other", - "pipeline_tag": "text-generation", - "tags": ["cortex.cpp"] - }, - "createdAt": "2024-06-11T02:07:53.000Z", - "description": "---\nlicense: other\npipeline_tag: text-generation\ntags:\n- cortex.cpp\n---\n\n## Overview\n\nMistral 7B, a 7-billion-parameter Large Language Model by Mistral AI. Designed for efficiency and performance, it suits real-time applications requiring swift responses.\n\n## Variants\n\n| No | Variant | Cortex CLI command |\n| --- | --- | --- |\n| 1 | [Mistra-7b](https://huggingface.co/cortexhub/mistral/tree/7b) | `cortex run mistral:7b` |\n\n## Use it with Jan (UI)\n\n1. Install **Jan** using [Quickstart](https://jan.ai/docs/quickstart)\n2. Use in Jan model Hub:\n ```bash\n cortexhub/mistral\n ```\n\n## Use it with Cortex (CLI)\n\n1. Install **Cortex** using [Quickstart](https://cortex.jan.ai/docs/quickstart)\n2. 
Run the model with command:\n ```bash\n cortex run mistral\n ```\n \n## Credits\n\n- **Author:** MistralAI\n- **Converter:** [Homebrew](https://www.homebrew.ltd/)\n- **Original License:** [Licence](https://mistral.ai/licenses/MNPL-0.1.md)\n- **Papers:** [Mistral paper](https://arxiv.org/abs/2310.06825)", - "disabled": false, - "downloads": 1895, - "gated": false, - "gguf": { - "architecture": "llama", - "bos_token": "", - "chat_template": "{%- if messages[0][\"role\"] == \"system\" %}\n {%- set system_message = messages[0][\"content\"] %}\n {%- set loop_messages = messages[1:] %}\n{%- else %}\n {%- set loop_messages = messages %}\n{%- endif %}\n{%- if not tools is defined %}\n {%- set tools = none %}\n{%- endif %}\n{%- set user_messages = loop_messages | selectattr(\"role\", \"equalto\", \"user\") | list %}\n\n{#- This block checks for alternating user/assistant messages, skipping tool calling messages #}\n{%- set ns = namespace() %}\n{%- set ns.index = 0 %}\n{%- for message in loop_messages %}\n {%- if not (message.role == \"tool\" or message.role == \"tool_results\" or (message.tool_calls is defined and message.tool_calls is not none)) %}\n {%- if (message[\"role\"] == \"user\") != (ns.index % 2 == 0) %}\n {{- raise_exception(\"After the optional system message, conversation roles must alternate user/assistant/user/assistant/...\") }}\n {%- endif %}\n {%- set ns.index = ns.index + 1 %}\n {%- endif %}\n{%- endfor %}\n\n{{- bos_token }}\n{%- for message in loop_messages %}\n {%- if message[\"role\"] == \"user\" %}\n {%- if tools is not none and (message == user_messages[-1]) %}\n {{- \"[AVAILABLE_TOOLS] [\" }}\n {%- for tool in tools %}\n {%- set tool = tool.function %}\n {{- '{\"type\": \"function\", \"function\": {' }}\n {%- for key, val in tool.items() if key != \"return\" %}\n {%- if val is string %}\n {{- '\"' + key + '\": \"' + val + '\"' }}\n {%- else %}\n {{- '\"' + key + '\": ' + val|tojson }}\n {%- endif %}\n {%- if not loop.last %}\n {{- \", \" }}\n {%- endif %}\n {%- endfor %}\n {{- \"}}\" }}\n {%- if not loop.last %}\n {{- \", \" }}\n {%- else %}\n {{- \"]\" }}\n {%- endif %}\n {%- endfor %}\n {{- \"[/AVAILABLE_TOOLS]\" }}\n {%- endif %}\n {%- if loop.last and system_message is defined %}\n {{- \"[INST] \" + system_message + \"\\n\\n\" + message[\"content\"] + \"[/INST]\" }}\n {%- else %}\n {{- \"[INST] \" + message[\"content\"] + \"[/INST]\" }}\n {%- endif %}\n {%- elif message.tool_calls is defined and message.tool_calls is not none %}\n {{- \"[TOOL_CALLS] [\" }}\n {%- for tool_call in message.tool_calls %}\n {%- set out = tool_call.function|tojson %}\n {{- out[:-1] }}\n {%- if not tool_call.id is defined or tool_call.id|length != 9 %}\n {{- raise_exception(\"Tool call IDs should be alphanumeric strings with length 9!\") }}\n {%- endif %}\n {{- ', \"id\": \"' + tool_call.id + '\"}' }}\n {%- if not loop.last %}\n {{- \", \" }}\n {%- else %}\n {{- \"]\" + eos_token }}\n {%- endif %}\n {%- endfor %}\n {%- elif message[\"role\"] == \"assistant\" %}\n {{- \" \" + message[\"content\"]|trim + eos_token}}\n {%- elif message[\"role\"] == \"tool_results\" or message[\"role\"] == \"tool\" %}\n {%- if message.content is defined and message.content.content is defined %}\n {%- set content = message.content.content %}\n {%- else %}\n {%- set content = message.content %}\n {%- endif %}\n {{- '[TOOL_RESULTS] {\"content\": ' + content|string + \", \" }}\n {%- if not message.tool_call_id is defined or message.tool_call_id|length != 9 %}\n {{- raise_exception(\"Tool call IDs should be 
alphanumeric strings with length 9!\") }}\n {%- endif %}\n {{- '\"call_id\": \"' + message.tool_call_id + '\"}[/TOOL_RESULTS]' }}\n {%- else %}\n {{- raise_exception(\"Only user and assistant roles are supported, with the exception of an initial optional system message!\") }}\n {%- endif %}\n{%- endfor %}\n", - "context_length": 32768, - "eos_token": "", - "total": 7248023552 - }, - "id": "cortexso/mistral", - "lastModified": "2025-03-03T02:39:43.000Z", - "likes": 1, - "model-index": null, - "modelId": "cortexso/mistral", - "pipeline_tag": "text-generation", - "private": false, - "sha": "125b0ef1bdf6441d5c00f6a6a24a491214e532bd", - "siblings": [ - { - "rfilename": ".gitattributes" - }, - { - "rfilename": "README.md" - }, - { - "rfilename": "metadata.yml" - }, - { - "rfilename": "mistral-7b-instruct-v0.3-q2_k.gguf" - }, - { - "rfilename": "mistral-7b-instruct-v0.3-q3_k_l.gguf" - }, - { - "rfilename": "mistral-7b-instruct-v0.3-q3_k_m.gguf" - }, - { - "rfilename": "mistral-7b-instruct-v0.3-q3_k_s.gguf" - }, - { - "rfilename": "mistral-7b-instruct-v0.3-q4_k_m.gguf" - }, - { - "rfilename": "mistral-7b-instruct-v0.3-q4_k_s.gguf" - }, - { - "rfilename": "mistral-7b-instruct-v0.3-q5_k_m.gguf" - }, - { - "rfilename": "mistral-7b-instruct-v0.3-q5_k_s.gguf" - }, - { - "rfilename": "mistral-7b-instruct-v0.3-q6_k.gguf" - }, - { - "rfilename": "mistral-7b-instruct-v0.3-q8_0.gguf" - }, - { - "rfilename": "model.yml" - } - ], - "spaces": [], - "tags": [ - "gguf", - "cortex.cpp", - "text-generation", - "arxiv:2310.06825", - "license:other", - "endpoints_compatible", - "region:us", - "conversational" - ], - "usedStorage": 49914826528, - "widgetData": [ - { - "text": "Hi, what can you help me with?" - }, - { - "text": "What is 84 * 3 / 2?" - }, - { - "text": "Tell me an interesting fact about the universe!" - }, - { - "text": "Explain quantum computing in simple terms." - } - ] - }, - "models": [ - { - "id": "mistral:7b", - "size": 4372815680 - } - ] - }, - { - "author": "HuggingFaceTB", - "id": "cortexso/smollm2", - "metadata": { - "_id": "672408e4603a8644ff7505f0", - "author": "cortexso", - "cardData": { - "license": "apache-2.0", - "pipeline_tag": "text-generation", - "tags": ["cortex.cpp"] - }, - "createdAt": "2024-10-31T22:47:00.000Z", - "description": "---\nlicense: apache-2.0\npipeline_tag: text-generation\ntags:\n- cortex.cpp\n---\n\n## Overview\n\nSmolLM2 is a family of compact language models available in three sizes: 135M, 360M, and 1.7B parameters. These models are designed to solve a wide range of tasks while being lightweight enough for on-device deployment. More details can be found in the [SmolLM2 paper](https://arxiv.org/abs/2502.02737v1).\n\nThe **1.7B variant** demonstrates significant improvements over its predecessor, SmolLM1-1.7B, especially in instruction following, knowledge retention, reasoning, and mathematical problem-solving. It was trained on **11 trillion tokens** using a diverse dataset combination, including **FineWeb-Edu, DCLM, The Stack**, and newly curated mathematics and coding datasets that will be released soon.\n\nThe **instruct version** of SmolLM2 was developed through **supervised fine-tuning (SFT)** using a mix of public datasets and curated proprietary datasets. It further benefits from **Direct Preference Optimization (DPO)** using **UltraFeedback**. \n\nAdditionally, the instruct model supports tasks such as **text rewriting, summarization, and function calling**, enabled by datasets from **Argilla**, including **Synth-APIGen-v0.1**. 
The SFT dataset is available at: [SmolTalk SFT Dataset](https://huggingface.co/datasets/HuggingFaceTB/smoltalk).\n\nFor further details, visit the [SmolLM2 GitHub repository](https://github.com/huggingface/smollm), where you will find resources for **pre-training, post-training, evaluation, and local inference**.\n\n## Variants\n\n| No | Variant | Cortex CLI command |\n| -- | ------------------------------------------------------ | ---------------------- |\n| 1 | [Smollm2-1.7b](https://huggingface.co/cortexso/smollm2/tree/1.7b) | `cortex run smollm2:1.7b` |\n\n## Use it with Jan (UI)\n\n1. Install **Jan** using [Quickstart](https://jan.ai/docs/quickstart)\n2. Use in Jan model Hub:\n ```bash\n cortexhub/smollm2\n ```\n\n## Use it with Cortex (CLI)\n\n1. Install **Cortex** using [Quickstart](https://cortex.jan.ai/docs/quickstart)\n2. Run the model with command:\n ```bash\n cortex run smollm2\n ```\n\n## Credits\n\n- **Author:** SmolLM2 Team\n- **Converter:** [Homebrew](https://www.homebrew.ltd/)\n- **Original License:** [Apache 2.0](https://choosealicense.com/licenses/apache-2.0/)\n- **Papers:** [SmolLM2 Research](https://arxiv.org/abs/2502.02737v1)", - "disabled": false, - "downloads": 237, - "gated": false, - "gguf": { - "architecture": "llama", - "bos_token": "<|im_start|>", - "chat_template": "{% for message in messages %}{% if loop.first and messages[0]['role'] != 'system' %}{{ '<|im_start|>system\nYou are a helpful AI assistant named SmolLM, trained by Hugging Face<|im_end|>\n' }}{% endif %}{{'<|im_start|>' + message['role'] + '\n' + message['content'] + '<|im_end|>' + '\n'}}{% endfor %}{% if add_generation_prompt %}{{ '<|im_start|>assistant\n' }}{% endif %}", - "context_length": 8192, - "eos_token": "<|im_end|>", - "total": 1711376384 - }, - "id": "cortexso/smollm2", - "lastModified": "2025-03-03T03:51:13.000Z", - "likes": 0, - "model-index": null, - "modelId": "cortexso/smollm2", - "pipeline_tag": "text-generation", - "private": false, - "sha": "b825edad383d925571b4433f8d6b16eb7cc1e9fc", - "siblings": [ - { - "rfilename": ".gitattributes" - }, - { - "rfilename": "README.md" - }, - { - "rfilename": "metadata.yml" - }, - { - "rfilename": "model.yml" - }, - { - "rfilename": "smollm2-1.7b-instruct-q2_k.gguf" - }, - { - "rfilename": "smollm2-1.7b-instruct-q3_k_l.gguf" - }, - { - "rfilename": "smollm2-1.7b-instruct-q3_k_m.gguf" - }, - { - "rfilename": "smollm2-1.7b-instruct-q3_k_s.gguf" - }, - { - "rfilename": "smollm2-1.7b-instruct-q4_k_m.gguf" - }, - { - "rfilename": "smollm2-1.7b-instruct-q4_k_s.gguf" - }, - { - "rfilename": "smollm2-1.7b-instruct-q5_k_m.gguf" - }, - { - "rfilename": "smollm2-1.7b-instruct-q5_k_s.gguf" - }, - { - "rfilename": "smollm2-1.7b-instruct-q6_k.gguf" - }, - { - "rfilename": "smollm2-1.7b-instruct-q8_0.gguf" - } - ], - "spaces": [], - "tags": [ - "gguf", - "cortex.cpp", - "text-generation", - "arxiv:2502.02737", - "license:apache-2.0", - "endpoints_compatible", - "region:us", - "conversational" - ], - "usedStorage": 11998369216, - "widgetData": [ - { - "text": "Hi, what can you help me with?" - }, - { - "text": "What is 84 * 3 / 2?" - }, - { - "text": "Tell me an interesting fact about the universe!" - }, - { - "text": "Explain quantum computing in simple terms." 
- } - ] - }, - "models": [ - { - "id": "smollm2:1.7b", - "size": 1055609728 - } - ] - }, - { - "author": "allenai", - "id": "cortexso/tulu3", - "metadata": { - "_id": "6744a6a2e08fe3da3fcdfb36", - "author": "cortexso", - "cardData": { - "license": "other", - "pipeline_tag": "text-generation", - "tags": ["cortex.cpp"] - }, - "createdAt": "2024-11-25T16:32:34.000Z", - "description": "---\nlicense: other\npipeline_tag: text-generation\ntags:\n- cortex.cpp\n---\n\n## Overview\n\nTülu3 is a state-of-the-art instruction-following model family developed by Allen Institute for AI. It is designed to excel in a wide range of tasks beyond standard chat applications, including complex problem-solving in domains such as MATH, GSM8K, and IFEval. The Tülu3 series provides a fully open-source ecosystem, offering access to datasets, training code, and fine-tuning recipes to facilitate advanced model customization and experimentation.\n\n## Variants\n\n| No | Variant | Cortex CLI command |\n| --- | --- | --- |\n| 1 | [Tulu3-8b](https://huggingface.co/cortexso/tulu3/tree/8b) | `cortex run tulu3:8b` |\n\n## Use it with Jan (UI)\n\n1. Install **Jan** using [Quickstart](https://jan.ai/docs/quickstart)\n2. Use in Jan model Hub:\n ```bash\n cortexhub/tulu3\n ```\n\n## Use it with Cortex (CLI)\n\n1. Install **Cortex** using [Quickstart](https://cortex.jan.ai/docs/quickstart)\n2. Run the model with command:\n ```bash\n cortex run tulu3\n ```\n \n## Credits\n\n- **Author:** Allenai\n- **Converter:** [Homebrew](https://homebrew.ltd/)\n- **Original License:** [Licence](https://huggingface.co/meta-llama/Llama-3.1-70B-Instruct/blob/main/LICENSE)\n- **Papers:** [Paper](https://arxiv.org/abs/2411.15124)", - "disabled": false, - "downloads": 252, - "gated": false, - "gguf": { - "architecture": "llama", - "bos_token": "<|begin_of_text|>", - "chat_template": "{% for message in messages %}{% if message['role'] == 'system' %}{{ '<|system|>\n' + message['content'] + '\n' }}{% elif message['role'] == 'user' %}{{ '<|user|>\n' + message['content'] + '\n' }}{% elif message['role'] == 'assistant' %}{% if not loop.last %}{{ '<|assistant|>\n' + message['content'] + eos_token + '\n' }}{% else %}{{ '<|assistant|>\n' + message['content'] + eos_token }}{% endif %}{% endif %}{% if loop.last and add_generation_prompt %}{{ '<|assistant|>\n' }}{% endif %}{% endfor %}", - "context_length": 131072, - "eos_token": "<|end_of_text|>", - "total": 8030326848 - }, - "id": "cortexso/tulu3", - "lastModified": "2025-03-03T03:48:16.000Z", - "likes": 1, - "model-index": null, - "modelId": "cortexso/tulu3", - "pipeline_tag": "text-generation", - "private": false, - "sha": "693fb27ee973a686d66f33ecc72b41172ec5a7d6", - "siblings": [ - { - "rfilename": ".gitattributes" - }, - { - "rfilename": "README.md" - }, - { - "rfilename": "llama-3.1-tulu-3-8b-sft-q2_k.gguf" - }, - { - "rfilename": "llama-3.1-tulu-3-8b-sft-q3_k_l.gguf" - }, - { - "rfilename": "llama-3.1-tulu-3-8b-sft-q3_k_m.gguf" - }, - { - "rfilename": "llama-3.1-tulu-3-8b-sft-q3_k_s.gguf" - }, - { - "rfilename": "llama-3.1-tulu-3-8b-sft-q4_k_m.gguf" - }, - { - "rfilename": "llama-3.1-tulu-3-8b-sft-q4_k_s.gguf" - }, - { - "rfilename": "llama-3.1-tulu-3-8b-sft-q5_k_m.gguf" - }, - { - "rfilename": "llama-3.1-tulu-3-8b-sft-q5_k_s.gguf" - }, - { - "rfilename": "llama-3.1-tulu-3-8b-sft-q6_k.gguf" - }, - { - "rfilename": "llama-3.1-tulu-3-8b-sft-q8_0.gguf" - }, - { - "rfilename": "metadata.yml" - }, - { - "rfilename": "model.yml" - } - ], - "spaces": [], - "tags": [ - "gguf", - "cortex.cpp", - 
"text-generation", - "arxiv:2411.15124", - "license:other", - "endpoints_compatible", - "region:us", - "conversational" - ], - "usedStorage": 56188233120, - "widgetData": [ - { - "text": "Hi, what can you help me with?" - }, - { - "text": "What is 84 * 3 / 2?" - }, - { - "text": "Tell me an interesting fact about the universe!" - }, - { - "text": "Explain quantum computing in simple terms." - } - ] - }, - "models": [ - { - "id": "tulu3:8b", - "size": 4920780768 - } - ] - }, - { - "author": "Qwen Team", - "id": "cortexso/qwen3", - "metadata": { - "_id": "6810288ccbe4f92b62636b50", - "author": "cortexso", - "cardData": { - "license": "apache-2.0", - "pipeline_tag": "text-generation", - "tags": ["cortex.cpp", "featured"] - }, - "createdAt": "2025-04-29T01:17:00.000Z", - "description": "---\nlicense: apache-2.0\npipeline_tag: text-generation\ntags:\n- cortex.cpp\n- featured\n---\n\n## Overview\n\n**Qwen Team** developed and released the **Qwen3** series, a state-of-the-art family of language models optimized for advanced reasoning, dialogue, instruction-following, and agentic use cases. Qwen3 introduces innovative thinking/non-thinking mode switching, long context capabilities, and multilingual support, all while achieving high efficiency and performance.\n\nThe Qwen3 models span several sizes and include support for seamless reasoning, complex tool usage, and detailed multi-turn conversations, making them ideal for applications such as research assistants, code generation, enterprise chatbots, and more.\n\n## Variants\n\n### Qwen3\n\n| No | Variant | Branch | Cortex CLI command |\n|----|--------------------------------------------------------------------------------------------|--------|-------------------------------|\n| 1 | [Qwen3-0.6B](https://huggingface.co/cortexso/qwen3/tree/0.6b) | 0.6b | `cortex run qwen3:0.6b` |\n| 2 | [Qwen3-1.7B](https://huggingface.co/cortexso/qwen3/tree/1.7b) | 1.7b | `cortex run qwen3:1.7b` |\n| 3 | [Qwen3-4B](https://huggingface.co/cortexso/qwen3/tree/4b) | 4b | `cortex run qwen3:4b` |\n| 4 | [Qwen3-8B](https://huggingface.co/cortexso/qwen3/tree/8b) | 8b | `cortex run qwen3:8b` |\n| 5 | [Qwen3-14B](https://huggingface.co/cortexso/qwen3/tree/14b) | 14b | `cortex run qwen3:14b` |\n| 6 | [Qwen3-32B](https://huggingface.co/cortexso/qwen3/tree/32b) | 32b | `cortex run qwen3:32b` |\n| 7 | [Qwen3-30B-A3B](https://huggingface.co/cortexso/qwen3/tree/30b-a3b) | 30b-a3b| `cortex run qwen3:30b-a3b` |\n\nEach branch contains multiple quantized GGUF versions:\n- **Qwen3-0.6B:** q2_k, q3_k_l, q3_k_m, q3_k_s, q4_k_m, q4_k_s, q5_k_m, q5_k_s, q6_k, q8_0\n- **Qwen3-1.7B:** q2_k, q3_k_l, q3_k_m, q3_k_s, q4_k_m, q4_k_s, q5_k_m, q5_k_s, q6_k, q8_0\n- **Qwen3-4B:** q2_k, q3_k_l, q3_k_m, q3_k_s, q4_k_m, q4_k_s, q5_k_m, q5_k_s, q6_k, q8_0\n- **Qwen3-8B:** q2_k, q3_k_l, q3_k_m, q3_k_s, q4_k_m, q4_k_s, q5_k_m, q5_k_s, q6_k, q8_0\n- **Qwen3-32B:** q2_k, q3_k_l, q3_k_m, q3_k_s, q4_k_m, q4_k_s, q5_k_m, q5_k_s, q6_k, q8_0\n- **Qwen3-30B-A3B:** *q2_k, q3_k_l, q3_k_m, q3_k_s, q4_k_m, q4_k_s, q5_k_m, q5_k_s, q6_k, q8_0\n\n## Use it with Jan (UI)\n\n1. Install **Jan** using [Quickstart](https://jan.ai/docs/quickstart)\n2. Use in Jan model Hub:\n ```bash\n cortexso/qwen3\n ```\n\n## Use it with Cortex (CLI)\n\n1. Install **Cortex** using [Quickstart](https://cortex.jan.ai/docs/quickstart)\n2. 
Run the model with command:\n ```bash\n cortex run qwen3\n ```\n\n## Credits\n\n- **Author:** Qwen Team\n- **Converter:** [Menlo Research](https://menlo.ai/)\n- **Original License:** [License](https://www.apache.org/licenses/LICENSE-2.0)\n- **Blogs:** [Qwen3: Think Deeper, Act Faster](https://qwenlm.github.io/blog/qwen3/)", - "disabled": false, - "downloads": 6693, - "gated": false, - "gguf": { - "architecture": "qwen3", - "bos_token": "<|endoftext|>", - "chat_template": "{%- if tools %}\n {{- '<|im_start|>system\\n' }}\n {%- if messages[0].role == 'system' %}\n {{- messages[0].content + '\\n\\n' }}\n {%- endif %}\n {{- \"# Tools\\n\\nYou may call one or more functions to assist with the user query.\\n\\nYou are provided with function signatures within XML tags:\\n\" }}\n {%- for tool in tools %}\n {{- \"\\n\" }}\n {{- tool | tojson }}\n {%- endfor %}\n {{- \"\\n\\n\\nFor each function call, return a json object with function name and arguments within XML tags:\\n\\n{\\\"name\\\": , \\\"arguments\\\": }\\n<|im_end|>\\n\" }}\n{%- else %}\n {%- if messages[0].role == 'system' %}\n {{- '<|im_start|>system\\n' + messages[0].content + '<|im_end|>\\n' }}\n {%- endif %}\n{%- endif %}\n{%- set ns = namespace(multi_step_tool=true, last_query_index=messages|length - 1) %}\n{%- for message in messages[::-1] %}\n {%- set index = (messages|length - 1) - loop.index0 %}\n {%- if ns.multi_step_tool and message.role == \"user\" and not(message.content.startswith('') and message.content.endswith('')) %}\n {%- set ns.multi_step_tool = false %}\n {%- set ns.last_query_index = index %}\n {%- endif %}\n{%- endfor %}\n{%- for message in messages %}\n {%- if (message.role == \"user\") or (message.role == \"system\" and not loop.first) %}\n {{- '<|im_start|>' + message.role + '\\n' + message.content + '<|im_end|>' + '\\n' }}\n {%- elif message.role == \"assistant\" %}\n {%- set content = message.content %}\n {%- set reasoning_content = '' %}\n {%- if message.reasoning_content is defined and message.reasoning_content is not none %}\n {%- set reasoning_content = message.reasoning_content %}\n {%- else %}\n {%- if '' in message.content %}\n {%- set content = message.content.split('')[-1].lstrip('\\n') %}\n {%- set reasoning_content = message.content.split('')[0].rstrip('\\n').split('')[-1].lstrip('\\n') %}\n {%- endif %}\n {%- endif %}\n {%- if loop.index0 > ns.last_query_index %}\n {%- if loop.last or (not loop.last and reasoning_content) %}\n {{- '<|im_start|>' + message.role + '\\n\\n' + reasoning_content.strip('\\n') + '\\n\\n\\n' + content.lstrip('\\n') }}\n {%- else %}\n {{- '<|im_start|>' + message.role + '\\n' + content }}\n {%- endif %}\n {%- else %}\n {{- '<|im_start|>' + message.role + '\\n' + content }}\n {%- endif %}\n {%- if message.tool_calls %}\n {%- for tool_call in message.tool_calls %}\n {%- if (loop.first and content) or (not loop.first) %}\n {{- '\\n' }}\n {%- endif %}\n {%- if tool_call.function %}\n {%- set tool_call = tool_call.function %}\n {%- endif %}\n {{- '\\n{\"name\": \"' }}\n {{- tool_call.name }}\n {{- '\", \"arguments\": ' }}\n {%- if tool_call.arguments is string %}\n {{- tool_call.arguments }}\n {%- else %}\n {{- tool_call.arguments | tojson }}\n {%- endif %}\n {{- '}\\n' }}\n {%- endfor %}\n {%- endif %}\n {{- '<|im_end|>\\n' }}\n {%- elif message.role == \"tool\" %}\n {%- if loop.first or (messages[loop.index0 - 1].role != \"tool\") %}\n {{- '<|im_start|>user' }}\n {%- endif %}\n {{- '\\n\\n' }}\n {{- message.content }}\n {{- '\\n' }}\n {%- if loop.last or (messages[loop.index0 + 
1].role != \"tool\") %}\n {{- '<|im_end|>\\n' }}\n {%- endif %}\n {%- endif %}\n{%- endfor %}\n{%- if add_generation_prompt %}\n {{- '<|im_start|>assistant\\n' }}\n {%- if enable_thinking is defined and enable_thinking is false %}\n {{- '\\n\\n\\n\\n' }}\n {%- endif %}\n{%- endif %}", - "context_length": 40960, - "eos_token": "<|im_end|>", - "total": 751632384 - }, - "id": "cortexso/qwen3", - "lastModified": "2025-05-08T15:50:21.000Z", - "likes": 1, - "model-index": null, - "modelId": "cortexso/qwen3", - "pipeline_tag": "text-generation", - "private": false, - "sha": "d25d0999fbab8909f16173f21f2db8f9f58c0a28", - "siblings": [ - { - "rfilename": ".gitattributes" - }, - { - "rfilename": "README.md" - }, - { - "rfilename": "metadata.yml" - }, - { - "rfilename": "model.yml" - }, - { - "rfilename": "qwen3-0.6b-q2_k.gguf" - }, - { - "rfilename": "qwen3-0.6b-q3_k_l.gguf" - }, - { - "rfilename": "qwen3-0.6b-q3_k_m.gguf" - }, - { - "rfilename": "qwen3-0.6b-q3_k_s.gguf" - }, - { - "rfilename": "qwen3-0.6b-q4_k_m.gguf" - }, - { - "rfilename": "qwen3-0.6b-q4_k_s.gguf" - }, - { - "rfilename": "qwen3-0.6b-q5_k_m.gguf" - }, - { - "rfilename": "qwen3-0.6b-q5_k_s.gguf" - }, - { - "rfilename": "qwen3-0.6b-q6_k.gguf" - }, - { - "rfilename": "qwen3-0.6b-q8_0.gguf" - }, - { - "rfilename": "qwen3-1.7b-q2_k.gguf" - }, - { - "rfilename": "qwen3-1.7b-q3_k_l.gguf" - }, - { - "rfilename": "qwen3-1.7b-q3_k_m.gguf" - }, - { - "rfilename": "qwen3-1.7b-q3_k_s.gguf" - }, - { - "rfilename": "qwen3-1.7b-q4_k_m.gguf" - }, - { - "rfilename": "qwen3-1.7b-q4_k_s.gguf" - }, - { - "rfilename": "qwen3-1.7b-q5_k_m.gguf" - }, - { - "rfilename": "qwen3-1.7b-q5_k_s.gguf" - }, - { - "rfilename": "qwen3-1.7b-q6_k.gguf" - }, - { - "rfilename": "qwen3-1.7b-q8_0.gguf" - }, - { - "rfilename": "qwen3-14b-q2_k.gguf" - }, - { - "rfilename": "qwen3-14b-q3_k_l.gguf" - }, - { - "rfilename": "qwen3-14b-q3_k_m.gguf" - }, - { - "rfilename": "qwen3-14b-q3_k_s.gguf" - }, - { - "rfilename": "qwen3-14b-q4_k_m.gguf" - }, - { - "rfilename": "qwen3-14b-q4_k_s.gguf" - }, - { - "rfilename": "qwen3-14b-q5_k_m.gguf" - }, - { - "rfilename": "qwen3-14b-q5_k_s.gguf" - }, - { - "rfilename": "qwen3-14b-q6_k.gguf" - }, - { - "rfilename": "qwen3-14b-q8_0.gguf" - }, - { - "rfilename": "qwen3-30b-a3b-q2_k.gguf" - }, - { - "rfilename": "qwen3-30b-a3b-q3_k_l.gguf" - }, - { - "rfilename": "qwen3-30b-a3b-q3_k_m.gguf" - }, - { - "rfilename": "qwen3-30b-a3b-q3_k_s.gguf" - }, - { - "rfilename": "qwen3-30b-a3b-q4_k_m.gguf" - }, - { - "rfilename": "qwen3-30b-a3b-q4_k_s.gguf" - }, - { - "rfilename": "qwen3-30b-a3b-q5_k_m.gguf" - }, - { - "rfilename": "qwen3-30b-a3b-q5_k_s.gguf" - }, - { - "rfilename": "qwen3-30b-a3b-q6_k.gguf" - }, - { - "rfilename": "qwen3-30b-a3b-q8_0.gguf" - }, - { - "rfilename": "qwen3-32b-q2_k.gguf" - }, - { - "rfilename": "qwen3-32b-q3_k_l.gguf" - }, - { - "rfilename": "qwen3-32b-q3_k_m.gguf" - }, - { - "rfilename": "qwen3-32b-q3_k_s.gguf" - }, - { - "rfilename": "qwen3-32b-q4_k_m.gguf" - }, - { - "rfilename": "qwen3-32b-q4_k_s.gguf" - }, - { - "rfilename": "qwen3-32b-q5_k_m.gguf" - }, - { - "rfilename": "qwen3-32b-q5_k_s.gguf" - }, - { - "rfilename": "qwen3-32b-q6_k.gguf" - }, - { - "rfilename": "qwen3-32b-q8_0.gguf" - }, - { - "rfilename": "qwen3-4b-q2_k.gguf" - }, - { - "rfilename": "qwen3-4b-q3_k_l.gguf" - }, - { - "rfilename": "qwen3-4b-q3_k_m.gguf" - }, - { - "rfilename": "qwen3-4b-q3_k_s.gguf" - }, - { - "rfilename": "qwen3-4b-q4_k_m.gguf" - }, - { - "rfilename": "qwen3-4b-q4_k_s.gguf" - }, - { - "rfilename": "qwen3-4b-q5_k_m.gguf" - }, - { - 
"rfilename": "qwen3-4b-q5_k_s.gguf" - }, - { - "rfilename": "qwen3-4b-q6_k.gguf" - }, - { - "rfilename": "qwen3-4b-q8_0.gguf" - }, - { - "rfilename": "qwen3-8b-q2_k.gguf" - }, - { - "rfilename": "qwen3-8b-q3_k_l.gguf" - }, - { - "rfilename": "qwen3-8b-q3_k_m.gguf" - }, - { - "rfilename": "qwen3-8b-q3_k_s.gguf" - }, - { - "rfilename": "qwen3-8b-q4_k_m.gguf" - }, - { - "rfilename": "qwen3-8b-q4_k_s.gguf" - }, - { - "rfilename": "qwen3-8b-q5_k_m.gguf" - }, - { - "rfilename": "qwen3-8b-q5_k_s.gguf" - }, - { - "rfilename": "qwen3-8b-q6_k.gguf" - }, - { - "rfilename": "qwen3-8b-q8_0.gguf" - } - ], - "spaces": [], - "tags": [ - "gguf", - "cortex.cpp", - "featured", - "text-generation", - "license:apache-2.0", - "endpoints_compatible", - "region:us", - "conversational" - ], - "usedStorage": 588411644672, - "widgetData": [ - { - "text": "Hi, what can you help me with?" - }, - { - "text": "What is 84 * 3 / 2?" - }, - { - "text": "Tell me an interesting fact about the universe!" - }, - { - "text": "Explain quantum computing in simple terms." - } - ] - }, - "models": [ - { - "id": "qwen3:32b", - "size": 19762149088 - }, - { - "id": "qwen3:8b", - "size": 5027783808 - }, - { - "id": "qwen3:0.6b", - "size": 484219968 - }, - { - "id": "qwen3:4b", - "size": 2497280608 - }, - { - "id": "qwen3:30b-a3b", - "size": 18556686208 - }, - { - "id": "qwen3:14b", - "size": 9001753280 - }, - { - "id": "qwen3:1.7b", - "size": 1282439232 - } - ] - }, - { - "author": "TinyLlama", - "id": "cortexso/tinyllama", - "metadata": { - "_id": "66791800ca45b9165970f2fe", - "author": "cortexso", - "cardData": { - "license": "apache-2.0", - "pipeline_tag": "text-generation", - "tags": ["cortex.cpp"] - }, - "createdAt": "2024-06-24T06:53:52.000Z", - "description": "---\nlicense: apache-2.0\npipeline_tag: text-generation\ntags:\n- cortex.cpp\n---\n\n## Overview\n\nThe [TinyLlama](https://huggingface.co/TinyLlama/TinyLlama-1.1B-Chat-v1.0) project aims to pretrain a 1.1B Llama model on 3 trillion tokens. This is the chat model finetuned on a diverse range of synthetic dialogues generated by ChatGPT.\n\n## Variants\n\n| No | Variant | Cortex CLI command |\n| --- | --- | --- |\n| 1 | [TinyLLama-1b](https://huggingface.co/cortexso/tinyllama/tree/1b) | `cortex run tinyllama:1b` |\n\n\n## Use it with Jan (UI)\n\n1. Install **Jan** using [Quickstart](https://jan.ai/docs/quickstart)\n2. Use in Jan model Hub:\n ```bash\n cortexhub/tinyllama\n ```\n \n## Use it with Cortex (CLI)\n\n1. Install **Cortex** using [Quickstart](https://cortex.jan.ai/docs/quickstart)\n2. 
Run the model with command:\n ```bash\n cortex run tinyllama\n ```\n \n## Credits\n\n- **Author:** Microsoft\n- **Converter:** [Homebrew](https://www.homebrew.ltd/)\n- **Original License:** [License](https://choosealicense.com/licenses/apache-2.0/)\n- **Papers:** [Tinyllama Paper](https://arxiv.org/abs/2401.02385)", - "disabled": false, - "downloads": 562, - "gated": false, - "gguf": { - "architecture": "llama", - "bos_token": "", - "chat_template": "{% for message in messages %}\n{% if message['role'] == 'user' %}\n{{ '<|user|>\n' + message['content'] + eos_token }}\n{% elif message['role'] == 'system' %}\n{{ '<|system|>\n' + message['content'] + eos_token }}\n{% elif message['role'] == 'assistant' %}\n{{ '<|assistant|>\n' + message['content'] + eos_token }}\n{% endif %}\n{% if loop.last and add_generation_prompt %}\n{{ '<|assistant|>' }}\n{% endif %}\n{% endfor %}", - "context_length": 2048, - "eos_token": "", - "total": 1100048384 - }, - "id": "cortexso/tinyllama", - "lastModified": "2025-03-03T06:16:24.000Z", - "likes": 0, - "model-index": null, - "modelId": "cortexso/tinyllama", - "pipeline_tag": "text-generation", - "private": false, - "sha": "953054fd3565023c2bbd2381f2566f904f5bdc1f", - "siblings": [ - { - "rfilename": ".gitattributes" - }, - { - "rfilename": "README.md" - }, - { - "rfilename": "metadata.yml" - }, - { - "rfilename": "model.yml" - }, - { - "rfilename": "tinyllama-1.1b-chat-v1.0-q2_k.gguf" - }, - { - "rfilename": "tinyllama-1.1b-chat-v1.0-q3_k_l.gguf" - }, - { - "rfilename": "tinyllama-1.1b-chat-v1.0-q3_k_m.gguf" - }, - { - "rfilename": "tinyllama-1.1b-chat-v1.0-q3_k_s.gguf" - }, - { - "rfilename": "tinyllama-1.1b-chat-v1.0-q4_k_m.gguf" - }, - { - "rfilename": "tinyllama-1.1b-chat-v1.0-q4_k_s.gguf" - }, - { - "rfilename": "tinyllama-1.1b-chat-v1.0-q5_k_m.gguf" - }, - { - "rfilename": "tinyllama-1.1b-chat-v1.0-q5_k_s.gguf" - }, - { - "rfilename": "tinyllama-1.1b-chat-v1.0-q6_k.gguf" - }, - { - "rfilename": "tinyllama-1.1b-chat-v1.0-q8_0.gguf" - } - ], - "spaces": [], - "tags": [ - "gguf", - "cortex.cpp", - "text-generation", - "arxiv:2401.02385", - "license:apache-2.0", - "endpoints_compatible", - "region:us", - "conversational" - ], - "usedStorage": 8451229056, - "widgetData": [ - { - "text": "Hi, what can you help me with?" - }, - { - "text": "What is 84 * 3 / 2?" - }, - { - "text": "Tell me an interesting fact about the universe!" - }, - { - "text": "Explain quantum computing in simple terms." - } - ] - }, - "models": [ - { - "id": "tinyllama:1b", - "size": 782045248 - } - ] - }, - { - "author": "meta-llama", - "id": "cortexso/llama3", - "metadata": { - "_id": "6667a6d52e5f1c08ec14469c", - "author": "cortexso", - "cardData": { - "license": "llama3", - "pipeline_tag": "text-generation", - "tags": ["cortex.cpp"] - }, - "createdAt": "2024-06-11T01:22:29.000Z", - "description": "---\nlicense: llama3\npipeline_tag: text-generation\ntags:\n- cortex.cpp\n---\n\n## Overview\n\nMeta developed and released the [Meta Llama 3](https://huggingface.co/meta-llama/Meta-Llama-3-8B) family of large language models (LLMs), a collection of pretrained and instruction tuned generative text models in 8 and 70B sizes. The Llama 3 instruction tuned models are optimized for dialogue use cases and outperform many of the available open source chat models on common industry benchmarks. 
Further, in developing these models, we took great care to optimize helpfulness and safety.\n\n## Variants\n\n| No | Variant | Cortex CLI command |\n| --- | --- | --- |\n| 1 | [Llama3-8b](https://huggingface.co/cortexso/llama3/tree/8b) | `cortex run llama3:8b` |\n\n## Use it with Jan (UI)\n\n1. Install **Jan** using [Quickstart](https://jan.ai/docs/quickstart)\n2. Use in Jan model Hub:\n ```bash\n cortexso/llama3\n ```\n\n## Use it with Cortex (CLI)\n\n1. Install **Cortex** using [Quickstart](https://cortex.jan.ai/docs/quickstart)\n2. Run the model with command:\n ```bash\n cortex run llama3\n ```\n\n## Credits\n\n- **Author:** Meta\n- **Converter:** [Homebrew](https://www.homebrew.ltd/)\n- **Original License:** [License](https://llama.meta.com/llama3/license/)\n- **Papers:** [Llama-3 Blog](https://llama.meta.com/llama3/)", - "disabled": false, - "downloads": 646, - "gated": false, - "gguf": { - "architecture": "llama", - "bos_token": "<|begin_of_text|>", - "chat_template": "{{- bos_token }}\n{%- if custom_tools is defined %}\n {%- set tools = custom_tools %}\n{%- endif %}\n{%- if not tools_in_user_message is defined %}\n {%- set tools_in_user_message = true %}\n{%- endif %}\n{%- if not date_string is defined %}\n {%- set date_string = \"26 Jul 2024\" %}\n{%- endif %}\n{%- if not tools is defined %}\n {%- set tools = none %}\n{%- endif %}\n\n{#- This block extracts the system message, so we can slot it into the right place. #}\n{%- if messages[0]['role'] == 'system' %}\n {%- set system_message = messages[0]['content']|trim %}\n {%- set messages = messages[1:] %}\n{%- else %}\n {%- set system_message = \"\" %}\n{%- endif %}\n\n{#- System message + builtin tools #}\n{{- \"<|start_header_id|>system<|end_header_id|>\\n\\n\" }}\n{%- if builtin_tools is defined or tools is not none %}\n {{- \"Environment: ipython\\n\" }}\n{%- endif %}\n{%- if builtin_tools is defined %}\n {{- \"Tools: \" + builtin_tools | reject('equalto', 'code_interpreter') | join(\", \") + \"\\n\\n\"}}\n{%- endif %}\n{{- \"Cutting Knowledge Date: December 2023\\n\" }}\n{{- \"Today Date: \" + date_string + \"\\n\\n\" }}\n{%- if tools is not none and not tools_in_user_message %}\n {{- \"You have access to the following functions. To call a function, please respond with JSON for a function call.\" }}\n {{- 'Respond in the format {\"name\": function name, \"parameters\": dictionary of argument name and its value}.' }}\n {{- \"Do not use variables.\\n\\n\" }}\n {%- for t in tools %}\n {{- t | tojson(indent=4) }}\n {{- \"\\n\\n\" }}\n {%- endfor %}\n{%- endif %}\n{{- system_message }}\n{{- \"<|eot_id|>\" }}\n\n{#- Custom tools are passed in a user message with some extra guidance #}\n{%- if tools_in_user_message and not tools is none %}\n {#- Extract the first user message so we can plug it in here #}\n {%- if messages | length != 0 %}\n {%- set first_user_message = messages[0]['content']|trim %}\n {%- set messages = messages[1:] %}\n {%- else %}\n {{- raise_exception(\"Cannot put tools in the first user message when there's no first user message!\") }}\n{%- endif %}\n {{- '<|start_header_id|>user<|end_header_id|>\\n\\n' -}}\n {{- \"Given the following functions, please respond with a JSON for a function call \" }}\n {{- \"with its proper arguments that best answers the given prompt.\\n\\n\" }}\n {{- 'Respond in the format {\"name\": function name, \"parameters\": dictionary of argument name and its value}.' 
}}\n {{- \"Do not use variables.\\n\\n\" }}\n {%- for t in tools %}\n {{- t | tojson(indent=4) }}\n {{- \"\\n\\n\" }}\n {%- endfor %}\n {{- first_user_message + \"<|eot_id|>\"}}\n{%- endif %}\n\n{%- for message in messages %}\n {%- if not (message.role == 'ipython' or message.role == 'tool' or 'tool_calls' in message) %}\n {{- '<|start_header_id|>' + message['role'] + '<|end_header_id|>\\n\\n'+ message['content'] | trim + '<|eot_id|>' }}\n {%- elif 'tool_calls' in message %}\n {%- if not message.tool_calls|length == 1 %}\n {{- raise_exception(\"This model only supports single tool-calls at once!\") }}\n {%- endif %}\n {%- set tool_call = message.tool_calls[0].function %}\n {%- if builtin_tools is defined and tool_call.name in builtin_tools %}\n {{- '<|start_header_id|>assistant<|end_header_id|>\\n\\n' -}}\n {{- \"<|python_tag|>\" + tool_call.name + \".call(\" }}\n {%- for arg_name, arg_val in tool_call.arguments | items %}\n {{- arg_name + '=\"' + arg_val + '\"' }}\n {%- if not loop.last %}\n {{- \", \" }}\n {%- endif %}\n {%- endfor %}\n {{- \")\" }}\n {%- else %}\n {{- '<|start_header_id|>assistant<|end_header_id|>\\n\\n' -}}\n {{- '{\"name\": \"' + tool_call.name + '\", ' }}\n {{- '\"parameters\": ' }}\n {{- tool_call.arguments | tojson }}\n {{- \"}\" }}\n {%- endif %}\n {%- if builtin_tools is defined %}\n {#- This means we're in ipython mode #}\n {{- \"<|eom_id|>\" }}\n {%- else %}\n {{- \"<|eot_id|>\" }}\n {%- endif %}\n {%- elif message.role == \"tool\" or message.role == \"ipython\" %}\n {{- \"<|start_header_id|>ipython<|end_header_id|>\\n\\n\" }}\n {%- if message.content is mapping or message.content is iterable %}\n {{- message.content | tojson }}\n {%- else %}\n {{- message.content }}\n {%- endif %}\n {{- \"<|eot_id|>\" }}\n {%- endif %}\n{%- endfor %}\n{%- if add_generation_prompt %}\n {{- '<|start_header_id|>assistant<|end_header_id|>\\n\\n' }}\n{%- endif %}\n", - "context_length": 131072, - "eos_token": "<|eot_id|>", - "total": 8030261312 - }, - "id": "cortexso/llama3", - "lastModified": "2025-03-03T06:19:24.000Z", - "likes": 0, - "model-index": null, - "modelId": "cortexso/llama3", - "pipeline_tag": "text-generation", - "private": false, - "sha": "fcf18c0b14bb2dc64c7f78da40ca88a8ff759fd5", - "siblings": [ - { - "rfilename": ".gitattributes" - }, - { - "rfilename": "README.md" - }, - { - "rfilename": "llama-3.1-8b-instruct-q2_k.gguf" - }, - { - "rfilename": "llama-3.1-8b-instruct-q3_k_l.gguf" - }, - { - "rfilename": "llama-3.1-8b-instruct-q3_k_m.gguf" - }, - { - "rfilename": "llama-3.1-8b-instruct-q3_k_s.gguf" - }, - { - "rfilename": "llama-3.1-8b-instruct-q4_k_m.gguf" - }, - { - "rfilename": "llama-3.1-8b-instruct-q4_k_s.gguf" - }, - { - "rfilename": "llama-3.1-8b-instruct-q5_k_m.gguf" - }, - { - "rfilename": "llama-3.1-8b-instruct-q5_k_s.gguf" - }, - { - "rfilename": "llama-3.1-8b-instruct-q6_k.gguf" - }, - { - "rfilename": "llama-3.1-8b-instruct-q8_0.gguf" - }, - { - "rfilename": "metadata.yml" - }, - { - "rfilename": "model.yml" - } - ], - "spaces": [], - "tags": [ - "gguf", - "cortex.cpp", - "text-generation", - "license:llama3", - "endpoints_compatible", - "region:us", - "conversational" - ], - "usedStorage": 70949951936, - "widgetData": [ - { - "text": "Hi, what can you help me with?" - }, - { - "text": "What is 84 * 3 / 2?" - }, - { - "text": "Tell me an interesting fact about the universe!" - }, - { - "text": "Explain quantum computing in simple terms." 
- } - ] - }, - "models": [ - { - "id": "llama3:8b", - "size": 4920739072 - } - ] - }, - { - "author": "meta-llama", - "id": "cortexso/llama3.1", - "metadata": { - "_id": "66a76e01a1037fe261a5a472", - "author": "cortexso", - "cardData": { - "license": "llama3.1", - "pipeline_tag": "text-generation", - "tags": ["cortex.cpp"] - }, - "createdAt": "2024-07-29T10:25:05.000Z", - "description": "---\nlicense: llama3.1\npipeline_tag: text-generation\ntags:\n- cortex.cpp\n---\n\n## Overview\n\nMeta developed and released the [Meta Llama 3.1](https://huggingface.co/meta-llama/Meta-Llama-3.1-8B) family of large language models (LLMs), a collection of pretrained and instruction tuned generative text models in 8 and 70B sizes. The Llama 3 instruction tuned models are optimized for dialogue use cases and outperform many of the available open source chat models on common industry benchmarks. Further, in developing these models, we took great care to optimize helpfulness and safety.\n\n## Variants\n\n| No | Variant | Cortex CLI command |\n| --- | --- | --- |\n| 1 | [Llama3.1-8b](https://huggingface.co/cortexso/llama3.1/tree/8b) | `cortex run llama3.1:8b` |\n\n## Use it with Jan (UI)\n\n1. Install **Jan** using [Quickstart](https://jan.ai/docs/quickstart)\n2. Use in Jan model Hub:\n ```bash\n cortexso/llama3.1\n ```\n\n## Use it with Cortex (CLI)\n\n1. Install **Cortex** using [Quickstart](https://cortex.jan.ai/docs/quickstart)\n2. Run the model with command:\n ```bash\n cortex run llama3.1\n ```\n\n## Credits\n\n- **Author:** Meta\n- **Converter:** [Homebrew](https://www.homebrew.ltd/)\n- **Original License:** [License](https://huggingface.co/meta-llama/Meta-Llama-3.1-8B/blob/main/LICENSE)\n- **Papers:** [Llama-3.1 Blog](https://ai.meta.com/blog/meta-llama-3-1/)", - "disabled": false, - "downloads": 1048, - "gated": false, - "gguf": { - "architecture": "llama", - "bos_token": "<|begin_of_text|>", - "context_length": 131072, - "eos_token": "<|end_of_text|>", - "total": 8030261312 - }, - "id": "cortexso/llama3.1", - "lastModified": "2025-03-02T14:27:57.000Z", - "likes": 0, - "model-index": null, - "modelId": "cortexso/llama3.1", - "pipeline_tag": "text-generation", - "private": false, - "sha": "256c4f2118a75d93a1dc368ac4ccf1fea16751c2", - "siblings": [ - { - "rfilename": ".gitattributes" - }, - { - "rfilename": "README.md" - }, - { - "rfilename": "llama-3.1-8b-q2_k.gguf" - }, - { - "rfilename": "llama-3.1-8b-q3_k_l.gguf" - }, - { - "rfilename": "llama-3.1-8b-q3_k_m.gguf" - }, - { - "rfilename": "llama-3.1-8b-q3_k_s.gguf" - }, - { - "rfilename": "llama-3.1-8b-q4_k_m.gguf" - }, - { - "rfilename": "llama-3.1-8b-q4_k_s.gguf" - }, - { - "rfilename": "llama-3.1-8b-q5_k_m.gguf" - }, - { - "rfilename": "llama-3.1-8b-q5_k_s.gguf" - }, - { - "rfilename": "llama-3.1-8b-q6_k.gguf" - }, - { - "rfilename": "llama-3.1-8b-q8_0.gguf" - }, - { - "rfilename": "metadata.yml" - }, - { - "rfilename": "model.yml" - } - ], - "spaces": [], - "tags": [ - "gguf", - "cortex.cpp", - "text-generation", - "license:llama3.1", - "endpoints_compatible", - "region:us" - ], - "usedStorage": 66029173888, - "widgetData": [ - { - "text": "My name is Julien and I like to" - }, - { - "text": "I like traveling by train because" - }, - { - "text": "Paris is an amazing place to visit," - }, - { - "text": "Once upon a time," - } - ] - }, - "models": [ - { - "id": "llama3.1:8b", - "size": 4920734176 - } - ] - }, - { - "author": "AIDC-AI", - "id": "cortexso/marco-o1", - "metadata": { - "_id": "6743b6140d46fa30e6ff2879", - "author": "cortexso", - 
"cardData": { - "license": "other", - "pipeline_tag": "text-generation", - "tags": ["cortex.cpp"] - }, - "createdAt": "2024-11-24T23:26:12.000Z", - "description": "---\nlicense: other\npipeline_tag: text-generation\ntags:\n- cortex.cpp\n---\n\n## Overview\nMarco-o1 not only focuses on disciplines with standard answers, such as mathematics, physics, and coding—which are well-suited for reinforcement learning (RL)—but also places greater emphasis on open-ended resolutions. We aim to address the question: \"Can the o1 model effectively generalize to broader domains where clear standards are absent and rewards are challenging to quantify?\"\n\nCurrently, Marco-o1 Large Language Model (LLM) is powered by Chain-of-Thought (CoT) fine-tuning, Monte Carlo Tree Search (MCTS), reflection mechanisms, and innovative reasoning strategies—optimized for complex real-world problem-solving tasks.\n\n## Variants\n\n| No | Variant | Cortex CLI command |\n| --- | --- | --- |\n| 1 | [Marco-o1-8b](https://huggingface.co/cortexso/marco-o1/tree/8b) | `cortex run marco-o1:8b` |\n\n## Use it with Jan (UI)\n\n1. Install **Jan** using [Quickstart](https://jan.ai/docs/quickstart)\n2. Use in Jan model Hub:\n ```bash\n cortexhub/marco-o1\n ```\n\n## Use it with Cortex (CLI)\n\n1. Install **Cortex** using [Quickstart](https://cortex.jan.ai/docs/quickstart)\n2. Run the model with command:\n ```bash\n cortex run marco-o1\n ```\n \n## Credits\n\n- **Author:** AIDC-AI\n- **Converter:** [Homebrew](https://homebrew.ltd/)\n- **Original License:** [Licence](https://huggingface.co/AIDC-AI/Marco-o1/blob/main/LICENSE)\n- **Papers:** [Paper](https://arxiv.org/abs/2411.14405)", - "disabled": false, - "downloads": 122, - "gated": false, - "gguf": { - "architecture": "qwen2", - "bos_token": "<|endoftext|>", - "chat_template": "{% for message in messages %}{% if loop.first and messages[0]['role'] != 'system' %}{{ '<|im_start|>system\n\n你是一个经过良好训练的AI助手,你的名字是Marco-o1.由阿里国际数字商业集团的AI Business创造.\n \n## 重要!!!!!\n当你回答问题时,你的思考应该在内完成,内输出你的结果。\n应该尽可能是英文,但是有2个特例,一个是对原文中的引用,另一个是是数学应该使用markdown格式,内的输出需要遵循用户输入的语言。\n <|im_end|>\n' }}{% endif %}{{'<|im_start|>' + message['role'] + '\n' + message['content'] + '<|im_end|>' + '\n'}}{% endfor %}{% if add_generation_prompt %}{{ '<|im_start|>assistant\n' }}{% endif %}", - "context_length": 32768, - "eos_token": "<|im_end|>", - "total": 7615616512 - }, - "id": "cortexso/marco-o1", - "lastModified": "2025-03-03T02:27:27.000Z", - "likes": 0, - "model-index": null, - "modelId": "cortexso/marco-o1", - "pipeline_tag": "text-generation", - "private": false, - "sha": "0c8e0cdbfb898e000cad200b2694c5c6e6710fc6", - "siblings": [ - { - "rfilename": ".gitattributes" - }, - { - "rfilename": "README.md" - }, - { - "rfilename": "marco-o1-q2_k.gguf" - }, - { - "rfilename": "marco-o1-q3_k_l.gguf" - }, - { - "rfilename": "marco-o1-q3_k_m.gguf" - }, - { - "rfilename": "marco-o1-q3_k_s.gguf" - }, - { - "rfilename": "marco-o1-q4_k_m.gguf" - }, - { - "rfilename": "marco-o1-q4_k_s.gguf" - }, - { - "rfilename": "marco-o1-q5_k_m.gguf" - }, - { - "rfilename": "marco-o1-q5_k_s.gguf" - }, - { - "rfilename": "marco-o1-q6_k.gguf" - }, - { - "rfilename": "marco-o1-q8_0.gguf" - }, - { - "rfilename": "metadata.yml" - }, - { - "rfilename": "model.yml" - } - ], - "spaces": [], - "tags": [ - "gguf", - "cortex.cpp", - "text-generation", - "arxiv:2411.14405", - "license:other", - "endpoints_compatible", - "region:us", - "conversational" - ], - "usedStorage": 53341785824, - "widgetData": [ - { - "text": "Hi, what can you help me with?" 
- }, - { - "text": "What is 84 * 3 / 2?" - }, - { - "text": "Tell me an interesting fact about the universe!" - }, - { - "text": "Explain quantum computing in simple terms." - } - ] - }, - "models": [ - { - "id": "marco-o1:8b", - "size": 4683071648 - } - ] - }, - { - "author": "DeepSeek-AI", - "id": "cortexso/deepseek-r1-distill-qwen-1.5b", - "metadata": { - "_id": "678e84d99d66241aabee008a", - "author": "cortexso", - "cardData": { - "license": "mit", - "pipeline_tag": "text-generation", - "tags": ["cortex.cpp"] - }, - "createdAt": "2025-01-20T17:16:09.000Z", - "description": "---\nlicense: mit\npipeline_tag: text-generation\ntags:\n- cortex.cpp\n---\n## Overview\n\n**DeepSeek** developed and released the [DeepSeek R1 Distill Qwen 1.5B](https://huggingface.co/deepseek-ai/DeepSeek-R1-Distill-Qwen-1.5B) model, a distilled version of the Qwen 1.5B language model. It is fine-tuned for high-performance text generation and optimized for dialogue and information-seeking tasks. This model achieves a balance of efficiency and accuracy while maintaining a smaller footprint compared to the original Qwen 1.5B.\n\nThe model is designed for applications in customer support, conversational AI, and research, prioritizing both helpfulness and safety.\n\n## Variants\n\n| No | Variant | Cortex CLI command |\n| --- | --- | --- |\n| 1 | [Deepseek-r1-distill-qwen-1.5b-1.5b](https://huggingface.co/cortexso/deepseek-r1-distill-qwen-1.5b/tree/1.5b) | `cortex run deepseek-r1-distill-qwen-1.5b:1.5b` |\n\n\n## Use it with Jan (UI)\n\n1. Install **Jan** using [Quickstart](https://jan.ai/docs/quickstart)\n2. Use in Jan model Hub:\n ```bash\n cortexso/deepseek-r1-distill-qwen-1.5b\n ```\n## Use it with Cortex (CLI)\n\n1. Install **Cortex** using [Quickstart](https://cortex.jan.ai/docs/quickstart)\n2. 
Run the model with command:\n ```bash\n cortex run deepseek-r1-distill-qwen-1.5b\n ```\n## Credits\n\n- **Author:** DeepSeek\n- **Converter:** [Homebrew](https://www.homebrew.ltd/)\n- **Original License:** [License](https://huggingface.co/deepseek-ai/DeepSeek-R1-Distill-Qwen-1.5B#7-license)\n- **Papers:** [DeepSeek-R1: Incentivizing Reasoning Capability in LLMs via Reinforcement Learning](https://arxiv.org/html/2501.12948v1)", - "disabled": false, - "downloads": 539, - "gated": false, - "gguf": { - "architecture": "qwen2", - "bos_token": "<|begin▁of▁sentence|>", - "chat_template": "{% if not add_generation_prompt is defined %}{% set add_generation_prompt = false %}{% endif %}{% set ns = namespace(is_first=false, is_tool=false, is_output_first=true, system_prompt='') %}{%- for message in messages %}{%- if message['role'] == 'system' %}{% set ns.system_prompt = message['content'] %}{%- endif %}{%- endfor %}{{bos_token}}{{ns.system_prompt}}{%- for message in messages %}{%- if message['role'] == 'user' %}{%- set ns.is_tool = false -%}{{'<|User|>' + message['content']}}{%- endif %}{%- if message['role'] == 'assistant' and message['content'] is none %}{%- set ns.is_tool = false -%}{%- for tool in message['tool_calls']%}{%- if not ns.is_first %}{{'<|Assistant|><|tool▁calls▁begin|><|tool▁call▁begin|>' + tool['type'] + '<|tool▁sep|>' + tool['function']['name'] + '\\n' + '```json' + '\\n' + tool['function']['arguments'] + '\\n' + '```' + '<|tool▁call▁end|>'}}{%- set ns.is_first = true -%}{%- else %}{{'\\n' + '<|tool▁call▁begin|>' + tool['type'] + '<|tool▁sep|>' + tool['function']['name'] + '\\n' + '```json' + '\\n' + tool['function']['arguments'] + '\\n' + '```' + '<|tool▁call▁end|>'}}{{'<|tool▁calls▁end|><|end▁of▁sentence|>'}}{%- endif %}{%- endfor %}{%- endif %}{%- if message['role'] == 'assistant' and message['content'] is not none %}{%- if ns.is_tool %}{{'<|tool▁outputs▁end|>' + message['content'] + '<|end▁of▁sentence|>'}}{%- set ns.is_tool = false -%}{%- else %}{% set content = message['content'] %}{% if '' in content %}{% set content = content.split('')[-1] %}{% endif %}{{'<|Assistant|>' + content + '<|end▁of▁sentence|>'}}{%- endif %}{%- endif %}{%- if message['role'] == 'tool' %}{%- set ns.is_tool = true -%}{%- if ns.is_output_first %}{{'<|tool▁outputs▁begin|><|tool▁output▁begin|>' + message['content'] + '<|tool▁output▁end|>'}}{%- set ns.is_output_first = false %}{%- else %}{{'\\n<|tool▁output▁begin|>' + message['content'] + '<|tool▁output▁end|>'}}{%- endif %}{%- endif %}{%- endfor -%}{% if ns.is_tool %}{{'<|tool▁outputs▁end|>'}}{% endif %}{% if add_generation_prompt and not ns.is_tool %}{{'<|Assistant|>\\n'}}{% endif %}", - "context_length": 131072, - "eos_token": "<|end▁of▁sentence|>", - "total": 1777088000 - }, - "id": "cortexso/deepseek-r1-distill-qwen-1.5b", - "lastModified": "2025-03-03T05:24:13.000Z", - "likes": 0, - "model-index": null, - "modelId": "cortexso/deepseek-r1-distill-qwen-1.5b", - "pipeline_tag": "text-generation", - "private": false, - "sha": "14cbd3c8ac57a346c35f676fd5fe55befebd911e", - "siblings": [ - { - "rfilename": ".gitattributes" - }, - { - "rfilename": "README.md" - }, - { - "rfilename": "deepseek-r1-distill-qwen-1.5b-q2_k.gguf" - }, - { - "rfilename": "deepseek-r1-distill-qwen-1.5b-q3_k_l.gguf" - }, - { - "rfilename": "deepseek-r1-distill-qwen-1.5b-q3_k_m.gguf" - }, - { - "rfilename": "deepseek-r1-distill-qwen-1.5b-q3_k_s.gguf" - }, - { - "rfilename": "deepseek-r1-distill-qwen-1.5b-q4_k_m.gguf" - }, - { - "rfilename": "deepseek-r1-distill-qwen-1.5b-q4_k_s.gguf" - 
}, - { - "rfilename": "deepseek-r1-distill-qwen-1.5b-q5_k_m.gguf" - }, - { - "rfilename": "deepseek-r1-distill-qwen-1.5b-q5_k_s.gguf" - }, - { - "rfilename": "deepseek-r1-distill-qwen-1.5b-q6_k.gguf" - }, - { - "rfilename": "deepseek-r1-distill-qwen-1.5b-q8_0.gguf" - }, - { - "rfilename": "metadata.yml" - }, - { - "rfilename": "model.yml" - } - ], - "spaces": [], - "tags": [ - "gguf", - "cortex.cpp", - "text-generation", - "license:mit", - "endpoints_compatible", - "region:us", - "conversational" - ], - "usedStorage": 12728600096, - "widgetData": [ - { - "text": "Hi, what can you help me with?" - }, - { - "text": "What is 84 * 3 / 2?" - }, - { - "text": "Tell me an interesting fact about the universe!" - }, - { - "text": "Explain quantum computing in simple terms." - } - ] - }, - "models": [ - { - "id": "deepseek-r1-distill-qwen-1.5b:1.5b", - "size": 1117320480 - } - ] - }, - { - "author": "PrimeIntellect", - "id": "cortexso/intellect-1", - "metadata": { - "_id": "674e48fc24f1ef616cd485de", - "author": "cortexso", - "cardData": { - "license": "other", - "pipeline_tag": "text-generation", - "tags": ["cortex.cpp"] - }, - "createdAt": "2024-12-02T23:55:40.000Z", - "description": "---\nlicense: other\npipeline_tag: text-generation\ntags:\n- cortex.cpp\n---\n\n## Overview\n\nIntellect-1 is a high-performance instruction-tuned model developed by Qwen, designed to handle a broad range of natural language processing tasks with efficiency and precision. Optimized for dialogue, reasoning, and knowledge-intensive applications, Intellect-1 excels in structured generation, summarization, and retrieval-augmented tasks. It is part of an open ecosystem, providing transparency in training data, model architecture, and fine-tuning methodologies.\n\n\n## Variants\n\n| No | Variant | Cortex CLI command |\n| --- | --- | --- |\n| 1 | [Intellect-1-10b](https://huggingface.co/cortexso/intellect-1/tree/10b) | `cortex run intellect-1:10b` |\n\n## Use it with Jan (UI)\n\n1. Install **Jan** using [Quickstart](https://jan.ai/docs/quickstart)\n2. Use in Jan model Hub:\n ```bash\n cortexhub/intellect-1\n ```\n\n## Use it with Cortex (CLI)\n\n1. Install **Cortex** using [Quickstart](https://cortex.jan.ai/docs/quickstart)\n2. 
Run the model with command:\n ```bash\n cortex run intellect-1\n ```\n \n## Credits\n\n- **Author:** Qwen\n- **Converter:** [Homebrew](https://homebrew.ltd/)\n- **Original License:** [Licence](https://choosealicense.com/licenses/apache-2.0/)\n- **Papers:** [Technical Paper](https://github.com/PrimeIntellect-ai/prime)", - "disabled": false, - "downloads": 182, - "gated": false, - "gguf": { - "architecture": "llama", - "bos_token": "<|begin_of_text|>", - "chat_template": "{% if not add_generation_prompt is defined %}{% set add_generation_prompt = false %}{% endif %}{% set loop_messages = messages %}{% for message in loop_messages %}{% set content = '<|start_header_id|>' + message['role'] + '<|end_header_id|>\n\n'+ message['content'] | trim + '<|eot_id|>' %}{% if loop.index0 == 0 %}{% set content = bos_token + content %}{% endif %}{{ content }}{% endfor %}{% if add_generation_prompt %}{{ '<|start_header_id|>assistant<|end_header_id|>\n\n' }}{% endif %}", - "context_length": 8192, - "eos_token": "<|eot_id|>", - "total": 10211381248 - }, - "id": "cortexso/intellect-1", - "lastModified": "2025-03-03T02:32:47.000Z", - "likes": 0, - "model-index": null, - "modelId": "cortexso/intellect-1", - "pipeline_tag": "text-generation", - "private": false, - "sha": "f46fd8109130aab2969fd9229d390051f774a761", - "siblings": [ - { - "rfilename": ".gitattributes" - }, - { - "rfilename": "README.md" - }, - { - "rfilename": "intellect-1-instruct-q2_k.gguf" - }, - { - "rfilename": "intellect-1-instruct-q3_k_l.gguf" - }, - { - "rfilename": "intellect-1-instruct-q3_k_m.gguf" - }, - { - "rfilename": "intellect-1-instruct-q3_k_s.gguf" - }, - { - "rfilename": "intellect-1-instruct-q4_k_m.gguf" - }, - { - "rfilename": "intellect-1-instruct-q4_k_s.gguf" - }, - { - "rfilename": "intellect-1-instruct-q5_k_m.gguf" - }, - { - "rfilename": "intellect-1-instruct-q5_k_s.gguf" - }, - { - "rfilename": "intellect-1-instruct-q6_k.gguf" - }, - { - "rfilename": "intellect-1-instruct-q8_0.gguf" - }, - { - "rfilename": "metadata.yml" - }, - { - "rfilename": "model.yml" - } - ], - "spaces": [], - "tags": [ - "gguf", - "cortex.cpp", - "text-generation", - "license:other", - "endpoints_compatible", - "region:us", - "conversational" - ], - "usedStorage": 71113603904, - "widgetData": [ - { - "text": "Hi, what can you help me with?" - }, - { - "text": "What is 84 * 3 / 2?" - }, - { - "text": "Tell me an interesting fact about the universe!" - }, - { - "text": "Explain quantum computing in simple terms." 
- } - ] - }, - "models": [ - { - "id": "intellect-1:10b", - "size": 6229006784 - } - ] - } -] diff --git a/extensions/model-extension/resources/settings.json b/extensions/model-extension/resources/settings.json deleted file mode 100644 index d896f1271..000000000 --- a/extensions/model-extension/resources/settings.json +++ /dev/null @@ -1,14 +0,0 @@ -[ - { - "key": "hugging-face-access-token", - "title": "Hugging Face Access Token", - "description": "Access tokens programmatically authenticate your identity to the Hugging Face Hub, allowing applications to perform specific actions specified by the scope of permissions granted.", - "controllerType": "input", - "controllerProps": { - "value": "", - "placeholder": "hf_**********************************", - "type": "password", - "inputActions": ["unobscure", "copy"] - } - } -] diff --git a/extensions/model-extension/rolldown.config.mjs b/extensions/model-extension/rolldown.config.mjs deleted file mode 100644 index 54ea654ff..000000000 --- a/extensions/model-extension/rolldown.config.mjs +++ /dev/null @@ -1,17 +0,0 @@ -import { defineConfig } from 'rolldown' -import settingJson from './resources/settings.json' with { type: 'json' } -import modelSources from './resources/default.json' with { type: 'json' } - -export default defineConfig({ - input: 'src/index.ts', - output: { - format: 'esm', - file: 'dist/index.js', - }, - platform: 'browser', - define: { - SETTINGS: JSON.stringify(settingJson), - CORTEX_API_URL: JSON.stringify(`http://127.0.0.1:${process.env.CORTEX_API_PORT ?? "39291"}`), - DEFAULT_MODEL_SOURCES: JSON.stringify(modelSources), - }, -}) diff --git a/extensions/model-extension/src/@types/global.d.ts b/extensions/model-extension/src/@types/global.d.ts deleted file mode 100644 index e4d269cdb..000000000 --- a/extensions/model-extension/src/@types/global.d.ts +++ /dev/null @@ -1,13 +0,0 @@ -declare const NODE: string -declare const CORTEX_API_URL: string -declare const SETTINGS: SettingComponentProps[] -declare const DEFAULT_MODEL_SOURCES: any - -interface Core { - api: APIFunctions - events: EventEmitter -} -interface Window { - core?: Core | undefined - electronAPI?: any | undefined -} diff --git a/extensions/model-extension/src/index.test.ts b/extensions/model-extension/src/index.test.ts deleted file mode 100644 index a339c8c9b..000000000 --- a/extensions/model-extension/src/index.test.ts +++ /dev/null @@ -1,88 +0,0 @@ -import { describe, it, expect, beforeEach, vi } from 'vitest' -import JanModelExtension from './index' -import ky from 'ky' -import { ModelManager } from '@janhq/core' - -const API_URL = 'http://localhost:3000' - -vi.stubGlobal('API_URL', API_URL) - -describe('JanModelExtension', () => { - let extension: JanModelExtension - - beforeEach(() => { - extension = new JanModelExtension() - vi.spyOn(ModelManager, 'instance').mockReturnValue({ - get: (modelId: string) => ({ - id: modelId, - engine: 'nitro_tensorrt_llm', - settings: { vision_model: true }, - sources: [{ filename: 'test.bin' }], - }), - } as any) - vi.spyOn(JanModelExtension.prototype, 'cancelModelPull').mockImplementation( - async (model: string) => { - const kyDeleteSpy = vi.spyOn(ky, 'delete').mockResolvedValue({ - json: () => Promise.resolve({}), - } as any) - - await ky.delete(`${API_URL}/v1/models/pull`, { - json: { taskId: model }, - }) - - expect(kyDeleteSpy).toHaveBeenCalledWith(`${API_URL}/v1/models/pull`, { - json: { taskId: model }, - }) - - kyDeleteSpy.mockRestore() // Restore the original implementation - } - ) - }) - - it('should initialize 
with an empty queue', () => { - expect(extension.queue.size).toBe(0) - }) - - describe('pullModel', () => { - it('should call the pull model endpoint with correct parameters', async () => { - const model = 'test-model' - const id = 'test-id' - const name = 'test-name' - - const kyPostSpy = vi.spyOn(ky, 'post').mockReturnValue({ - json: () => Promise.resolve({}), - } as any) - - await extension.pullModel(model, id, name) - - expect(kyPostSpy).toHaveBeenCalledWith(`${API_URL}/v1/models/pull`, { - json: { model, id, name }, - }) - - kyPostSpy.mockRestore() // Restore the original implementation - }) - }) - - describe('cancelModelPull', () => { - it('should call the cancel model pull endpoint with the correct model', async () => { - const model = 'test-model' - - await extension.cancelModelPull(model) - }) - }) - - describe('deleteModel', () => { - it('should call the delete model endpoint with the correct model', async () => { - const model = 'test-model' - const kyDeleteSpy = vi - .spyOn(ky, 'delete') - .mockResolvedValue({ json: () => Promise.resolve({}) } as any) - - await extension.deleteModel(model) - - expect(kyDeleteSpy).toHaveBeenCalledWith(`${API_URL}/v1/models/${model}`) - - kyDeleteSpy.mockRestore() // Restore the original implementation - }) - }) -}) diff --git a/extensions/model-extension/src/index.ts b/extensions/model-extension/src/index.ts deleted file mode 100644 index 3f2f06ff2..000000000 --- a/extensions/model-extension/src/index.ts +++ /dev/null @@ -1,436 +0,0 @@ -import { - ModelExtension, - Model, - joinPath, - dirName, - fs, - OptionType, - ModelSource, - extractInferenceParams, - extractModelLoadParams, -} from '@janhq/core' -import { scanModelsFolder } from './legacy/model-json' -import { deleteModelFiles } from './legacy/delete' -import ky, { KyInstance } from 'ky' - -/** - * cortex.cpp setting keys - */ -export enum Settings { - huggingfaceToken = 'hugging-face-access-token', -} - -/** Data List Response Type */ -type Data = { - data: T[] -} - -/** - * Defaul mode sources - */ -const defaultModelSources = ['Menlo/Jan-nano-gguf', 'Menlo/Jan-nano-128k-gguf'] - -/** - * A extension for models - */ -export default class JanModelExtension extends ModelExtension { - api?: KyInstance - /** - * Get the API instance - * @returns - */ - async apiInstance(): Promise { - if (this.api) return this.api - const apiKey = await window.core?.api.appToken() - this.api = ky.extend({ - prefixUrl: CORTEX_API_URL, - headers: apiKey - ? { - Authorization: `Bearer ${apiKey}`, - } - : {}, - retry: 10, - }) - return this.api - } - /** - * Called when the extension is loaded. - */ - async onLoad() { - this.registerSettings(SETTINGS) - - // Configure huggingface token if available - const huggingfaceToken = await this.getSetting( - Settings.huggingfaceToken, - undefined - ) - if (huggingfaceToken) { - this.updateCortexConfig({ huggingface_token: huggingfaceToken }) - } - - // Sync with cortexsohub - this.fetchModelsHub() - } - - /** - * Subscribe to settings update and make change accordingly - * @param key - * @param value - */ - onSettingUpdate(key: string, value: T): void { - if (key === Settings.huggingfaceToken) { - this.updateCortexConfig({ huggingface_token: value }) - } - } - - /** - * Called when the extension is unloaded. - * @override - */ - async onUnload() { } - - // BEGIN: - Public API - /** - * Downloads a machine learning model. - * @param model - The model to download. - * @returns A Promise that resolves when the model is downloaded. 
- */ - async pullModel(model: string, id?: string, name?: string): Promise { - /** - * Sending POST to /models/pull/{id} endpoint to pull the model - */ - return this.apiInstance().then((api) => - api - .post('v1/models/pull', { json: { model, id, name }, timeout: false }) - .json() - .catch(async (e) => { - throw (await e.response?.json()) ?? e - }) - .then() - ) - } - - /** - * Cancels the download of a specific machine learning model. - * - * @param {string} model - The ID of the model whose download is to be cancelled. - * @returns {Promise} A promise that resolves when the download has been cancelled. - */ - async cancelModelPull(model: string): Promise { - /** - * Sending DELETE to /models/pull/{id} endpoint to cancel a model pull - */ - return this.apiInstance().then((api) => - api - .delete('v1/models/pull', { json: { taskId: model } }) - .json() - .then() - ) - } - - /** - * Deletes a pulled model - * @param model - The model to delete - * @returns A Promise that resolves when the model is deleted. - */ - async deleteModel(model: string): Promise { - return this.apiInstance() - .then((api) => api.delete(`v1/models/${model}`).json().then()) - .catch((e) => console.debug(e)) - .finally(async () => { - // Delete legacy model files - await deleteModelFiles(model).catch((e) => console.debug(e)) - }) as Promise - } - - /** - * Gets all pulled models - * @returns A Promise that resolves with an array of all models. - */ - async getModels(): Promise { - /** - * Legacy models should be supported - */ - let legacyModels = await scanModelsFolder() - - /** - * Here we are filtering out the models that are not imported - * and are not using llama.cpp engine - */ - var toImportModels = legacyModels.filter((e) => e.engine === 'nitro') - - /** - * Fetch models from cortex.cpp - */ - var fetchedModels = await this.fetchModels().catch(() => []) - - // Checking if there are models to import - const existingIds = fetchedModels.map((e) => e.id) - toImportModels = toImportModels.filter( - (e: Model) => !existingIds.includes(e.id) && !e.settings?.vision_model - ) - - /** - * There is no model to import - * just return fetched models - */ - if (!toImportModels.length) - return fetchedModels.concat( - legacyModels.filter((e) => !fetchedModels.some((x) => x.id === e.id)) - ) - - console.log('To import models:', toImportModels.length) - /** - * There are models to import - */ - if (toImportModels.length > 0) { - // Import models - await Promise.all( - toImportModels.map(async (model: Model & { file_path: string }) => { - return this.importModel( - model.id, - model.sources?.[0]?.url.startsWith('http') || - !(await fs.existsSync(model.sources?.[0]?.url)) - ? await joinPath([ - await dirName(model.file_path), - model.sources?.[0]?.filename ?? - model.settings?.llama_model_path ?? - model.sources?.[0]?.url.split('/').pop() ?? 
- model.id, - ]) // Copied models - : model.sources?.[0]?.url, // Symlink models, - model.name - ) - .then((e) => { - this.updateModel({ - id: model.id, - ...model.settings, - ...model.parameters, - } as Partial) - }) - .catch((e) => { - console.debug(e) - }) - }) - ) - } - - /** - * Models are imported successfully before - * Now return models from cortex.cpp and merge with legacy models which are not imported - */ - return await this.fetchModels() - .then((models) => { - return models.concat( - legacyModels.filter((e) => !models.some((x) => x.id === e.id)) - ) - }) - .catch(() => Promise.resolve(legacyModels)) - } - - /** - * Update a pulled model metadata - * @param model - The metadata of the model - */ - async updateModel(model: Partial): Promise { - return this.apiInstance() - .then((api) => - api - .patch(`v1/models/${model.id}`, { - json: { ...model }, - timeout: false, - }) - .json() - .then() - ) - .then(() => this.getModel(model.id)) - } - - /** - * Get a model by its ID - * @param model - The ID of the model - */ - async getModel(model: string): Promise { - return this.apiInstance().then((api) => - api - .get(`v1/models/${model}`) - .json() - .then((e) => this.transformModel(e)) - ) as Promise - } - - /** - * Import an existing model file - * @param model - * @param optionType - */ - async importModel( - model: string, - modelPath: string, - name?: string, - option?: OptionType - ): Promise { - return this.apiInstance().then((api) => - api - .post('v1/models/import', { - json: { model, modelPath, name, option }, - timeout: false, - }) - .json() - .catch((e) => console.debug(e)) // Ignore error - .then() - ) - } - - // BEGIN - Model Sources - /** - * Get model sources - * @param model - */ - async getSources(): Promise { - return [] - const sources = await this.apiInstance() - .then((api) => api.get('v1/models/sources').json>()) - .then((e) => (typeof e === 'object' ? (e.data as ModelSource[]) : [])) - // Deprecated source - filter out from legacy sources - .then((e) => e.filter((x) => x.id.toLowerCase() !== 'menlo/jan-nano')) - .catch(() => []) - return sources.concat( - DEFAULT_MODEL_SOURCES.filter((e) => !sources.some((x) => x.id === e.id)) - ) - } - - /** - * Add a model source - * @param model - */ - async addSource(source: string): Promise { - return - return this.apiInstance().then((api) => - api.post('v1/models/sources', { - json: { - source, - }, - }) - ) - } - - /** - * Delete a model source - * @param model - */ - async deleteSource(source: string): Promise { - return this.apiInstance().then((api) => - api.delete('v1/models/sources', { - json: { - source, - }, - timeout: false, - }) - ) - } - // END - Model Sources - - /** - * Check model status - * @param model - */ - async isModelLoaded(model: string): Promise { - return this.apiInstance() - .then((api) => api.get(`v1/models/status/${model}`)) - .then((e) => true) - .catch(() => false) - } - - /** - * Configure pull options such as proxy, headers, etc. - */ - async configurePullOptions(options: { [key: string]: any }): Promise { - return this.updateCortexConfig(options).catch((e) => console.debug(e)) - } - - /** - * Fetches models list from cortex.cpp - * @param model - * @returns - */ - async fetchModels(): Promise { - return [] - return this.apiInstance() - .then((api) => api.get('v1/models?limit=-1').json>()) - .then((e) => - typeof e === 'object' ? e.data.map((e) => this.transformModel(e)) : [] - ) - } - // END: - Public API - - // BEGIN: - Private API - - /** - * Transform model to the expected format (e.g. 
parameters, settings, metadata) - * @param model - * @returns - */ - private transformModel(model: any) { - model.parameters = { - ...extractInferenceParams(model), - ...model.parameters, - ...model.inference_params, - } - model.settings = { - ...extractModelLoadParams(model), - ...model.settings, - } - model.metadata = model.metadata ?? { - tags: [], - size: model.size ?? model.metadata?.size ?? 0, - } - return model as Model - } - - /** - * Update cortex config - * @param body - */ - private async updateCortexConfig(body: { - [key: string]: any - }): Promise { - return this.apiInstance() - .then((api) => api.patch('v1/configs', { json: body }).then(() => { })) - .catch((e) => console.debug(e)) - } - - /** - * Fetch models from cortex.so - */ - fetchModelsHub = async () => { - return - const models = await this.fetchModels() - - defaultModelSources.forEach((model) => { - this.addSource(model).catch((e) => { - console.debug(`Failed to add default model source ${model}:`, e) - }) - }) - return this.apiInstance() - .then((api) => - api - .get('v1/models/hub?author=cortexso&tag=cortex.cpp') - .json>() - .then(async (e) => { - await Promise.all( - [...(e.data ?? []), ...defaultModelSources].map((model) => { - if ( - !models.some( - (e) => 'modelSource' in e && e.modelSource === model - ) - ) - return this.addSource(model).catch((e) => console.debug(e)) - }) - ) - }) - ) - .catch((e) => console.debug(e)) - } - // END: - Private API -} diff --git a/extensions/model-extension/src/legacy/delete.ts b/extensions/model-extension/src/legacy/delete.ts deleted file mode 100644 index 43fa56d69..000000000 --- a/extensions/model-extension/src/legacy/delete.ts +++ /dev/null @@ -1,13 +0,0 @@ -import { dirName, fs } from '@janhq/core' -import { scanModelsFolder } from './model-json' - -export const deleteModelFiles = async (id: string) => { - try { - const models = await scanModelsFolder() - const dirPath = models.find((e) => e.id === id)?.file_path - // remove model folder directory - if (dirPath) await fs.rm(await dirName(dirPath)) - } catch (err) { - console.error(err) - } -} diff --git a/extensions/model-extension/src/legacy/model-json.test.ts b/extensions/model-extension/src/legacy/model-json.test.ts deleted file mode 100644 index f90f13646..000000000 --- a/extensions/model-extension/src/legacy/model-json.test.ts +++ /dev/null @@ -1,89 +0,0 @@ -import { describe, it, expect, beforeEach, vi } from 'vitest' -import { scanModelsFolder, getModelJsonPath } from './model-json' - -// Mock the @janhq/core module -vi.mock('@janhq/core', () => ({ - InferenceEngine: { - nitro: 'nitro', - }, - fs: { - existsSync: vi.fn(), - readdirSync: vi.fn(), - fileStat: vi.fn(), - readFileSync: vi.fn(), - }, - joinPath: vi.fn((paths) => paths.join('/')), -})) - -// Import the mocked fs and joinPath after the mock is set up -import { fs } from '@janhq/core' - -describe('model-json', () => { - beforeEach(() => { - vi.clearAllMocks() - }) - - describe('scanModelsFolder', () => { - it('should return an empty array when models folder does not exist', async () => { - vi.spyOn(fs, 'existsSync').mockReturnValue(false) - - const result = await scanModelsFolder() - expect(result).toEqual([]) - }) - - it('should return an array of models when valid model folders exist', async () => { - const mockModelJson = { - id: 'test-model', - sources: [ - { - filename: 'test-model', - url: 'file://models/test-model/test-model.gguf', - }, - ], - } - - vi.spyOn(fs, 'existsSync').mockReturnValue(true) - vi.spyOn(fs, 
'readdirSync').mockReturnValueOnce(['test-model']) - vi.spyOn(fs, 'fileStat').mockResolvedValue({ isDirectory: () => true }) - vi.spyOn(fs, 'readFileSync').mockReturnValue( - JSON.stringify(mockModelJson) - ) - vi.spyOn(fs, 'readdirSync').mockReturnValueOnce([ - 'test-model.gguf', - 'model.json', - ]) - - const result = await scanModelsFolder() - expect(result).toHaveLength(1) - expect(result[0]).toMatchObject(mockModelJson) - }) - }) - - describe('getModelJsonPath', () => { - it('should return undefined when folder does not exist', async () => { - vi.spyOn(fs, 'existsSync').mockReturnValue(false) - - const result = await getModelJsonPath('non-existent-folder') - expect(result).toBeUndefined() - }) - - it('should return the path when model.json exists in the root folder', async () => { - vi.spyOn(fs, 'existsSync').mockReturnValue(true) - vi.spyOn(fs, 'readdirSync').mockReturnValue(['model.json']) - - const result = await getModelJsonPath('test-folder') - expect(result).toBe('test-folder/model.json') - }) - - it('should return the path when model.json exists in a subfolder', async () => { - vi.spyOn(fs, 'existsSync').mockReturnValue(true) - vi.spyOn(fs, 'readdirSync') - .mockReturnValueOnce(['subfolder']) - .mockReturnValueOnce(['model.json']) - vi.spyOn(fs, 'fileStat').mockResolvedValue({ isDirectory: () => true }) - - const result = await getModelJsonPath('test-folder') - expect(result).toBe('test-folder/subfolder/model.json') - }) - }) -}) diff --git a/extensions/model-extension/src/legacy/model-json.ts b/extensions/model-extension/src/legacy/model-json.ts deleted file mode 100644 index 15ffb6b1f..000000000 --- a/extensions/model-extension/src/legacy/model-json.ts +++ /dev/null @@ -1,141 +0,0 @@ -import { Model, fs, joinPath } from '@janhq/core' -//// LEGACY MODEL FOLDER //// -/** - * Scan through models folder and return downloaded models - * @returns - */ -export const scanModelsFolder = async (): Promise< - (Model & { file_path?: string })[] -> => { - const _homeDir = 'file://models' - try { - if (!(await fs.existsSync(_homeDir))) { - console.debug('Model folder not found') - return [] - } - - const files: string[] = await fs.readdirSync(_homeDir) - - const allDirectories: string[] = [] - - for (const modelFolder of files) { - const fullModelFolderPath = await joinPath([_homeDir, modelFolder]) - if (!(await fs.fileStat(fullModelFolderPath)).isDirectory) continue - allDirectories.push(modelFolder) - } - - const readJsonPromises = allDirectories.map(async (dirName) => { - // filter out directories that don't match the selector - // read model.json - const folderFullPath = await joinPath([_homeDir, dirName]) - - const jsonPath = await getModelJsonPath(folderFullPath) - - if (jsonPath && (await fs.existsSync(jsonPath))) { - // if we have the model.json file, read it - let model = await fs.readFileSync(jsonPath, 'utf-8') - - model = typeof model === 'object' ? 
model : JSON.parse(model) - - // This to ensure backward compatibility with `model.json` with `source_url` - if (model['source_url'] != null) { - model['sources'] = [ - { - filename: model.id, - url: model['source_url'], - }, - ] - } - model.file_path = jsonPath - model.file_name = 'model.json' - - // Check model file exist - // model binaries (sources) are absolute path & exist (symlinked) - const existFiles = await Promise.all( - model.sources.map( - (source) => - // Supposed to be a local file url - !source.url.startsWith(`http://`) && - !source.url.startsWith(`https://`) - ) - ) - if ( - !['cortex', 'llama-cpp', 'nitro'].includes(model.engine) || - existFiles.every((exist) => exist) - ) - return model - - const result = await fs - .readdirSync(await joinPath([_homeDir, dirName])) - .then((files: string[]) => { - // Model binary exists in the directory - // Model binary name can match model ID or be a .gguf file and not be an incompleted model file - return ( - files.includes(dirName) || // Legacy model GGUF without extension - files.filter((file) => { - return ( - file.toLowerCase().endsWith('.gguf') || // GGUF - file.toLowerCase().endsWith('.engine') // Tensort-LLM - ) - })?.length >= - (model.engine === 'nitro-tensorrt-llm' - ? 1 - : model.sources?.length ?? 1) - ) - }) - - if (result) return model - else return undefined - } - }) - const results = await Promise.allSettled(readJsonPromises) - const modelData = results - .map((result) => { - if (result.status === 'fulfilled' && result.value) { - try { - const model = - typeof result.value === 'object' - ? result.value - : JSON.parse(result.value) - return model as Model - } catch { - console.debug(`Unable to parse model metadata: ${result.value}`) - } - } - return undefined - }) - .filter(Boolean) - - return modelData - } catch (err) { - console.error(err) - return [] - } -} - -/** - * Retrieve the model.json path from a folder - * @param folderFullPath - * @returns - */ -export const getModelJsonPath = async ( - folderFullPath: string -): Promise => { - // try to find model.json recursively inside each folder - if (!(await fs.existsSync(folderFullPath))) return undefined - const files: string[] = await fs.readdirSync(folderFullPath) - if (files.length === 0) return undefined - if (files.includes('model.json')) { - return joinPath([folderFullPath, 'model.json']) - } - // continue recursive - for (const file of files) { - const path = await joinPath([folderFullPath, file]) - const fileStats = await fs.fileStat(path) - if (fileStats.isDirectory) { - const result = await getModelJsonPath(path) - if (result) return result - } - } -} -//// END LEGACY MODEL FOLDER //// diff --git a/extensions/model-extension/src/migration.test.ts b/extensions/model-extension/src/migration.test.ts deleted file mode 100644 index fc7ebe8ba..000000000 --- a/extensions/model-extension/src/migration.test.ts +++ /dev/null @@ -1,160 +0,0 @@ -import { describe, it, expect, beforeEach, vi } from 'vitest' - -vi.stubGlobal('API_URL', 'http://localhost:3000') - - -// Mock the @janhq/core module -vi.mock('@janhq/core', (actual) => ({ - ...actual, - ModelExtension: class {}, - InferenceEngine: { - nitro: 'nitro', - }, - joinPath: vi.fn(), - dirName: vi.fn(), - fs: { - existsSync: vi.fn(), - readFileSync: vi.fn(), - writeFileSync: vi.fn(), - mkdirSync: vi.fn(), - }, -})) - -import { Model, InferenceEngine } from '@janhq/core' - -import JanModelExtension from './index' - -// Mock the model-json module -vi.mock('./legacy/model-json', () => ({ - scanModelsFolder: vi.fn(), 
-})) - -// Import the mocked scanModelsFolder after the mock is set up -import * as legacy from './legacy/model-json' - -describe('JanModelExtension', () => { - let extension: JanModelExtension - let mockLocalStorage: { [key: string]: string } - - beforeEach(() => { - // @ts-ignore - extension = new JanModelExtension() - mockLocalStorage = {} - - // Mock localStorage - Object.defineProperty(global, 'localStorage', { - value: { - getItem: vi.fn((key) => mockLocalStorage[key]), - setItem: vi.fn((key, value) => { - mockLocalStorage[key] = value - }), - }, - writable: true, - }) - }) - - describe('getModels', () => { - it('should scan models folder when localStorage is empty', async () => { - const mockModels: Model[] = [ - { - id: 'model1', - object: 'model', - version: '1', - format: 'gguf', - engine: InferenceEngine.nitro, - sources: [ - { filename: 'model1.gguf', url: 'file://models/model1.gguf' }, - ], - file_path: '/path/to/model1', - }, - { - id: 'model2', - object: 'model', - version: '1', - format: 'gguf', - engine: InferenceEngine.nitro, - sources: [ - { filename: 'model2.gguf', url: 'file://models/model2.gguf' }, - ], - file_path: '/path/to/model2', - }, - ] as any - vi.mocked(legacy.scanModelsFolder).mockResolvedValue(mockModels) - vi.spyOn(extension, 'fetchModels').mockResolvedValue([mockModels[0]]) - vi.spyOn(extension, 'updateModel').mockResolvedValue(undefined) - vi.spyOn(extension, 'importModel').mockResolvedValueOnce(mockModels[1]) - vi.spyOn(extension, 'fetchModels').mockResolvedValue([mockModels[0], mockModels[1]]) - const result = await extension.getModels() - expect(legacy.scanModelsFolder).toHaveBeenCalled() - expect(result).toEqual(mockModels) - }) - - it('should import models when there are models to import', async () => { - const mockModels: Model[] = [ - { - id: 'model1', - object: 'model', - version: '1', - format: 'gguf', - engine: InferenceEngine.nitro, - file_path: '/path/to/model1', - sources: [ - { filename: 'model1.gguf', url: 'file://models/model1.gguf' }, - ], - }, - { - id: 'model2', - object: 'model', - version: '1', - format: 'gguf', - engine: InferenceEngine.nitro, - file_path: '/path/to/model2', - sources: [ - { filename: 'model2.gguf', url: 'file://models/model2.gguf' }, - ], - }, - ] as any - mockLocalStorage['downloadedModels'] = JSON.stringify(mockModels) - vi.spyOn(extension, 'updateModel').mockResolvedValue(undefined) - vi.spyOn(extension, 'importModel').mockResolvedValue(undefined) - - const result = await extension.getModels() - - expect(extension.importModel).toHaveBeenCalledTimes(2) - expect(result).toEqual(mockModels) - }) - - it('should return models from cortexAPI when all models are already imported', async () => { - const mockModels: Model[] = [ - { - id: 'model1', - object: 'model', - version: '1', - format: 'gguf', - engine: InferenceEngine.nitro, - sources: [ - { filename: 'model1.gguf', url: 'file://models/model1.gguf' }, - ], - }, - { - id: 'model2', - object: 'model', - version: '1', - format: 'gguf', - engine: InferenceEngine.nitro, - sources: [ - { filename: 'model2.gguf', url: 'file://models/model2.gguf' }, - ], - }, - ] as any - mockLocalStorage['downloadedModels'] = JSON.stringify(mockModels) - vi.spyOn(extension, 'fetchModels').mockResolvedValue(mockModels) - extension.getModels = vi.fn().mockResolvedValue(mockModels) - - const result = await extension.getModels() - - expect(extension.getModels).toHaveBeenCalled() - expect(result).toEqual(mockModels) - }) - }) -}) diff --git a/extensions/model-extension/tsconfig.json 
b/extensions/model-extension/tsconfig.json deleted file mode 100644 index 1d3c112d4..000000000 --- a/extensions/model-extension/tsconfig.json +++ /dev/null @@ -1,15 +0,0 @@ -{ - "compilerOptions": { - "target": "es2016", - "module": "esnext", - "moduleResolution": "node", - "outDir": "./dist", - "esModuleInterop": true, - "forceConsistentCasingInFileNames": true, - "strict": false, - "skipLibCheck": true, - "rootDir": "./src" - }, - "include": ["./src"], - "exclude": ["**/*.test.ts", "vite.config.ts"] -} diff --git a/extensions/model-extension/vite.config.ts b/extensions/model-extension/vite.config.ts deleted file mode 100644 index a8ad5615f..000000000 --- a/extensions/model-extension/vite.config.ts +++ /dev/null @@ -1,8 +0,0 @@ -import { defineConfig } from "vite" -export default defineConfig(({ mode }) => ({ - define: process.env.VITEST ? {} : { global: 'window' }, - test: { - environment: 'jsdom', - }, -})) - diff --git a/mise.toml b/mise.toml index 9f6cee5c7..55ceae1c0 100644 --- a/mise.toml +++ b/mise.toml @@ -54,9 +54,7 @@ depends = ["build-extensions"] description = "Start development server (matches Makefile)" depends = ["install-and-build"] run = [ - "yarn install:cortex", "yarn download:bin", - "yarn copy:lib", "yarn dev" ] @@ -64,9 +62,7 @@ run = [ description = "Start development server with Tauri (DEPRECATED - matches Makefile)" depends = ["install-and-build"] run = [ - "yarn install:cortex", "yarn download:bin", - "yarn copy:lib", "yarn dev:tauri" ] @@ -83,7 +79,6 @@ run = "yarn build" description = "Build Tauri application (DEPRECATED - matches Makefile)" depends = ["install-and-build"] run = [ - "yarn copy:lib", "yarn build" ] diff --git a/web-app/src/containers/ChatInput.tsx b/web-app/src/containers/ChatInput.tsx index 0cecb2bf3..64aa9af57 100644 --- a/web-app/src/containers/ChatInput.tsx +++ b/web-app/src/containers/ChatInput.tsx @@ -404,7 +404,7 @@ const ChatInput = ({ model, className, initialMessage }: ChatInputProps) => { streamingContent && 'opacity-50 pointer-events-none' )} > - {model?.provider === 'llama.cpp' && loadingModel ? ( + {model?.provider === 'llamacpp' && loadingModel ? ( ) : ( { - // Skip models that require API key but don't have one (except llama.cpp) - if (provider.provider !== 'llama.cpp' && !provider.api_key?.length) { + // Skip models that require API key but don't have one (except llamacpp) + if (provider.provider !== 'llamacpp' && !provider.api_key?.length) { return } diff --git a/web-app/src/containers/SettingsMenu.tsx b/web-app/src/containers/SettingsMenu.tsx index 8aea3b501..16a2583fa 100644 --- a/web-app/src/containers/SettingsMenu.tsx +++ b/web-app/src/containers/SettingsMenu.tsx @@ -18,6 +18,10 @@ import ProvidersAvatar from '@/containers/ProvidersAvatar' const SettingsMenu = () => { const { t } = useTranslation() + const { experimentalFeatures } = useGeneralSetting() + const { providers } = useModelProvider() + const firstItemProvider = + providers.length > 0 ? 
providers[0].provider : 'llamacpp' const [expandedProviders, setExpandedProviders] = useState(false) const [isMenuOpen, setIsMenuOpen] = useState(false) const matches = useMatches() diff --git a/web-app/src/containers/dialogs/DeleteProvider.tsx b/web-app/src/containers/dialogs/DeleteProvider.tsx index a18fc8c3f..6afba39d3 100644 --- a/web-app/src/containers/dialogs/DeleteProvider.tsx +++ b/web-app/src/containers/dialogs/DeleteProvider.tsx @@ -17,7 +17,6 @@ import { EngineManager } from '@janhq/core' import { useModelProvider } from '@/hooks/useModelProvider' import { useRouter } from '@tanstack/react-router' import { route } from '@/constants/routes' -import { normalizeProvider } from '@/lib/models' import { useTranslation } from '@/i18n/react-i18next-compat' type Props = { @@ -30,7 +29,7 @@ const DeleteProvider = ({ provider }: Props) => { if ( !provider || Object.keys(models).includes(provider.provider) || - EngineManager.instance().get(normalizeProvider(provider.provider)) + EngineManager.instance().get(provider.provider) ) return null diff --git a/web-app/src/hooks/useChat.ts b/web-app/src/hooks/useChat.ts index 3e9dd6363..1f2eb5a48 100644 --- a/web-app/src/hooks/useChat.ts +++ b/web-app/src/hooks/useChat.ts @@ -115,15 +115,11 @@ export const useChat = () => { ]) const restartModel = useCallback( - async ( - provider: ProviderObject, - modelId: string, - abortController: AbortController - ) => { + async (provider: ProviderObject, modelId: string) => { await stopAllModels() await new Promise((resolve) => setTimeout(resolve, 1000)) updateLoadingModel(true) - await startModel(provider, modelId, abortController).catch(console.error) + await startModel(provider, modelId).catch(console.error) updateLoadingModel(false) await new Promise((resolve) => setTimeout(resolve, 1000)) }, @@ -131,11 +127,7 @@ export const useChat = () => { ) const increaseModelContextSize = useCallback( - async ( - modelId: string, - provider: ProviderObject, - controller: AbortController - ) => { + async (modelId: string, provider: ProviderObject) => { /** * Should increase the context size of the model by 2x * If the context size is not set or too low, it defaults to 8192. 
@@ -180,19 +172,14 @@ export const useChat = () => { }) } const updatedProvider = getProviderByName(provider.provider) - if (updatedProvider) - await restartModel(updatedProvider, model.id, controller) + if (updatedProvider) await restartModel(updatedProvider, model.id) return updatedProvider }, [getProviderByName, restartModel, updateProvider] ) const toggleOnContextShifting = useCallback( - async ( - modelId: string, - provider: ProviderObject, - controller: AbortController - ) => { + async (modelId: string, provider: ProviderObject) => { const providerName = provider.provider const newSettings = [...provider.settings] const settingKey = 'context_shift' @@ -218,8 +205,7 @@ export const useChat = () => { ...updateObj, }) const updatedProvider = getProviderByName(providerName) - if (updatedProvider) - await restartModel(updatedProvider, modelId, controller) + if (updatedProvider) await restartModel(updatedProvider, modelId) return updatedProvider }, [updateProvider, getProviderByName, restartModel] @@ -246,11 +232,9 @@ export const useChat = () => { try { if (selectedModel?.id) { updateLoadingModel(true) - await startModel( - activeProvider, - selectedModel.id, - abortController - ).catch(console.error) + await startModel(activeProvider, selectedModel.id).catch( + console.error + ) updateLoadingModel(false) } @@ -286,10 +270,6 @@ export const useChat = () => { availableTools, currentAssistant.parameters?.stream === false ? false : true, currentAssistant.parameters as unknown as Record - // TODO: replace it with according provider setting later on - // selectedProvider === 'llama.cpp' && availableTools.length > 0 - // ? false - // : true ) if (!completion) throw new Error('No completion received') @@ -298,7 +278,8 @@ export const useChat = () => { const toolCalls: ChatCompletionMessageToolCall[] = [] try { if (isCompletionResponse(completion)) { - accumulatedText = completion.choices[0]?.message?.content || '' + accumulatedText = + (completion.choices[0]?.message?.content as string) || '' if (completion.choices[0]?.message?.tool_calls) { toolCalls.push(...completion.choices[0].message.tool_calls) } @@ -365,16 +346,14 @@ export const useChat = () => { /// Increase context size activeProvider = await increaseModelContextSize( selectedModel.id, - activeProvider, - abortController + activeProvider ) continue } else if (method === 'context_shift' && selectedModel?.id) { /// Enable context_shift activeProvider = await toggleOnContextShifting( selectedModel?.id, - activeProvider, - abortController + activeProvider ) continue } else throw error @@ -387,7 +366,7 @@ export const useChat = () => { accumulatedText.length === 0 && toolCalls.length === 0 && activeThread.model?.id && - activeProvider.provider === 'llama.cpp' + provider?.provider === 'llamacpp' ) { await stopModel(activeThread.model.id, 'cortex') throw new Error('No response received from the model') diff --git a/web-app/src/hooks/useModelProvider.ts b/web-app/src/hooks/useModelProvider.ts index e2f26b1f7..2c048a060 100644 --- a/web-app/src/hooks/useModelProvider.ts +++ b/web-app/src/hooks/useModelProvider.ts @@ -24,7 +24,7 @@ export const useModelProvider = create()( persist( (set, get) => ({ providers: [], - selectedProvider: 'llama.cpp', + selectedProvider: 'llamacpp', selectedModel: null, deletedModels: [], getModelBy: (modelId: string) => { diff --git a/web-app/src/lib/completion.ts b/web-app/src/lib/completion.ts index cbdd3cc77..df6f11897 100644 --- a/web-app/src/lib/completion.ts +++ b/web-app/src/lib/completion.ts @@ -5,6 +5,9 @@ 
import { MessageStatus, EngineManager, ModelManager, + chatCompletionRequestMessage, + chatCompletion, + chatCompletionChunk, } from '@janhq/core' import { invoke } from '@tauri-apps/api/core' import { fetch as fetchTauri } from '@tauri-apps/plugin-http' @@ -24,11 +27,17 @@ type ExtendedConfigOptions = ConfigOptions & { fetch?: typeof fetch } import { ulid } from 'ulidx' -import { normalizeProvider } from './models' import { MCPTool } from '@/types/completion' import { CompletionMessagesBuilder } from './messages' import { ChatCompletionMessageToolCall } from 'openai/resources' import { callTool } from '@/services/mcp' +import { ExtensionManager } from './extension' + +export type ChatCompletionResponse = + | chatCompletion + | AsyncIterable + | StreamCompletionResponse + | CompletionResponse /** * @fileoverview Helper functions for creating thread content. @@ -124,7 +133,7 @@ export const sendCompletion = async ( tools: MCPTool[] = [], stream: boolean = true, params: Record = {} -): Promise => { +): Promise => { if (!thread?.model?.id || !provider) return undefined let providerName = provider.provider as unknown as keyof typeof models @@ -144,7 +153,7 @@ export const sendCompletion = async ( !(thread.model.id in Object.values(models).flat()) && // eslint-disable-next-line @typescript-eslint/no-explicit-any !tokenJS.extendedModelExist(providerName as any, thread.model?.id) && - provider.provider !== 'llama.cpp' + provider.provider !== 'llamacpp' ) { try { tokenJS.extendModelList( @@ -163,38 +172,48 @@ export const sendCompletion = async ( } } - // TODO: Add message history - const completion = stream - ? await tokenJS.chat.completions.create( - { - stream: true, - // eslint-disable-next-line @typescript-eslint/no-explicit-any - provider: providerName as any, + const engine = ExtensionManager.getInstance().getEngine(provider.provider) + + const completion = engine + ? await engine.chat({ + messages: messages as chatCompletionRequestMessage[], + model: thread.model?.id, + tools: normalizeTools(tools), + tool_choice: tools.length ? 'auto' : undefined, + stream: true, + ...params, + }) + : stream + ? await tokenJS.chat.completions.create( + { + stream: true, + // eslint-disable-next-line @typescript-eslint/no-explicit-any + provider: providerName as any, + model: thread.model?.id, + messages, + tools: normalizeTools(tools), + tool_choice: tools.length ? 'auto' : undefined, + ...params, + }, + { + signal: abortController.signal, + } + ) + : await tokenJS.chat.completions.create({ + stream: false, + provider: providerName, model: thread.model?.id, messages, tools: normalizeTools(tools), tool_choice: tools.length ? 'auto' : undefined, ...params, - }, - { - signal: abortController.signal, - } - ) - : await tokenJS.chat.completions.create({ - stream: false, - provider: providerName, - model: thread.model?.id, - messages, - tools: normalizeTools(tools), - tool_choice: tools.length ? 
'auto' : undefined, - ...params, - }) + }) return completion } export const isCompletionResponse = ( - response: StreamCompletionResponse | CompletionResponse -): response is CompletionResponse => { + response: ChatCompletionResponse +): response is CompletionResponse | chatCompletion => { return 'choices' in response } @@ -209,9 +228,9 @@ export const stopModel = async ( provider: string, model: string ): Promise => { - const providerObj = EngineManager.instance().get(normalizeProvider(provider)) + const providerObj = EngineManager.instance().get(provider) const modelObj = ModelManager.instance().get(model) - if (providerObj && modelObj) return providerObj?.unload(modelObj) + if (providerObj && modelObj) return providerObj?.unload(model).then(() => {}) } /** @@ -241,7 +260,7 @@ export const normalizeTools = ( * @param calls */ export const extractToolCall = ( - part: CompletionResponseChunk, + part: chatCompletionChunk | CompletionResponseChunk, currentCall: ChatCompletionMessageToolCall | null, calls: ChatCompletionMessageToolCall[] ) => { diff --git a/web-app/src/lib/model.spec.ts b/web-app/src/lib/model.spec.ts index 2f4598f3b..139597f9c 100644 --- a/web-app/src/lib/model.spec.ts +++ b/web-app/src/lib/model.spec.ts @@ -1,6 +1,2 @@ -import { expect, test } from 'vitest' -import { normalizeProvider } from './models' -test('provider name should be normalized', () => { - expect(normalizeProvider('llama.cpp')).toBe('cortex') -}) + diff --git a/web-app/src/lib/models.ts b/web-app/src/lib/models.ts index 250a3a9b5..0f9b79c40 100644 --- a/web-app/src/lib/models.ts +++ b/web-app/src/lib/models.ts @@ -58,12 +58,3 @@ export const extractModelName = (model?: string) => { export const extractModelRepo = (model?: string) => { return model?.replace('https://huggingface.co/', '') } - -/** - * Normalize the provider name to match the format used in the models object - * @param provider - The provider name to normalize - */ -export const normalizeProvider = (provider: string) => { - // TODO: After migrating to the new provider extension, remove this function - return provider === 'llama.cpp' ? 
'cortex' : provider -} diff --git a/web-app/src/lib/utils.ts b/web-app/src/lib/utils.ts index b193257f2..8486bcdb9 100644 --- a/web-app/src/lib/utils.ts +++ b/web-app/src/lib/utils.ts @@ -1,5 +1,6 @@ import { type ClassValue, clsx } from 'clsx' import { twMerge } from 'tailwind-merge' +import { ExtensionManager } from './extension' export function cn(...inputs: ClassValue[]) { return twMerge(clsx(inputs)) @@ -7,7 +8,7 @@ export function cn(...inputs: ClassValue[]) { export function getProviderLogo(provider: string) { switch (provider) { - case 'llama.cpp': + case 'llamacpp': return '/images/model-provider/llamacpp.svg' case 'anthropic': return '/images/model-provider/anthropic.svg' @@ -38,7 +39,7 @@ export function getProviderLogo(provider: string) { export const getProviderTitle = (provider: string) => { switch (provider) { - case 'llama.cpp': + case 'llamacpp': return 'Llama.cpp' case 'openai': return 'OpenAI' @@ -89,6 +90,11 @@ export function getReadableLanguageName(language: string): string { ) } +export const isLocalProvider = (provider: string) => { + const extension = ExtensionManager.getInstance().getEngine(provider) + return extension && 'load' in extension +} + export function fuzzySearch(needle: string, haystack: string) { const hlen = haystack.length const nlen = needle.length diff --git a/web-app/src/providers/DataProvider.tsx b/web-app/src/providers/DataProvider.tsx index 0c4c5f443..da14a7d92 100644 --- a/web-app/src/providers/DataProvider.tsx +++ b/web-app/src/providers/DataProvider.tsx @@ -3,10 +3,8 @@ import { useModelProvider } from '@/hooks/useModelProvider' import { useAppUpdater } from '@/hooks/useAppUpdater' import { fetchMessages } from '@/services/messages' -import { fetchModels } from '@/services/models' import { getProviders } from '@/services/providers' import { fetchThreads } from '@/services/threads' -import { ModelManager } from '@janhq/core' import { useEffect } from 'react' import { useMCPServers } from '@/hooks/useMCPServers' import { getMCPConfig } from '@/services/mcp' @@ -31,10 +29,8 @@ export function DataProvider() { const navigate = useNavigate() useEffect(() => { - fetchModels().then((models) => { - models?.forEach((model) => ModelManager.instance().register(model)) - getProviders().then(setProviders) - }) + console.log('Initializing DataProvider...') + getProviders().then(setProviders) getMCPConfig().then((data) => setServers(data.mcpServers ?? 
[])) getAssistants() .then((data) => { diff --git a/web-app/src/routes/hub.tsx b/web-app/src/routes/hub.tsx index 8fbb50722..b6a441538 100644 --- a/web-app/src/routes/hub.tsx +++ b/web-app/src/routes/hub.tsx @@ -31,7 +31,7 @@ import { DropdownMenuItem, DropdownMenuTrigger, } from '@/components/ui/dropdown-menu' -import { addModelSource, downloadModel, fetchModelHub } from '@/services/models' +import { addModelSource, fetchModelHub, pullModel } from '@/services/models' import { useDownloadStore } from '@/hooks/useDownloadStore' import { Progress } from '@/components/ui/progress' import HeaderPage from '@/containers/HeaderPage' @@ -83,7 +83,7 @@ function Hub() { const hasTriggeredDownload = useRef(false) const { getProviderByName } = useModelProvider() - const llamaProvider = getProviderByName('llama.cpp') + const llamaProvider = getProviderByName('llamacpp') const toggleModelExpansion = (modelId: string) => { setExpandedModels((prev) => ({ @@ -213,7 +213,7 @@ function Hub() { search: { model: { id: modelId, - provider: 'llama.cpp', + provider: 'llamacpp', }, }, }) @@ -240,7 +240,7 @@ function Hub() { const handleDownload = () => { // Immediately set local downloading state addLocalDownloadingModel(modelId) - downloadModel(modelId) + pullModel(modelId, modelId) } return ( @@ -650,7 +650,7 @@ function Hub() { addLocalDownloadingModel( variant.id ) - downloadModel(variant.id) + pullModel(variant.id, variant.id) }} > provider.api_key?.length || - (provider.provider === 'llama.cpp' && provider.models.length) + (provider.provider === 'llamacpp' && provider.models.length) ) useEffect(() => { diff --git a/web-app/src/routes/settings/providers/$providerName.tsx b/web-app/src/routes/settings/providers/$providerName.tsx index d15260908..8331584b2 100644 --- a/web-app/src/routes/settings/providers/$providerName.tsx +++ b/web-app/src/routes/settings/providers/$providerName.tsx @@ -6,7 +6,7 @@ import { cn, getProviderTitle } from '@/lib/utils' import { open } from '@tauri-apps/plugin-dialog' import { getActiveModels, - importModel, + pullModel, startModel, stopAllModels, stopModel, @@ -35,7 +35,6 @@ import { Button } from '@/components/ui/button' import { IconFolderPlus, IconLoader, IconRefresh } from '@tabler/icons-react' import { getProviders } from '@/services/providers' import { toast } from 'sonner' -import { ActiveModel } from '@/types/models' import { useEffect, useState } from 'react' import { predefinedProviders } from '@/mock/data' @@ -73,7 +72,7 @@ function ProviderDetail() { }, ] const { step } = useSearch({ from: Route.id }) - const [activeModels, setActiveModels] = useState([]) + const [activeModels, setActiveModels] = useState([]) const [loadingModels, setLoadingModels] = useState([]) const [refreshingModels, setRefreshingModels] = useState(false) const { providerName } = useParams({ from: Route.id }) @@ -171,10 +170,7 @@ function ProviderDetail() { if (provider) startModel(provider, modelId) .then(() => { - setActiveModels((prevModels) => [ - ...prevModels, - { id: modelId } as ActiveModel, - ]) + setActiveModels((prevModels) => [...prevModels, modelId]) }) .catch((error) => { console.error('Error starting model:', error) @@ -189,7 +185,7 @@ function ProviderDetail() { stopModel(modelId) .then(() => { setActiveModels((prevModels) => - prevModels.filter((model) => model.id !== modelId) + prevModels.filter((model) => model !== modelId) ) }) .catch((error) => { @@ -240,7 +236,7 @@ function ProviderDetail() { className={cn( 'flex flex-col gap-3', provider && - provider.provider === 
'llama.cpp' && + provider.provider === 'llamacpp' && 'flex-col-reverse' )} > @@ -353,7 +349,7 @@ function ProviderDetail() { {t('providers:models')}
- {provider && provider.provider !== 'llama.cpp' && ( + {provider && provider.provider !== 'llamacpp' && ( <> {!predefinedProviders.some( (p) => p.provider === provider.provider @@ -388,7 +384,7 @@ function ProviderDetail() { )} - {provider && provider.provider === 'llama.cpp' && ( + {provider && provider.provider === 'llamacpp' && ( - ) : ( - - )} -
- )} + {provider && provider.provider === 'llamacpp' && ( +
+ {activeModels.some( + (activeModel) => activeModel === model.id + ) ? ( + + ) : ( + + )} +
+ )} } /> diff --git a/web-app/src/routes/system-monitor.tsx b/web-app/src/routes/system-monitor.tsx index c94c1919a..1cf236448 100644 --- a/web-app/src/routes/system-monitor.tsx +++ b/web-app/src/routes/system-monitor.tsx @@ -5,10 +5,9 @@ import { getHardwareInfo } from '@/services/hardware' import { Progress } from '@/components/ui/progress' import type { HardwareData } from '@/hooks/useHardware' import { route } from '@/constants/routes' -import { formatDuration, formatMegaBytes } from '@/lib/utils' +import { formatMegaBytes } from '@/lib/utils' import { IconDeviceDesktopAnalytics } from '@tabler/icons-react' import { getActiveModels, stopModel } from '@/services/models' -import { ActiveModel } from '@/types/models' import { Button } from '@/components/ui/button' import { useTranslation } from '@/i18n/react-i18next-compat' @@ -21,7 +20,7 @@ function SystemMonitor() { const { t } = useTranslation() const { hardwareData, setHardwareData, updateCPUUsage, updateRAMAvailable } = useHardware() - const [activeModels, setActiveModels] = useState([]) + const [activeModels, setActiveModels] = useState([]) useEffect(() => { // Initial data fetch @@ -47,7 +46,7 @@ function SystemMonitor() { stopModel(modelId) .then(() => { setActiveModels((prevModels) => - prevModels.filter((model) => model.id !== modelId) + prevModels.filter((model) => model !== modelId) ) }) .catch((error) => { @@ -173,10 +172,10 @@ function SystemMonitor() { {activeModels.length > 0 && (
{activeModels.map((model) => ( -
+
- {model.id} + {model}
@@ -190,9 +189,9 @@ function SystemMonitor() { {t('system-monitor:uptime')} - + {/* {model.start_time && formatDuration(model.start_time)} - + */}
@@ -202,7 +201,7 @@ function SystemMonitor() { diff --git a/web-app/src/services/models.ts b/web-app/src/services/models.ts index 38749eea9..e5fa9bf00 100644 --- a/web-app/src/services/models.ts +++ b/web-app/src/services/models.ts @@ -1,56 +1,37 @@ /* eslint-disable @typescript-eslint/no-explicit-any */ -import { ExtensionManager } from '@/lib/extension' -import { normalizeProvider } from '@/lib/models' -import { EngineManager, ExtensionTypeEnum, ModelExtension } from '@janhq/core' +import { AIEngine, EngineManager, SettingComponentProps } from '@janhq/core' import { Model as CoreModel } from '@janhq/core' +// TODO: Replace this with the actual provider later +const defaultProvider = 'llamacpp' + +const getEngine = (provider: string = defaultProvider) => { + return EngineManager.instance().get(provider) as AIEngine +} /** * Fetches all available models. * @returns A promise that resolves to the models. */ export const fetchModels = async () => { - return ExtensionManager.getInstance() - .get(ExtensionTypeEnum.Model) - ?.getModels() + return getEngine().list() } /** * Fetches the sources of the models. * @returns A promise that resolves to the model sources. */ -export const fetchModelSources = async (): Promise => { - const extension = ExtensionManager.getInstance().get( - ExtensionTypeEnum.Model - ) - - if (!extension) return [] - - try { - const sources = await extension.getSources() - const mappedSources = sources.map((m) => ({ - ...m, - models: m.models.sort((a, b) => a.size - b.size), - })) - - // Prepend the hardcoded model to the sources - return [...mappedSources] - } catch (error) { - console.error('Failed to fetch model sources:', error) - return [] - } +export const fetchModelSources = async () => { + // TODO: New Hub + return [] } /** * Fetches the model hub. * @returns A promise that resolves to the model hub. */ -export const fetchModelHub = async (): Promise => { - const hubData = await ExtensionManager.getInstance() - .get(ExtensionTypeEnum.Model) - ?.fetchModelsHub() - - // Prepend the hardcoded model to the hub data - return hubData ? [...hubData] : [] +export const fetchModelHub = async () => { + // TODO: New Hub + return } /** @@ -59,18 +40,9 @@ export const fetchModelHub = async (): Promise => { * @returns A promise that resolves when the source is added. */ export const addModelSource = async (source: string) => { - const extension = ExtensionManager.getInstance().get( - ExtensionTypeEnum.Model - ) - - if (!extension) throw new Error('Model extension not found') - - try { - return await extension.addSource(source) - } catch (error) { - console.error('Failed to add model source:', error) - throw error - } + // TODO: New Hub + console.log(source) + return } /** @@ -79,18 +51,9 @@ export const addModelSource = async (source: string) => { * @returns A promise that resolves when the source is deleted. 
*/ export const deleteModelSource = async (source: string) => { - const extension = ExtensionManager.getInstance().get( - ExtensionTypeEnum.Model - ) - - if (!extension) throw new Error('Model extension not found') - - try { - return await extension.deleteSource(source) - } catch (error) { - console.error('Failed to delete model source:', error) - throw error - } + // TODO: New Hub + console.log(source) + return } /** @@ -102,38 +65,19 @@ export const updateModel = async ( model: Partial // provider: string, ) => { - const extension = ExtensionManager.getInstance().get( - ExtensionTypeEnum.Model - ) - - if (!extension) throw new Error('Model extension not found') - - try { - return await extension.updateModel(model) - } catch (error) { - console.error('Failed to update model:', error) - throw error - } + if (model.settings) + getEngine().updateSettings(model.settings as SettingComponentProps[]) } /** - * Downloads a model. - * @param model The model to download. + * Pull or import a model. + * @param model The model to pull. * @returns A promise that resolves when the model download task is created. */ -export const downloadModel = async (id: string) => { - const extension = ExtensionManager.getInstance().get( - ExtensionTypeEnum.Model - ) - - if (!extension) throw new Error('Model extension not found') - - try { - return await extension.pullModel(id) - } catch (error) { - console.error('Failed to download model:', error) - throw error - } +export const pullModel = async (id: string, modelPath: string) => { + return getEngine().import(id, { + modelPath, + }) } /** @@ -142,18 +86,7 @@ export const downloadModel = async (id: string) => { * @returns */ export const abortDownload = async (id: string) => { - const extension = ExtensionManager.getInstance().get( - ExtensionTypeEnum.Model - ) - - if (!extension) throw new Error('Model extension not found') - - try { - return await extension.cancelModelPull(id) - } catch (error) { - console.error('Failed to abort model download:', error) - throw error - } + return getEngine().abortImport(id) } /** @@ -162,64 +95,7 @@ export const abortDownload = async (id: string) => { * @returns */ export const deleteModel = async (id: string) => { - const extension = ExtensionManager.getInstance().get( - ExtensionTypeEnum.Model - ) - - if (!extension) throw new Error('Model extension not found') - - try { - return await extension.deleteModel(id).then(() => { - // TODO: This should be removed when we integrate new llama.cpp extension - if (id.includes(':')) { - extension.addSource(`cortexso/${id.split(':')[0]}`) - } - }) - } catch (error) { - console.error('Failed to delete model:', error) - throw error - } -} - -/** - * Imports a model from a file path. - * @param filePath The path to the model file or an array of file paths. - * @param modelId Optional model ID. If not provided, it will be derived from the file name. - * @param provider The provider for the model (default: 'llama.cpp'). - * @returns A promise that resolves when the model is imported. - */ -export const importModel = async ( - filePath: string | string[], - modelId?: string, - provider: string = 'llama.cpp' -) => { - const extension = ExtensionManager.getInstance().get( - ExtensionTypeEnum.Model - ) - - if (!extension) throw new Error('Model extension not found') - - try { - // If filePath is an array, use the first element - const path = Array.isArray(filePath) ? 
filePath[0] : filePath - - // If no path was selected, throw an error - if (!path) throw new Error('No file selected') - - // Extract filename from path to use as model ID if not provided - const defaultModelId = - path - .split(/[/\\]/) - .pop() - ?.replace(/ /g, '-') - .replace(/\.gguf$/i, '') || path - const modelIdToUse = modelId || defaultModelId - - return await extension.importModel(modelIdToUse, path, provider) - } catch (error) { - console.error('Failed to import model:', error) - throw error - } + return getEngine().delete(id) } /** @@ -228,20 +104,8 @@ export const importModel = async ( * @returns */ export const getActiveModels = async (provider?: string) => { - const providerName = provider || 'cortex' // we will go down to llama.cpp extension later on - const extension = EngineManager.instance().get(providerName) - - if (!extension) throw new Error('Model extension not found') - - try { - return 'activeModels' in extension && - typeof extension.activeModels === 'function' - ? ((await extension.activeModels()) ?? []) - : [] - } catch (error) { - console.error('Failed to get active models:', error) - return [] - } + // getEngine(provider) + return getEngine(provider).getLoadedModels() } /** @@ -251,20 +115,7 @@ export const getActiveModels = async (provider?: string) => { * @returns */ export const stopModel = async (model: string, provider?: string) => { - const providerName = provider || 'cortex' // we will go down to llama.cpp extension later on - const extension = EngineManager.instance().get(providerName) - - if (!extension) throw new Error('Model extension not found') - - try { - return await extension.unloadModel({ - model, - id: model, - }) - } catch (error) { - console.error('Failed to stop model:', error) - return [] - } + getEngine(provider).unload(model) } /** @@ -273,10 +124,7 @@ export const stopModel = async (model: string, provider?: string) => { */ export const stopAllModels = async () => { const models = await getActiveModels() - if (models) - await Promise.all( - models.map((model: { id: string }) => stopModel(model.id)) - ) + if (models) await Promise.all(models.map((model) => stopModel(model))) } /** @@ -289,28 +137,17 @@ export const stopAllModels = async () => { */ export const startModel = async ( provider: ProviderObject, - model: string, - abortController?: AbortController + model: string ): Promise => { - const providerObj = EngineManager.instance().get( - normalizeProvider(provider.provider) - ) - const modelObj = provider.models.find((m) => m.id === model) - - if (providerObj && modelObj) { - return providerObj?.loadModel( - { - id: modelObj.id, - settings: Object.fromEntries( - Object.entries(modelObj.settings ?? {}).map(([key, value]) => [ - key, - value.controller_props?.value, // assuming each setting is { value: ... } - ]) - ), - }, - abortController - ) - } + getEngine(provider.provider) + .load(model) + .catch((error) => { + console.error( + `Failed to start model ${model} for provider ${provider.provider}:`, + error + ) + throw error + }) } /** @@ -329,37 +166,16 @@ export const configurePullOptions = async ({ verifyHostSSL, noProxy, }: ProxyOptions) => { - const extension = ExtensionManager.getInstance().get( - ExtensionTypeEnum.Model - ) - - if (!extension) throw new Error('Model extension not found') - try { - await extension.configurePullOptions( - proxyEnabled - ? { - proxy_username: proxyUsername, - proxy_password: proxyPassword, - proxy_url: proxyUrl, - verify_proxy_ssl: proxyIgnoreSSL ? 
false : verifyProxySSL, - verify_proxy_host_ssl: proxyIgnoreSSL ? false : verifyProxyHostSSL, - verify_peer_ssl: proxyIgnoreSSL ? false : verifyPeerSSL, - verify_host_ssl: proxyIgnoreSSL ? false : verifyHostSSL, - no_proxy: noProxy, - } - : { - proxy_username: '', - proxy_password: '', - proxy_url: '', - verify_proxy_ssl: false, - verify_proxy_host_ssl: false, - verify_peer_ssl: false, - verify_host_ssl: false, - no_proxy: '', - } - ) - } catch (error) { - console.error('Failed to configure pull options:', error) - throw error - } + console.log('Configuring proxy options:', { + proxyEnabled, + proxyUrl, + proxyUsername, + proxyPassword, + proxyIgnoreSSL, + verifyProxySSL, + verifyProxyHostSSL, + verifyPeerSSL, + verifyHostSSL, + noProxy, + }) } diff --git a/web-app/src/services/providers.ts b/web-app/src/services/providers.ts index c279620f2..9b5135861 100644 --- a/web-app/src/services/providers.ts +++ b/web-app/src/services/providers.ts @@ -1,11 +1,6 @@ import { models as providerModels } from 'token.js' import { predefinedProviders } from '@/mock/data' -import { - EngineManagementExtension, - EngineManager, - ExtensionTypeEnum, - SettingComponentProps, -} from '@janhq/core' +import { EngineManager, SettingComponentProps } from '@janhq/core' import { DefaultToolUseSupportedModels, ModelCapabilities, @@ -17,11 +12,6 @@ import { fetch as fetchTauri } from '@tauri-apps/plugin-http' export const getProviders = async (): Promise => { - const engines = !localStorage.getItem('migration_completed') - ? await ExtensionManager.getInstance() - .get(ExtensionTypeEnum.Engine) - ?.getEngines() - : {} const builtinProviders = predefinedProviders.map((provider) => { let models = provider.models as Model[] if (Object.keys(providerModels).includes(provider.provider)) { @@ -29,29 +19,6 @@ export const getProviders = async (): Promise => { provider.provider as unknown as keyof typeof providerModels ].models as unknown as string[] - if (engines && Object.keys(engines).length > 0) { - for (const [key, value] of Object.entries(engines)) { - const providerName = key.replace('google_gemini', 'gemini') - if (provider.provider !== providerName) continue - const engine = value[0] as - | { - api_key?: string - url?: string - engine?: string - } - | undefined - if (engine && 'api_key' in engine) { - const settings = provider?.settings.map((e) => { - if (e.key === 'api-key') - e.controller_props.value = (engine.api_key as string) ?? '' - return e - }) - - provider.settings = settings - } - } - } - if (Array.isArray(builtInModels)) models = builtInModels.map((model) => { const modelManifest = models.find((e) => e.id === model) @@ -77,24 +44,11 @@ export const getProviders = async (): Promise => { models, } }) - if (engines && Object.keys(engines).length > 0) { - localStorage.setItem('migration_completed', 'true') - } const runtimeProviders: ModelProvider[] = [] + for (const [providerName, value] of EngineManager.instance().engines) { + const models = (await fetchModels()) ?? [] - for (const [key, value] of EngineManager.instance().engines) { - // TODO: Remove this when the cortex extension is removed - const providerName = key === 'cortex' ? 'llama.cpp' : key - - const models = - ((await fetchModels()) ?? []).filter( - (model) => - (model.engine === 'llama-cpp' ? 'llama.cpp' : model.engine) === - providerName && - 'status' in model && - model.status === 'downloaded' - ) ?? 
[] const provider: ModelProvider = { active: false, persist: true, @@ -246,9 +200,8 @@ export const updateSettings = async ( providerName: string, settings: ProviderSetting[] ): Promise => { - const provider = providerName === 'llama.cpp' ? 'cortex' : providerName return ExtensionManager.getInstance() - .getEngine(provider) + .getEngine(providerName) ?.updateSettings( settings.map((setting) => ({ ...setting, diff --git a/web-app/src/services/threads.ts b/web-app/src/services/threads.ts index 7d124bfd5..6a9ff4fc8 100644 --- a/web-app/src/services/threads.ts +++ b/web-app/src/services/threads.ts @@ -51,7 +51,7 @@ export const createThread = async (thread: Thread): Promise => { ...(thread.assistants?.[0] ?? defaultAssistant), model: { id: thread.model?.id ?? '*', - engine: thread.model?.provider ?? 'llama.cpp', + engine: thread.model?.provider ?? 'llamacpp', }, }, ], @@ -88,7 +88,7 @@ export const updateThread = (thread: Thread) => { return { model: { id: thread.model?.id ?? '*', - engine: thread.model?.provider ?? 'llama.cpp', + engine: thread.model?.provider ?? 'llamacpp', }, id: e.id, name: e.name, @@ -98,7 +98,7 @@ export const updateThread = (thread: Thread) => { { model: { id: thread.model?.id ?? '*', - engine: thread.model?.provider ?? 'llama.cpp', + engine: thread.model?.provider ?? 'llamacpp', }, id: 'jan', name: 'Jan', diff --git a/web-app/src/types/models.ts b/web-app/src/types/models.ts index ed93cdbae..bf2fcc4a1 100644 --- a/web-app/src/types/models.ts +++ b/web-app/src/types/models.ts @@ -20,13 +20,3 @@ export enum DefaultToolUseSupportedModels { JanNano = 'jan-nano', Qwen3 = 'qwen3', } - -export type ActiveModel = { - engine: string - id: string - model_size: number - object: 'model' - ram: number - start_time: number - vram: number -}
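A minimal sketch (illustrative only, not part of the diff) of how the refactored services layer in web-app/src/services/models.ts is expected to drive a provider's engine after this change: the provider name (now 'llamacpp' rather than 'llama.cpp') is looked up directly in EngineManager, and the returned AIEngine handles listing, importing, loading and unloading models. The method names used here (list, import, load, getLoadedModels, unload) are taken from the added code above; the model id and file path are hypothetical.

import { AIEngine, EngineManager } from '@janhq/core'

// Resolve the engine registered for a provider; 'llamacpp' is the default
// local provider after this refactor (see services/models.ts above).
const getEngine = (provider = 'llamacpp'): AIEngine =>
  EngineManager.instance().get(provider) as AIEngine

async function demoModelLifecycle() {
  const engine = getEngine()

  // List models known to the engine (replaces the old ModelExtension.getModels()).
  const models = await engine.list()
  console.log('available models:', models)

  // Import a local GGUF file as a model (hypothetical id and path).
  await engine.import('my-model', { modelPath: '/tmp/my-model.gguf' })

  // Load the model, inspect the loaded set, then unload it again.
  await engine.load('my-model')
  console.log('loaded models:', await engine.getLoadedModels())
  await engine.unload('my-model')
}

This mirrors the new service helpers (fetchModels, pullModel, startModel, getActiveModels, stopModel) introduced by the patch, which are now thin wrappers over the AIEngine interface instead of the removed ModelExtension/EngineManagementExtension APIs.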