feat: model and cortex extensions update

2024-10-15 13:06:33 +07:00 · 2024-10-15 13:06:33 +07:00 · 4080dc4b65
commit 4080dc4b65
parent c6481d4668
132 changed files with 1416 additions and 3771 deletions
--- a/core/src/browser/extension.ts
+++ b/core/src/browser/extension.ts
@ -1,6 +1,8 @@
-import { SettingComponentProps } from '../types'
+import { Model, ModelEvent, SettingComponentProps } from '../types'
 import { getJanDataFolderPath, joinPath } from './core'
+import { events } from './events'
 import { fs } from './fs'
+import { ModelManager } from './models'

 export enum ExtensionTypeEnum {
  Assistant = 'assistant',
@ -103,6 +105,22 @@ export abstract class BaseExtension implements ExtensionType {
    return undefined
  }

+  /**
+   * Stores every given model in the shared in-memory ModelManager, then emits OnModelsUpdate.
+   * @param models - Models to register; the event is emitted even when the list is empty.
+   */
+  async registerModels(models: Model[]): Promise<void> {
+    models.forEach((entry) => {
+      ModelManager.instance().register(entry)
+    })
+    events.emit(ModelEvent.OnModelsUpdate, {})
+  }
+
+  /**
+   * Register settings for the extension.
+   * @param settings
+   * @returns
+   */
  async registerSettings(settings: SettingComponentProps[]): Promise<void> {
    if (!this.name) {
      console.error('Extension name is not defined')
@ -139,6 +157,12 @@ export abstract class BaseExtension implements ExtensionType {
    }
  }

+  /**
+   * Get the setting value for the key.
+   * @param key
+   * @param defaultValue
+   * @returns
+   */
  async getSetting<T>(key: string, defaultValue: T) {
    const keySetting = (await this.getSettings()).find((setting) => setting.key === key)

@ -168,6 +192,10 @@ export abstract class BaseExtension implements ExtensionType {
    return
  }

+  /**
+   * Get the settings for the extension.
+   * @returns
+   */
  async getSettings(): Promise<SettingComponentProps[]> {
    if (!this.name) return []

@ -189,6 +217,11 @@ export abstract class BaseExtension implements ExtensionType {
    }
  }

+  /**
+   * Update the settings for the extension.
+   * @param componentProps
+   * @returns
+   */
  async updateSettings(componentProps: Partial<SettingComponentProps>[]): Promise<void> {
    if (!this.name) return

--- a/core/src/browser/extensions/engines/AIEngine.test.ts
+++ b/core/src/browser/extensions/engines/AIEngine.test.ts
@ -1,8 +1,6 @@
 import { AIEngine } from './AIEngine'
 import { events } from '../../events'
-import { ModelEvent, Model, ModelFile, InferenceEngine } from '../../../types'
-import { EngineManager } from './EngineManager'
-import { fs } from '../../fs'
+import { ModelEvent, Model } from '../../../types'

 jest.mock('../../events')
 jest.mock('./EngineManager')
@ -26,7 +24,7 @@ describe('AIEngine', () => {
  })

  it('should load model if provider matches', async () => {
-    const model: ModelFile = { id: 'model1', engine: 'test-provider' } as any
+    const model: any = { id: 'model1', engine: 'test-provider' } as any

    await engine.loadModel(model)

@ -34,7 +32,7 @@ describe('AIEngine', () => {
  })

  it('should not load model if provider does not match', async () => {
-    const model: ModelFile = { id: 'model1', engine: 'other-provider' } as any
+    const model: any = { id: 'model1', engine: 'other-provider' } as any

    await engine.loadModel(model)

--- a/core/src/browser/extensions/engines/AIEngine.ts
+++ b/core/src/browser/extensions/engines/AIEngine.ts
@ -1,17 +1,14 @@
-import { getJanDataFolderPath, joinPath } from '../../core'
 import { events } from '../../events'
 import { BaseExtension } from '../../extension'
-import { fs } from '../../fs'
-import { MessageRequest, Model, ModelEvent, ModelFile } from '../../../types'
+import { MessageRequest, Model, ModelEvent } from '../../../types'
 import { EngineManager } from './EngineManager'
+import { ModelManager } from '../../models/manager'

 /**
 * Base AIEngine
 * Applicable to all AI Engines
 */
 export abstract class AIEngine extends BaseExtension {
-  private static modelsFolder = 'models'
-
  // The inference engine
  abstract provider: string

@ -21,7 +18,7 @@ export abstract class AIEngine extends BaseExtension {
  override onLoad() {
    this.registerEngine()

-    events.on(ModelEvent.OnModelInit, (model: ModelFile) => this.loadModel(model))
+    events.on(ModelEvent.OnModelInit, (model: Model) => this.loadModel(model))
    events.on(ModelEvent.OnModelStop, (model: Model) => this.unloadModel(model))
  }

@ -32,53 +29,10 @@ export abstract class AIEngine extends BaseExtension {
    EngineManager.instance().register(this)
  }

-  async registerModels(models: Model[]): Promise<void> {
-    const modelFolderPath = await joinPath([await getJanDataFolderPath(), AIEngine.modelsFolder])
-
-    let shouldNotifyModelUpdate = false
-    for (const model of models) {
-      const modelPath = await joinPath([modelFolderPath, model.id])
-      const isExist = await fs.existsSync(modelPath)
-
-      if (isExist) {
-        await this.migrateModelIfNeeded(model, modelPath)
-        continue
-      }
-
-      await fs.mkdir(modelPath)
-      await fs.writeFileSync(
-        await joinPath([modelPath, 'model.json']),
-        JSON.stringify(model, null, 2)
-      )
-      shouldNotifyModelUpdate = true
-    }
-
-    if (shouldNotifyModelUpdate) {
-      events.emit(ModelEvent.OnModelsUpdate, {})
-    }
-  }
-
-  async migrateModelIfNeeded(model: Model, modelPath: string): Promise<void> {
-    try {
-      const modelJson = await fs.readFileSync(await joinPath([modelPath, 'model.json']), 'utf-8')
-      const currentModel: Model = JSON.parse(modelJson)
-      if (currentModel.version !== model.version) {
-        await fs.writeFileSync(
-          await joinPath([modelPath, 'model.json']),
-          JSON.stringify(model, null, 2)
-        )
-
-        events.emit(ModelEvent.OnModelsUpdate, {})
-      }
-    } catch (error) {
-      console.warn('Error while try to migrating model', error)
-    }
-  }
-
  /**
   * Loads the model.
   */
-  async loadModel(model: ModelFile): Promise<any> {
+  async loadModel(model: Model): Promise<any> {
    if (model.engine.toString() !== this.provider) return Promise.resolve()
    events.emit(ModelEvent.OnModelReady, model)
    return Promise.resolve()
--- a/core/src/browser/extensions/engines/EngineManager.ts
+++ b/core/src/browser/extensions/engines/EngineManager.ts
@ -1,3 +1,4 @@
+import { InferenceEngine } from '../../../types'
 import { AIEngine } from './AIEngine'

 /**
@ -20,6 +21,22 @@ export class EngineManager {
   * @returns The engine, if found.
   */
  get<T extends AIEngine>(provider: string): T | undefined {
+    // Backward compatible provider
+    // nitro is migrated to cortex
+    // Every id listed below is looked up under the `cortex` key instead of its own.
+    if (
+      [
+        InferenceEngine.nitro,
+        InferenceEngine.cortex,
+        InferenceEngine.cortex_llamacpp,
+        InferenceEngine.cortex_onnx,
+        InferenceEngine.cortex_tensorrtllm,
+      ]
+        .map((e) => e.toString())
+        .includes(provider)
+    )
+      provider = InferenceEngine.cortex
+
    return this.engines.get(provider) as T | undefined
  }

@ -27,6 +44,6 @@ export class EngineManager {
   * The instance of the engine manager.
   */
  static instance(): EngineManager {
-    return window.core?.engineManager as EngineManager ?? new EngineManager()
+    return (window.core?.engineManager as EngineManager) ?? new EngineManager()
  }
 }
--- a/core/src/browser/extensions/engines/LocalOAIEngine.test.ts
+++ b/core/src/browser/extensions/engines/LocalOAIEngine.test.ts
@ -3,7 +3,7 @@
 */
 import { LocalOAIEngine } from './LocalOAIEngine'
 import { events } from '../../events'
-import { ModelEvent, ModelFile, Model } from '../../../types'
+import { ModelEvent, Model } from '../../../types'
 import { executeOnMain, systemInformation, dirName } from '../../core'

 jest.mock('../../core', () => ({
@ -43,7 +43,7 @@ describe('LocalOAIEngine', () => {
  })

  it('should load model correctly', async () => {
-    const model: ModelFile = { engine: 'testProvider', file_path: 'path/to/model' } as any
+    const model: Model = { engine: 'testProvider', file_path: 'path/to/model' } as any
    const modelFolder = 'path/to'
    const systemInfo = { os: 'testOS' }
    const res = { error: null }
@ -54,7 +54,6 @@ describe('LocalOAIEngine', () => {

    await engine.loadModel(model)

-    expect(dirName).toHaveBeenCalledWith(model.file_path)
    expect(systemInformation).toHaveBeenCalled()
    expect(executeOnMain).toHaveBeenCalledWith(
      engine.nodeModule,
@ -66,7 +65,7 @@ describe('LocalOAIEngine', () => {
  })

  it('should handle load model error', async () => {
-    const model: ModelFile = { engine: 'testProvider', file_path: 'path/to/model' } as any
+    const model: any = { engine: 'testProvider', file_path: 'path/to/model' } as any
    const modelFolder = 'path/to'
    const systemInfo = { os: 'testOS' }
    const res = { error: 'load error' }
--- a/core/src/browser/extensions/engines/LocalOAIEngine.ts
+++ b/core/src/browser/extensions/engines/LocalOAIEngine.ts
@ -1,6 +1,6 @@
 import { executeOnMain, systemInformation, dirName } from '../../core'
 import { events } from '../../events'
-import { Model, ModelEvent, ModelFile } from '../../../types'
+import { Model, ModelEvent } from '../../../types'
 import { OAIEngine } from './OAIEngine'

 /**
@ -22,35 +22,36 @@ export abstract class LocalOAIEngine extends OAIEngine {
  override onLoad() {
    super.onLoad()
    // These events are applicable to local inference providers
-    events.on(ModelEvent.OnModelInit, (model: ModelFile) => this.loadModel(model))
+    events.on(ModelEvent.OnModelInit, (model: Model) => this.loadModel(model))
    events.on(ModelEvent.OnModelStop, (model: Model) => this.unloadModel(model))
  }

  /**
   * Load the model.
   */
-  override async loadModel(model: ModelFile): Promise<void> {
+  override async loadModel(model: Model): Promise<void> {
    if (model.engine.toString() !== this.provider) return
-    const modelFolder = await dirName(model.file_path)
-    const systemInfo = await systemInformation()
-    const res = await executeOnMain(
-      this.nodeModule,
-      this.loadModelFunctionName,
-      {
-        modelFolder,
-        model,
-      },
-      systemInfo
-    )
+    // const modelFolder = await dirName(model.file_path)
+    // const systemInfo = await systemInformation()
+    // const res = await executeOnMain(
+    //   this.nodeModule,
+    //   this.loadModelFunctionName,
+    //   {
+    //     modelFolder,
+    //     model,
+    //   },
+    //   systemInfo
+    // )

-    if (res?.error) {
-      events.emit(ModelEvent.OnModelFail, { error: res.error })
-      return Promise.reject(res.error)
-    } else {
-      this.loadedModel = model
-      events.emit(ModelEvent.OnModelReady, model)
-      return Promise.resolve()
-    }
+    // if (res?.error) {
+    //   events.emit(ModelEvent.OnModelFail, { error: res.error })
+    //   return Promise.reject(res.error)
+    // } else {
+    //   this.loadedModel = model
+    //   events.emit(ModelEvent.OnModelReady, model)
+    //   return Promise.resolve()
+    // }
+    return Promise.resolve()
  }
  /**
   * Stops the model.
--- a/core/src/browser/extensions/engines/OAIEngine.ts
+++ b/core/src/browser/extensions/engines/OAIEngine.ts
@ -55,7 +55,21 @@ export abstract class OAIEngine extends AIEngine {
   * Inference request
   */
  override async inference(data: MessageRequest) {
-    if (data.model?.engine?.toString() !== this.provider) return
+    if (!data.model?.id) {
+      events.emit(MessageEvent.OnMessageResponse, {
+        status: MessageStatus.Error,
+        content: [
+          {
+            type: ContentType.Text,
+            text: {
+              value: 'No model ID provided',
+              annotations: [],
+            },
+          },
+        ],
+      })
+      return
+    }

    const timestamp = Date.now()
    const message: ThreadMessage = {
@ -89,7 +103,6 @@ export abstract class OAIEngine extends AIEngine {
      model: model.id,
      stream: true,
      ...model.parameters,
-      ...(this.provider === 'nitro' ? { engine: 'cortex.llamacpp'} : {}),
    }
    if (this.transformPayload) {
      requestBody = this.transformPayload(requestBody)
--- a/core/src/browser/extensions/engines/helpers/sse.ts
+++ b/core/src/browser/extensions/engines/helpers/sse.ts
@ -10,7 +10,7 @@ export function requestInference(
  requestBody: any,
  model: {
    id: string
-    parameters: ModelRuntimeParams
+    parameters?: ModelRuntimeParams
  },
  controller?: AbortController,
  headers?: HeadersInit,
@ -22,7 +22,7 @@ export function requestInference(
      headers: {
        'Content-Type': 'application/json',
        'Access-Control-Allow-Origin': '*',
-        'Accept': model.parameters.stream ? 'text/event-stream' : 'application/json',
+        'Accept': model.parameters?.stream ? 'text/event-stream' : 'application/json',
        ...headers,
      },
      body: JSON.stringify(requestBody),
@ -45,7 +45,7 @@ export function requestInference(
          subscriber.complete()
          return
        }
-        if (model.parameters.stream === false) {
+        if (model.parameters?.stream === false) {
          const data = await response.json()
          if (transformResponse) {
            subscriber.next(transformResponse(data))
--- a/core/src/browser/extensions/model.ts
+++ b/core/src/browser/extensions/model.ts
@ -1,13 +1,5 @@
 import { BaseExtension, ExtensionTypeEnum } from '../extension'
-import {
-  GpuSetting,
-  HuggingFaceRepoData,
-  ImportingModel,
-  Model,
-  ModelFile,
-  ModelInterface,
-  OptionType,
-} from '../../types'
+import { Model, ModelInterface, OptionType } from '../../types'

 /**
 * Model extension for managing models.
@ -20,17 +12,10 @@ export abstract class ModelExtension extends BaseExtension implements ModelInter
    return ExtensionTypeEnum.Model
  }

-  abstract downloadModel(
-    model: Model,
-    gpuSettings?: GpuSetting,
-    network?: { proxy: string; ignoreSSL?: boolean }
-  ): Promise<void>
-  abstract cancelModelDownload(modelId: string): Promise<void>
-  abstract deleteModel(model: ModelFile): Promise<void>
-  abstract getDownloadedModels(): Promise<ModelFile[]>
-  abstract getConfiguredModels(): Promise<ModelFile[]>
-  abstract importModels(models: ImportingModel[], optionType: OptionType): Promise<void>
-  abstract updateModelInfo(modelInfo: Partial<ModelFile>): Promise<ModelFile>
-  abstract fetchHuggingFaceRepoData(repoId: string): Promise<HuggingFaceRepoData>
-  abstract getDefaultModel(): Promise<Model>
+  abstract getModels(): Promise<Model[]>
+  abstract pullModel(model: string): Promise<void>
+  abstract cancelModelPull(modelId: string): Promise<void>
+  /** Imports an existing model file; `model` is the model id, `modelPath` the file path. */
+  abstract importModel(model: string, modelPath: string): Promise<void>
+  abstract updateModel(modelInfo: Partial<Model>): Promise<Model>
+  abstract deleteModel(model: string): Promise<void>
 }
--- a/core/src/browser/index.test.ts
+++ b/core/src/browser/index.test.ts
@ -1,32 +1,37 @@
-import * as Core from './core';
-import * as Events from './events';
-import * as FileSystem from './fs';
-import * as Extension from './extension';
-import * as Extensions from './extensions';
-import * as Tools from './tools';
+import * as Core from './core'
+import * as Events from './events'
+import * as FileSystem from './fs'
+import * as Extension from './extension'
+import * as Extensions from './extensions'
+import * as Tools from './tools'
+import * as Models from './models'

 describe('Module Tests', () => {
-    it('should export Core module', () => {
-        expect(Core).toBeDefined();
-    });
+  it('should export Core module', () => {
+    expect(Core).toBeDefined()
+  })

-    it('should export Event module', () => {
-        expect(Events).toBeDefined();
-    });
+  it('should export Event module', () => {
+    expect(Events).toBeDefined()
+  })

-    it('should export Filesystem module', () => {
-        expect(FileSystem).toBeDefined();
-    });
+  it('should export Filesystem module', () => {
+    expect(FileSystem).toBeDefined()
+  })

-    it('should export Extension module', () => {
-        expect(Extension).toBeDefined();
-    });
+  it('should export Extension module', () => {
+    expect(Extension).toBeDefined()
+  })

-    it('should export all base extensions', () => {
-        expect(Extensions).toBeDefined();
-    });
+  it('should export all base extensions', () => {
+    expect(Extensions).toBeDefined()
+  })

-    it('should export all base tools', () => {
-        expect(Tools).toBeDefined();
-    });
-});
+  it('should export all base tools', () => {
+    expect(Tools).toBeDefined()
+  })
+
+  it('should export all base models', () => {
+    expect(Models).toBeDefined()
+  })
+})
--- a/core/src/browser/index.ts
+++ b/core/src/browser/index.ts
@ -33,3 +33,9 @@ export * from './extensions'
 * @module
 */
 export * from './tools'
+
+/**
+ * Export all base models.
+ * @module
+ */
+export * from './models'
--- a/core/src/browser/models/index.ts
+++ b/core/src/browser/models/index.ts
@ -0,0 +1,5 @@
+/**
+ * Export ModelManager
+ * @module
+ */
+export { ModelManager } from './manager'
--- a/core/src/browser/models/manager.ts
+++ b/core/src/browser/models/manager.ts
@ -0,0 +1,40 @@
+import { Model, ModelEvent } from '../../types'
+import { events } from '../events'
+
+/**
+ * Manages the registered models across extensions.
+ */
+export class ModelManager {
+  /** Registered models, keyed by model id. */
+  public models = new Map<string, Model>()
+
+  /**
+   * Publishes this manager as `window.core.modelManager` when `window.core` exists.
+   * When there is no `window` global or no `window.core`, nothing is published
+   * instead of throwing, matching the optional access used by `instance()`.
+   */
+  constructor() {
+    if (typeof window !== 'undefined' && window.core) {
+      window.core.modelManager = this
+    }
+  }
+
+  /**
+   * Registers a model, replacing any entry stored under the same id, and emits OnModelsUpdate.
+   * @param model - The model to register.
+   */
+  register<T extends Model>(model: T) {
+    this.models.set(model.id, model)
+    events.emit(ModelEvent.OnModelsUpdate, {})
+  }
+
+  /**
+   * Retrieves a model by its id.
+   * @param id - The id of the model to retrieve.
+   * @returns The model, if found.
+   */
+  get<T extends Model>(id: string): T | undefined {
+    return this.models.get(id) as T | undefined
+  }
+
+  /**
+   * The shared instance of the model manager.
+   * @returns The manager published on `window.core`, or a newly created one when none is published.
+   */
+  static instance(): ModelManager {
+    // `typeof` keeps this safe where the `window` identifier is not declared at all.
+    const shared = typeof window !== 'undefined' ? window.core?.modelManager : undefined
+    return (shared as ModelManager | undefined) ?? new ModelManager()
+  }
+}
--- a/core/src/types/api/index.ts
+++ b/core/src/types/api/index.ts
@ -69,9 +69,11 @@ export enum DownloadRoute {
 }

 export enum DownloadEvent {
-  onFileDownloadUpdate = 'onFileDownloadUpdate',
-  onFileDownloadError = 'onFileDownloadError',
-  onFileDownloadSuccess = 'onFileDownloadSuccess',
+  onFileDownloadUpdate = 'DownloadUpdated',
+  onFileDownloadError = 'DownloadError',
+  onFileDownloadSuccess = 'DownloadSuccess',
+  onFileDownloadStopped = 'DownloadStopped',
+  onFileDownloadStarted = 'DownloadStarted',
  onFileUnzipSuccess = 'onFileUnzipSuccess',
 }

--- a/core/src/types/model/modelEntity.ts
+++ b/core/src/types/model/modelEntity.ts
@ -6,8 +6,8 @@ import { FileMetadata } from '../file'
 */
 export type ModelInfo = {
  id: string
-  settings: ModelSettingParams
-  parameters: ModelRuntimeParams
+  settings?: ModelSettingParams
+  parameters?: ModelRuntimeParams
  engine?: InferenceEngine
 }

@ -28,9 +28,10 @@ export enum InferenceEngine {
  nitro_tensorrt_llm = 'nitro-tensorrt-llm',
  cohere = 'cohere',
  nvidia = 'nvidia',
-  cortex_llamacpp = 'cortex.llamacpp',
-  cortex_onnx = 'cortex.onnx',
-  cortex_tensorrtllm = 'cortex.tensorrt-llm',
+  cortex = 'cortex',
+  cortex_llamacpp = 'llama-cpp',
+  cortex_onnx = 'onnxruntime',
+  cortex_tensorrtllm = 'tensorrt-llm',
 }

 export type ModelArtifact = {
@ -153,8 +154,3 @@ export type ModelRuntimeParams = {
 export type ModelInitFailed = Model & {
  error: Error
 }
-
-/**
- * ModelFile is the model.json entity and it's file metadata
- */
-export type ModelFile = Model & FileMetadata
--- a/core/src/types/model/modelInterface.ts
+++ b/core/src/types/model/modelInterface.ts
@ -1,5 +1,5 @@
-import { GpuSetting } from '../miscellaneous'
-import { Model, ModelFile } from './modelEntity'
+import { Model } from './modelEntity'
+import { OptionType } from './modelImport'

 /**
 * Model extension for managing models.
@ -8,38 +8,41 @@ export interface ModelInterface {
  /**
   * Downloads a model.
   * @param model - The model to download.
-   * @param network - Optional object to specify proxy/whether to ignore SSL certificates.
   * @returns A Promise that resolves when the model has been downloaded.
   */
-  downloadModel(
-    model: ModelFile,
-    gpuSettings?: GpuSetting,
-    network?: { ignoreSSL?: boolean; proxy?: string }
-  ): Promise<void>
+  pullModel(model: string): Promise<void>

  /**
   * Cancels the download of a specific model.
   * @param {string} modelId - The ID of the model to cancel the download for.
   * @returns {Promise<void>} A promise that resolves when the download has been cancelled.
   */
-  cancelModelDownload(modelId: string): Promise<void>
+  cancelModelPull(modelId: string): Promise<void>

  /**
   * Deletes a model.
   * @param modelId - The ID of the model to delete.
   * @returns A Promise that resolves when the model has been deleted.
   */
-  deleteModel(model: ModelFile): Promise<void>
+  deleteModel(model: string): Promise<void>

  /**
-   * Gets a list of downloaded models.
+   * Gets downloaded models.
   * @returns A Promise that resolves with an array of downloaded models.
   */
-  getDownloadedModels(): Promise<ModelFile[]>
+  getModels(): Promise<Model[]>

  /**
-   * Gets a list of configured models.
-   * @returns A Promise that resolves with an array of configured models.
+   * Update a pulled model's metadata
+   * @param model - The model to update.
+   * @returns A Promise that resolves when the model has been updated.
   */
-  getConfiguredModels(): Promise<ModelFile[]>
+  updateModel(model: Partial<Model>): Promise<Model>
+
+  /**
+   * Import an existing model file.
+   * @param model - id of the model to import
+   * @param modelPath - path of the model file
+   */
+  importModel(model: string, modelPath: string): Promise<void>
 }
--- a/docs/src/pages/docs/built-in/llama-cpp.mdx
+++ b/docs/src/pages/docs/built-in/llama-cpp.mdx
@ -102,7 +102,7 @@ Enable the GPU acceleration option within the Jan application by following the [
    ],
    "size": 669000000
  },
-  "engine": "nitro"
+  "engine": "llama-cpp"
 }
 ```
 ### Step 2: Modify the `model.json`
--- a/extensions/assistant-extension/src/node/retrieval.ts
+++ b/extensions/assistant-extension/src/node/retrieval.ts
@ -10,8 +10,6 @@ import { HNSWLib } from 'langchain/vectorstores/hnswlib'
 import { OpenAIEmbeddings } from 'langchain/embeddings/openai'
 import { readEmbeddingEngine } from './engine'

-import path from 'path'
-
 export class Retrieval {
  public chunkSize: number = 100
  public chunkOverlap?: number = 0
--- a/extensions/inference-cortex-extension/.gitignore
+++ b/extensions/inference-cortex-extension/.gitignore
--- a/extensions/inference-cortex-extension/README.md
+++ b/extensions/inference-cortex-extension/README.md
--- a/extensions/inference-cortex-extension/download.bat
+++ b/extensions/inference-cortex-extension/download.bat
@ -4,10 +4,10 @@ set /p CORTEX_VERSION=<./bin/version.txt

@REM Download cortex.llamacpp binaries
 set VERSION=v0.1.25
-set DOWNLOAD_URL=https://github.com/janhq/cortex.llamacpp/releases/download/%VERSION%/cortex.llamacpp-0.1.25-windows-amd64
+set DOWNLOAD_URL=https://github.com/janhq/cortex.llamacpp/releases/download/%VERSION%/cortex.llamacpp-0.1.34-windows-amd64
 set SUBFOLDERS=win-cuda-12-0 win-cuda-11-7 win-noavx win-avx win-avx2 win-avx512 win-vulkan

-call .\node_modules\.bin\download -e --strip 1 -o %BIN_PATH% https://github.com/janhq/cortex/releases/download/v%CORTEX_VERSION%/cortex-cpp-%CORTEX_VERSION%-windows-amd64.tar.gz
+call .\node_modules\.bin\download -e --strip 1 -o %BIN_PATH% https://github.com/janhq/cortex/releases/download/v%CORTEX_VERSION%/cortex-%CORTEX_VERSION%-windows-amd64.tar.gz
 call .\node_modules\.bin\download %DOWNLOAD_URL%-avx2-cuda-12-0.tar.gz -e --strip 1 -o %BIN_PATH%/win-cuda-12-0/engines/cortex.llamacpp
 call .\node_modules\.bin\download %DOWNLOAD_URL%-avx2-cuda-11-7.tar.gz -e --strip 1 -o %BIN_PATH%/win-cuda-11-7/engines/cortex.llamacpp
 call .\node_modules\.bin\download %DOWNLOAD_URL%-noavx.tar.gz -e --strip 1 -o %BIN_PATH%/win-noavx/engines/cortex.llamacpp
--- a/extensions/inference-cortex-extension/download.sh
+++ b/extensions/inference-cortex-extension/download.sh
@ -0,0 +1,36 @@
+#!/bin/bash
+
+# Downloads the cortex release archive and the cortex.llamacpp engine archives
+# for the current platform, with every output path located under ./bin.
+# NOTE(review): `download` is presumably the CLI from node_modules/.bin, as in download.bat - confirm.
+
+# Release coordinates; the cortex version is read from ./bin/version.txt
+CORTEX_VERSION=$(cat ./bin/version.txt)
+ENGINE_VERSION="0.1.34"
+CORTEX_RELEASE_URL="https://github.com/janhq/cortex/releases/download"
+ENGINE_DOWNLOAD_URL="https://github.com/janhq/cortex.llamacpp/releases/download/v${ENGINE_VERSION}/cortex.llamacpp-${ENGINE_VERSION}"
+CORTEX_ARCHIVE="${CORTEX_RELEASE_URL}/v${CORTEX_VERSION}/cortex-${CORTEX_VERSION}"
+
+# Platform name as reported by uname
+OS_TYPE=$(uname)
+
+case "$OS_TYPE" in
+Linux)
+    # Cortex binary first, then one engine archive per CPU/GPU variant
+    download "${CORTEX_ARCHIVE}-linux-amd64.tar.gz"  -e --strip 1 -o "./bin"
+    chmod +x "./bin/cortex"
+
+    for build in noavx avx avx2 avx512 avx2-cuda-12-0 avx2-cuda-11-7 vulkan; do
+        # The CUDA archive names carry an avx2- prefix that the target folder name drops.
+        folder="linux-${build#avx2-}"
+        download "${ENGINE_DOWNLOAD_URL}-linux-amd64-${build}.tar.gz"  -e --strip 1 -o "./bin/${folder}/engines/cortex.llamacpp" 1
+    done
+    ;;
+Darwin)
+    # Universal cortex binary, then the arm64 and amd64 engine archives
+    download "${CORTEX_ARCHIVE}-mac-universal.tar.gz"  -e --strip 1 -o "./bin" 1
+    chmod +x "./bin/cortex"
+
+    download "${ENGINE_DOWNLOAD_URL}-mac-arm64.tar.gz" -e --strip 1 -o ./bin/mac-arm64/engines/cortex.llamacpp
+    download "${ENGINE_DOWNLOAD_URL}-mac-amd64.tar.gz" -e --strip 1 -o ./bin/mac-x64/engines/cortex.llamacpp
+    ;;
+*)
+    # Any other platform is rejected
+    echo "Unsupported operating system: $OS_TYPE"
+    exit 1
+    ;;
+esac
--- a/extensions/inference-cortex-extension/jest.config.js
+++ b/extensions/inference-cortex-extension/jest.config.js
--- a/extensions/inference-cortex-extension/package.json
+++ b/extensions/inference-cortex-extension/package.json
@ -10,12 +10,12 @@
  "scripts": {
    "test": "jest",
    "build": "tsc --module commonjs && rollup -c rollup.config.ts",
-    "downloadnitro:linux:darwin": "./download.sh",
-    "downloadnitro:win32": "download.bat",
-    "downloadnitro": "run-script-os",
-    "build:publish:darwin": "rimraf *.tgz --glob && yarn build && npm run downloadnitro && ../../.github/scripts/auto-sign.sh && cpx \"bin/**\" \"dist/bin\" && npm pack && cpx *.tgz ../../pre-install",
-    "build:publish:win32:linux": "rimraf *.tgz --glob && yarn build && npm run downloadnitro && cpx \"bin/**\" \"dist/bin\" && npm pack && cpx *.tgz ../../pre-install",
-    "build:publish": "yarn test && run-script-os"
+    "downloadcortex:linux:darwin": "./download.sh",
+    "downloadcortex:win32": "download.bat",
+    "downloadcortex": "run-script-os",
+    "build:publish:darwin": "rimraf *.tgz --glob && yarn build && npm run downloadcortex && ../../.github/scripts/auto-sign.sh && cpx \"bin/**\" \"dist/bin\" && npm pack && cpx *.tgz ../../pre-install",
+    "build:publish:win32:linux": "rimraf *.tgz --glob && yarn build && npm run downloadcortex && cpx \"bin/**\" \"dist/bin\" && npm pack && cpx *.tgz ../../pre-install",
+    "build:publish": "run-script-os"
  },
  "exports": {
    ".": "./dist/index.js",
@ -50,6 +50,7 @@
    "cpu-instructions": "^0.0.13",
    "decompress": "^4.2.1",
    "fetch-retry": "^5.0.6",
+    "ky": "^1.7.2",
    "rxjs": "^7.8.1",
    "tcp-port-used": "^1.0.2",
    "terminate": "2.6.1",
--- a/extensions/inference-cortex-extension/resources/default_settings.json
+++ b/extensions/inference-cortex-extension/resources/default_settings.json
--- a/extensions/inference-cortex-extension/resources/models/aya-23-35b/model.json
+++ b/extensions/inference-cortex-extension/resources/models/aya-23-35b/model.json
@ -31,5 +31,5 @@
      "tags": ["34B", "Finetuned"],
      "size": 21556982144
    },
-    "engine": "nitro"
+    "engine": "llama-cpp"
 }
--- a/extensions/inference-cortex-extension/resources/models/aya-23-8b/model.json
+++ b/extensions/inference-cortex-extension/resources/models/aya-23-8b/model.json
@ -31,5 +31,5 @@
      "tags": ["7B", "Finetuned"],
      "size": 5056982144
    },
-    "engine": "nitro"
+    "engine": "llama-cpp"
 }
--- a/extensions/inference-cortex-extension/resources/models/bakllava-1/model.json
+++ b/extensions/inference-cortex-extension/resources/models/bakllava-1/model.json
@ -31,5 +31,5 @@
    "tags": ["Vision"],
    "size": 5750000000
  },
-  "engine": "nitro"
+  "engine": "llama-cpp"
 }
--- a/extensions/inference-cortex-extension/resources/models/codeninja-1.0-7b/model.json
+++ b/extensions/inference-cortex-extension/resources/models/codeninja-1.0-7b/model.json
@ -30,5 +30,5 @@
    "tags": ["7B", "Finetuned"],
    "size": 4370000000
  },
-  "engine": "nitro"
+  "engine": "llama-cpp"
 }
--- a/extensions/inference-cortex-extension/resources/models/codestral-22b/model.json
+++ b/extensions/inference-cortex-extension/resources/models/codestral-22b/model.json
@ -31,6 +31,6 @@
      "tags": ["22B", "Finetuned", "Featured"],
      "size": 13341237440
    },
-    "engine": "nitro"
+    "engine": "llama-cpp"
  }
  
--- a/extensions/inference-cortex-extension/resources/models/command-r-34b/model.json
+++ b/extensions/inference-cortex-extension/resources/models/command-r-34b/model.json
@ -31,6 +31,6 @@
      "tags": ["34B", "Finetuned"],
      "size": 21500000000
    },
-    "engine": "nitro"
+    "engine": "llama-cpp"
  }
  
--- a/extensions/inference-cortex-extension/resources/models/deepseek-coder-1.3b/model.json
+++ b/extensions/inference-cortex-extension/resources/models/deepseek-coder-1.3b/model.json
@ -31,5 +31,5 @@
    "tags": ["Tiny"],
    "size": 1430000000
  },
-  "engine": "nitro"
+  "engine": "llama-cpp"
 }
--- a/extensions/inference-cortex-extension/resources/models/deepseek-coder-34b/model.json
+++ b/extensions/inference-cortex-extension/resources/models/deepseek-coder-34b/model.json
@ -31,5 +31,5 @@
    "tags": ["33B"],
    "size": 19940000000
  },
-  "engine": "nitro"
+  "engine": "llama-cpp"
 }
--- a/extensions/inference-cortex-extension/resources/models/gemma-1.1-2b/model.json
+++ b/extensions/inference-cortex-extension/resources/models/gemma-1.1-2b/model.json
@ -31,5 +31,5 @@
    "tags": ["2B", "Finetuned", "Tiny"],
    "size": 1630000000
  },
-  "engine": "nitro"
+  "engine": "llama-cpp"
 }
--- a/extensions/inference-cortex-extension/resources/models/gemma-1.1-7b/model.json
+++ b/extensions/inference-cortex-extension/resources/models/gemma-1.1-7b/model.json
@ -31,5 +31,5 @@
    "tags": ["7B", "Finetuned"],
    "size": 5330000000
  },
-  "engine": "nitro"
+  "engine": "llama-cpp"
 }
--- a/extensions/inference-cortex-extension/resources/models/gemma-2-27b/model.json
+++ b/extensions/inference-cortex-extension/resources/models/gemma-2-27b/model.json
@ -37,5 +37,5 @@
    ],
    "size": 16600000000
  },
-  "engine": "nitro"
+  "engine": "llama-cpp"
 }
--- a/extensions/inference-cortex-extension/resources/models/gemma-2-2b/model.json
+++ b/extensions/inference-cortex-extension/resources/models/gemma-2-2b/model.json
@ -38,5 +38,5 @@
    ],
    "size": 1710000000
  },
-  "engine": "nitro"
+  "engine": "llama-cpp"
 }
--- a/extensions/inference-cortex-extension/resources/models/gemma-2-9b/model.json
+++ b/extensions/inference-cortex-extension/resources/models/gemma-2-9b/model.json
@ -37,5 +37,5 @@
    ],
    "size": 5760000000
  },
-  "engine": "nitro"
+  "engine": "llama-cpp"
 }
--- a/extensions/inference-cortex-extension/resources/models/llama2-chat-70b/model.json
+++ b/extensions/inference-cortex-extension/resources/models/llama2-chat-70b/model.json
@ -31,5 +31,5 @@
    "tags": ["70B", "Foundational Model"],
    "size": 43920000000
  },
-  "engine": "nitro"
+  "engine": "llama-cpp"
 }
--- a/extensions/inference-cortex-extension/resources/models/llama2-chat-7b/model.json
+++ b/extensions/inference-cortex-extension/resources/models/llama2-chat-7b/model.json
@ -31,5 +31,5 @@
    "tags": ["7B", "Foundational Model"],
    "size": 4080000000
  },
-  "engine": "nitro"
+  "engine": "llama-cpp"
 }
--- a/extensions/inference-cortex-extension/resources/models/llama3-8b-instruct/model.json
+++ b/extensions/inference-cortex-extension/resources/models/llama3-8b-instruct/model.json
@ -31,5 +31,5 @@
      "tags": ["8B"],
      "size": 4920000000
    },
-    "engine": "nitro"
+    "engine": "llama-cpp"
 }
--- a/extensions/inference-cortex-extension/resources/models/llama3-hermes-8b/model.json
+++ b/extensions/inference-cortex-extension/resources/models/llama3-hermes-8b/model.json
@ -34,5 +34,5 @@
      ],
      "size": 4920000000
    },
-    "engine": "nitro"
+    "engine": "llama-cpp"
  }
--- a/extensions/inference-cortex-extension/resources/models/llama3.1-70b-instruct/model.json
+++ b/extensions/inference-cortex-extension/resources/models/llama3.1-70b-instruct/model.json
@ -37,5 +37,5 @@
    ],
    "size": 42500000000
  },
-  "engine": "nitro"
+  "engine": "llama-cpp"
 }
--- a/extensions/inference-cortex-extension/resources/models/llama3.1-8b-instruct/model.json
+++ b/extensions/inference-cortex-extension/resources/models/llama3.1-8b-instruct/model.json
@ -37,5 +37,5 @@
    ],
    "size": 4920000000
  },
-  "engine": "nitro"
+  "engine": "llama-cpp"
 }
--- a/extensions/inference-cortex-extension/resources/models/llama3.2-1b-instruct/model.json
+++ b/extensions/inference-cortex-extension/resources/models/llama3.2-1b-instruct/model.json
@ -31,5 +31,5 @@
    "tags": ["1B", "Featured"],
    "size": 1320000000
  },
-  "engine": "nitro"
+  "engine": "llama-cpp"
 }
--- a/extensions/inference-cortex-extension/resources/models/llama3.2-3b-instruct/model.json
+++ b/extensions/inference-cortex-extension/resources/models/llama3.2-3b-instruct/model.json
@ -31,5 +31,5 @@
    "tags": ["3B", "Featured"],
    "size": 3420000000
  },
-  "engine": "nitro"
+  "engine": "llama-cpp"
 }
--- a/extensions/inference-cortex-extension/resources/models/llamacorn-1.1b/model.json
+++ b/extensions/inference-cortex-extension/resources/models/llamacorn-1.1b/model.json
@ -34,5 +34,5 @@
      ],
      "size": 1170000000
    },
-    "engine": "nitro"
+    "engine": "llama-cpp"
  }
--- a/extensions/inference-cortex-extension/resources/models/llava-13b/model.json
+++ b/extensions/inference-cortex-extension/resources/models/llava-13b/model.json
@ -32,5 +32,5 @@
    "tags": ["Vision"],
    "size": 7870000000
  },
-  "engine": "nitro"
+  "engine": "llama-cpp"
 }
--- a/extensions/inference-cortex-extension/resources/models/llava-7b/model.json
+++ b/extensions/inference-cortex-extension/resources/models/llava-7b/model.json
@ -32,5 +32,5 @@
    "tags": ["Vision"],
    "size": 4370000000
  },
-  "engine": "nitro"
+  "engine": "llama-cpp"
 }
--- a/extensions/inference-cortex-extension/resources/models/mistral-ins-7b-q4/model.json
+++ b/extensions/inference-cortex-extension/resources/models/mistral-ins-7b-q4/model.json
@ -32,5 +32,5 @@
    "size": 4370000000,
    "cover": "https://raw.githubusercontent.com/janhq/jan/dev/models/mistral-ins-7b-q4/cover.png"
  },
-  "engine": "nitro"
+  "engine": "llama-cpp"
 }
--- a/extensions/inference-cortex-extension/resources/models/mixtral-8x7b-instruct/model.json
+++ b/extensions/inference-cortex-extension/resources/models/mixtral-8x7b-instruct/model.json
@ -30,5 +30,5 @@
    "tags": ["70B", "Foundational Model"],
    "size": 26440000000
  },
-  "engine": "nitro"
+  "engine": "llama-cpp"
 }
--- a/extensions/inference-cortex-extension/resources/models/noromaid-7b/model.json
+++ b/extensions/inference-cortex-extension/resources/models/noromaid-7b/model.json
@ -31,5 +31,5 @@
    "tags": ["7B", "Finetuned"],
    "size": 4370000000
  },
-  "engine": "nitro"
+  "engine": "llama-cpp"
 }
--- a/extensions/inference-cortex-extension/resources/models/openchat-3.5-7b/model.json
+++ b/extensions/inference-cortex-extension/resources/models/openchat-3.5-7b/model.json
@ -31,5 +31,5 @@
    "tags": ["Recommended", "7B", "Finetuned"],
    "size": 4370000000
  },
-  "engine": "nitro"
+  "engine": "llama-cpp"
 }
--- a/extensions/inference-cortex-extension/resources/models/phi3-3.8b/model.json
+++ b/extensions/inference-cortex-extension/resources/models/phi3-3.8b/model.json
@ -34,5 +34,5 @@
      ],
      "size": 2320000000
    },
-    "engine": "nitro"
+    "engine": "llama-cpp"
 }
--- a/extensions/inference-cortex-extension/resources/models/phi3-medium/model.json
+++ b/extensions/inference-cortex-extension/resources/models/phi3-medium/model.json
@ -34,5 +34,5 @@
      ],
      "size": 8366000000
    },
-    "engine": "nitro"
+    "engine": "llama-cpp"
  }
--- a/extensions/inference-cortex-extension/resources/models/phind-34b/model.json
+++ b/extensions/inference-cortex-extension/resources/models/phind-34b/model.json
@ -31,5 +31,5 @@
    "tags": ["34B", "Finetuned"],
    "size": 20220000000
  },
-  "engine": "nitro"
+  "engine": "llama-cpp"
 }
--- a/extensions/inference-cortex-extension/resources/models/qwen-7b/model.json
+++ b/extensions/inference-cortex-extension/resources/models/qwen-7b/model.json
@ -31,5 +31,5 @@
    "tags": ["7B", "Finetuned"],
    "size": 4770000000
  },
-  "engine": "nitro"
+  "engine": "llama-cpp"
 }
--- a/extensions/inference-cortex-extension/resources/models/qwen2-7b/model.json
+++ b/extensions/inference-cortex-extension/resources/models/qwen2-7b/model.json
@ -31,6 +31,6 @@
      "tags": ["7B", "Finetuned"],
      "size": 4680000000
    },
-    "engine": "nitro"
+    "engine": "llama-cpp"
  }
  
--- a/extensions/inference-cortex-extension/resources/models/qwen2.5-14b-instruct/model.json
+++ b/extensions/inference-cortex-extension/resources/models/qwen2.5-14b-instruct/model.json
@ -31,6 +31,6 @@
      "tags": ["14B", "Featured"],
      "size": 8990000000
    },
-    "engine": "nitro"
+    "engine": "llama-cpp"
  }
  
--- a/extensions/inference-cortex-extension/resources/models/qwen2.5-32b-instruct/model.json
+++ b/extensions/inference-cortex-extension/resources/models/qwen2.5-32b-instruct/model.json
@ -31,6 +31,6 @@
      "tags": ["32B"],
      "size": 19900000000
    },
-    "engine": "nitro"
+    "engine": "llama-cpp"
  }
  
--- a/extensions/inference-cortex-extension/resources/models/qwen2.5-72b-instruct/model.json
+++ b/extensions/inference-cortex-extension/resources/models/qwen2.5-72b-instruct/model.json
@ -31,6 +31,6 @@
      "tags": ["72B"],
      "size": 47400000000
    },
-    "engine": "nitro"
+    "engine": "llama-cpp"
  }
  
--- a/extensions/inference-cortex-extension/resources/models/qwen2.5-7b-instruct/model.json
+++ b/extensions/inference-cortex-extension/resources/models/qwen2.5-7b-instruct/model.json
@ -31,6 +31,6 @@
      "tags": ["7B", "Featured"],
      "size": 4680000000
    },
-    "engine": "nitro"
+    "engine": "llama-cpp"
  }
  
--- a/extensions/inference-cortex-extension/resources/models/qwen2.5-coder-7b-instruct/model.json
+++ b/extensions/inference-cortex-extension/resources/models/qwen2.5-coder-7b-instruct/model.json
@ -31,6 +31,6 @@
      "tags": ["7B", "Featured"],
      "size": 4680000000
    },
-    "engine": "nitro"
+    "engine": "llama-cpp"
  }
  
--- a/extensions/inference-cortex-extension/resources/models/stable-zephyr-3b/model.json
+++ b/extensions/inference-cortex-extension/resources/models/stable-zephyr-3b/model.json
@ -31,5 +31,5 @@
      "tags": ["3B", "Finetuned", "Tiny"],
      "size": 2970000000
    },
-    "engine": "nitro"
+    "engine": "llama-cpp"
  }
--- a/extensions/inference-cortex-extension/resources/models/stealth-v1.2-7b/model.json
+++ b/extensions/inference-cortex-extension/resources/models/stealth-v1.2-7b/model.json
@ -30,5 +30,5 @@
    "tags": ["7B", "Finetuned"],
    "size": 4370000000
  },
-  "engine": "nitro"
+  "engine": "llama-cpp"
 }
--- a/extensions/inference-cortex-extension/resources/models/tinyllama-1.1b/model.json
+++ b/extensions/inference-cortex-extension/resources/models/tinyllama-1.1b/model.json
@ -31,5 +31,5 @@
    "tags": ["Tiny", "Foundation Model"],
    "size": 669000000
  },
-  "engine": "nitro"
+  "engine": "llama-cpp"
 }
--- a/extensions/inference-cortex-extension/resources/models/trinity-v1.2-7b/model.json
+++ b/extensions/inference-cortex-extension/resources/models/trinity-v1.2-7b/model.json
@ -31,5 +31,5 @@
    "size": 4370000000,
    "cover": "https://raw.githubusercontent.com/janhq/jan/dev/models/trinity-v1.2-7b/cover.png"
  },
-  "engine": "nitro"
+  "engine": "llama-cpp"
 }
--- a/extensions/inference-cortex-extension/resources/models/vistral-7b/model.json
+++ b/extensions/inference-cortex-extension/resources/models/vistral-7b/model.json
@ -31,6 +31,6 @@
      "tags": ["7B", "Finetuned"],
      "size": 4410000000
    },
-    "engine": "nitro"
+    "engine": "llama-cpp"
  }
  
--- a/extensions/inference-cortex-extension/resources/models/wizardcoder-13b/model.json
+++ b/extensions/inference-cortex-extension/resources/models/wizardcoder-13b/model.json
@ -31,5 +31,5 @@
    "tags": ["Recommended", "13B", "Finetuned"],
    "size": 7870000000
  },
-  "engine": "nitro"
+  "engine": "llama-cpp"
 }
--- a/extensions/inference-cortex-extension/resources/models/yi-34b/model.json
+++ b/extensions/inference-cortex-extension/resources/models/yi-34b/model.json
@ -31,5 +31,5 @@
    "tags": ["34B", "Foundational Model"],
    "size": 20660000000
  },
-  "engine": "nitro"
+  "engine": "llama-cpp"
 }
--- a/extensions/inference-cortex-extension/rollup.config.ts
+++ b/extensions/inference-cortex-extension/rollup.config.ts
@ -114,19 +114,7 @@ export default [
        ]),
        NODE: JSON.stringify(`${packageJson.name}/${packageJson.node}`),
        DEFAULT_SETTINGS: JSON.stringify(defaultSettingJson),
-        INFERENCE_URL: JSON.stringify(
-          process.env.INFERENCE_URL ||
-            'http://127.0.0.1:3928/inferences/server/chat_completion'
-        ),
-        TROUBLESHOOTING_URL: JSON.stringify(
-          'https://jan.ai/guides/troubleshooting'
-        ),
-        JAN_SERVER_INFERENCE_URL: JSON.stringify(
-          'http://localhost:1337/v1/chat/completions'
-        ),
-        CUDA_DOWNLOAD_URL: JSON.stringify(
-          'https://catalog.jan.ai/dist/cuda-dependencies/<version>/<platform>/cuda.tar.gz'
-        ),
+        CORTEX_API_URL: JSON.stringify('http://127.0.0.1:39291/v1'),
      }),
      // Allow json resolution
      json(),
--- a/extensions/inference-cortex-extension/src/@types/global.d.ts
+++ b/extensions/inference-cortex-extension/src/@types/global.d.ts
@ -1,7 +1,5 @@
 declare const NODE: string
-declare const INFERENCE_URL: string
-declare const TROUBLESHOOTING_URL: string
-declare const JAN_SERVER_INFERENCE_URL: string
+declare const CORTEX_API_URL: string
 declare const DEFAULT_SETTINGS: Array<any>
 declare const MODELS: Array<any>

--- a/extensions/inference-cortex-extension/src/babel.config.js
+++ b/extensions/inference-cortex-extension/src/babel.config.js
--- a/extensions/inference-cortex-extension/src/index.ts
+++ b/extensions/inference-cortex-extension/src/index.ts
@ -0,0 +1,111 @@
+/**
+ * @file This file exports a class that implements the InferenceExtension interface from the @janhq/core package.
+ * The class provides methods for initializing and stopping a model, and for making inference requests.
+ * It also subscribes to events emitted by the @janhq/core package and handles new message requests.
+ * @version 1.0.0
+ * @module inference-extension/src/index
+ */
+
+import {
+  Model,
+  executeOnMain,
+  systemInformation,
+  log,
+  joinPath,
+  dirName,
+  LocalOAIEngine,
+  InferenceEngine,
+} from '@janhq/core'
+
+import ky from 'ky'
+
+/**
+ * A class that implements the InferenceExtension interface from the @janhq/core package.
+ * The class provides methods for initializing and stopping a model, and for making inference requests.
+ * It also subscribes to events emitted by the @janhq/core package and handles new message requests.
+ */
+export default class JanInferenceCortexExtension extends LocalOAIEngine {
+  // DEPRECATED
+  nodeModule: string = 'node'
+
+  provider: string = InferenceEngine.cortex
+
+  /** The URL for making inference requests. */
+  inferenceUrl = `${CORTEX_API_URL}/chat/completions`
+
+  /** Logs the failure of a best-effort step instead of letting it propagate. */
+  private logFailure = (e: any) => log(e?.message ?? e ?? '')
+
+  /**
+   * Registers the bundled models, runs the base-class load hook, then asks the node module to start the Cortex watchdog.
+   */
+  async onLoad() {
+    await this.registerModels(MODELS as Model[]).catch(this.logFailure)
+    super.onLoad()
+
+    // Run the process watchdog; not awaited, but a rejection is logged
+    const systemInfo = await systemInformation()
+    Promise.resolve(executeOnMain(NODE, 'run', systemInfo)).catch(this.logFailure)
+  }
+
+  /** Asks the node module to dispose the Cortex process, then unloads the base class. */
+  onUnload(): void {
+    Promise.resolve(executeOnMain(NODE, 'dispose')).catch(this.logFailure)
+    super.onUnload()
+  }
+
+  /**
+   * Starts `model` on the Cortex server, importing legacy (nitro) models that have a `file_path` first.
+   * @throws The server's JSON error body when there is one, otherwise the original request error.
+   */
+  override async loadModel(
+    model: Model & { file_path?: string }
+  ): Promise<void> {
+    const isLegacy = model.engine === InferenceEngine.nitro // Legacy model cache
+    if (isLegacy && model.file_path) {
+      // Try importing the model; a failed import is only logged
+      await ky
+        .post(`${CORTEX_API_URL}/models/${model.id}`, {
+          json: { model: model.id, modelPath: await this.modelPath(model) },
+        })
+        .json()
+        .catch(this.logFailure)
+    }
+
+    try {
+      await ky
+        .post(`${CORTEX_API_URL}/models/start`, {
+          json: {
+            ...model.settings,
+            model: model.id,
+            engine: isLegacy ? InferenceEngine.cortex_llamacpp : model.engine,
+          },
+        })
+        .json()
+    } catch (e: any) {
+      // A missing or non-JSON error body must not mask the original error
+      throw (await e?.response?.json().catch(() => undefined)) ?? e
+    }
+  }
+
+  /** Asks the Cortex server to stop `model`; request errors reach the caller. */
+  override async unloadModel(model: Model): Promise<void> {
+    await ky
+      .post(`${CORTEX_API_URL}/models/stop`, { json: { model: model.id } })
+      .json()
+  }
+
+  /** Path used for a legacy import: a file name resolved in the directory of `file_path`, else the model id. */
+  private async modelPath(
+    model: Model & { file_path?: string }
+  ): Promise<string> {
+    if (!model.file_path) return model.id
+    return await joinPath([
+      await dirName(model.file_path),
+      model.sources[0]?.filename ??
+        model.settings?.llama_model_path ??
+        model.sources[0]?.url.split('/').pop() ??
+        model.id,
+    ])
+  }
+}
--- a/extensions/inference-cortex-extension/src/node/execute.test.ts
+++ b/extensions/inference-cortex-extension/src/node/execute.test.ts
@ -1,5 +1,5 @@
 import { describe, expect, it } from '@jest/globals'
-import { executableNitroFile } from './execute'
+import { executableCortexFile } from './execute'
 import { GpuSetting } from '@janhq/core'
 import { cpuInfo } from 'cpu-instructions'

@ -30,7 +30,7 @@ jest.mock('cpu-instructions', () => ({
 let mock = cpuInfo.cpuInfo as jest.Mock
 mock.mockReturnValue([])

-describe('test executable nitro file', () => {
+describe('test executable cortex file', () => {
  afterAll(function () {
    Object.defineProperty(process, 'platform', {
      value: originalPlatform,
@ -44,10 +44,13 @@ describe('test executable nitro file', () => {
    Object.defineProperty(process, 'arch', {
      value: 'arm64',
    })
-    expect(executableNitroFile(testSettings)).toEqual(
+    expect(executableCortexFile(testSettings)).toEqual(
      expect.objectContaining({
        enginePath: expect.stringContaining(`mac-arm64`),
-        executablePath: originalPlatform === 'darwin' ? expect.stringContaining(`mac-arm64/cortex-cpp`) : expect.anything(),
+        executablePath:
+          originalPlatform === 'darwin'
+            ? expect.stringContaining(`/cortex`)
+            : expect.anything(),
        cudaVisibleDevices: '',
        vkVisibleDevices: '',
      })
@ -55,10 +58,13 @@ describe('test executable nitro file', () => {
    Object.defineProperty(process, 'arch', {
      value: 'x64',
    })
-    expect(executableNitroFile(testSettings)).toEqual(
+    expect(executableCortexFile(testSettings)).toEqual(
      expect.objectContaining({
        enginePath: expect.stringContaining(`mac-x64`),
-        executablePath: originalPlatform === 'darwin' ? expect.stringContaining(`mac-x64/cortex-cpp`) : expect.anything(),
+        executablePath:
+          originalPlatform === 'darwin'
+            ? expect.stringContaining(`/cortex`)
+            : expect.anything(),
        cudaVisibleDevices: '',
        vkVisibleDevices: '',
      })
@ -73,10 +79,10 @@ describe('test executable nitro file', () => {
      ...testSettings,
      run_mode: 'cpu',
    }
-    expect(executableNitroFile(settings)).toEqual(
+    expect(executableCortexFile(settings)).toEqual(
      expect.objectContaining({
        enginePath: expect.stringContaining(`win`),
-        executablePath: expect.stringContaining(`cortex-cpp.exe`),
+        executablePath: expect.stringContaining(`/cortex.exe`),
        cudaVisibleDevices: '',
        vkVisibleDevices: '',
      })
@ -107,10 +113,10 @@ describe('test executable nitro file', () => {
        },
      ],
    }
-    expect(executableNitroFile(settings)).toEqual(
+    expect(executableCortexFile(settings)).toEqual(
      expect.objectContaining({
        enginePath: expect.stringContaining(`win-cuda-11-7`),
-        executablePath: expect.stringContaining(`cortex-cpp.exe`),
+        executablePath: expect.stringContaining(`/cortex.exe`),
        cudaVisibleDevices: '0',
        vkVisibleDevices: '0',
      })
@ -141,10 +147,10 @@ describe('test executable nitro file', () => {
        },
      ],
    }
-    expect(executableNitroFile(settings)).toEqual(
+    expect(executableCortexFile(settings)).toEqual(
      expect.objectContaining({
        enginePath: expect.stringContaining(`win-cuda-12-0`),
-        executablePath: expect.stringContaining(`cortex-cpp.exe`),
+        executablePath: expect.stringContaining(`/cortex.exe`),
        cudaVisibleDevices: '0',
        vkVisibleDevices: '0',
      })
@ -159,10 +165,10 @@ describe('test executable nitro file', () => {
      ...testSettings,
      run_mode: 'cpu',
    }
-    expect(executableNitroFile(settings)).toEqual(
+    expect(executableCortexFile(settings)).toEqual(
      expect.objectContaining({
        enginePath: expect.stringContaining(`linux`),
-        executablePath: expect.stringContaining(`cortex-cpp`),
+        executablePath: expect.stringContaining(`/cortex`),
        cudaVisibleDevices: '',
        vkVisibleDevices: '',
      })
@ -193,10 +199,10 @@ describe('test executable nitro file', () => {
        },
      ],
    }
-    expect(executableNitroFile(settings)).toEqual(
+    expect(executableCortexFile(settings)).toEqual(
      expect.objectContaining({
        enginePath: expect.stringContaining(`linux-cuda-11-7`),
-        executablePath: expect.stringContaining(`cortex-cpp`),
+        executablePath: expect.stringContaining(`/cortex`),
        cudaVisibleDevices: '0',
        vkVisibleDevices: '0',
      })
@ -227,10 +233,10 @@ describe('test executable nitro file', () => {
        },
      ],
    }
-    expect(executableNitroFile(settings)).toEqual(
+    expect(executableCortexFile(settings)).toEqual(
      expect.objectContaining({
        enginePath: expect.stringContaining(`linux-cuda-12-0`),
-        executablePath: expect.stringContaining(`cortex-cpp`),
+        executablePath: expect.stringContaining(`/cortex`),
        cudaVisibleDevices: '0',
        vkVisibleDevices: '0',
      })
@ -251,10 +257,10 @@ describe('test executable nitro file', () => {
    cpuInstructions.forEach((instruction) => {
      mock.mockReturnValue([instruction])

-      expect(executableNitroFile(settings)).toEqual(
+      expect(executableCortexFile(settings)).toEqual(
        expect.objectContaining({
          enginePath: expect.stringContaining(`linux-${instruction}`),
-          executablePath: expect.stringContaining(`cortex-cpp`),
+          executablePath: expect.stringContaining(`/cortex`),

          cudaVisibleDevices: '',
          vkVisibleDevices: '',
@ -274,10 +280,10 @@ describe('test executable nitro file', () => {
    const cpuInstructions = ['avx512', 'avx2', 'avx', 'noavx']
    cpuInstructions.forEach((instruction) => {
      mock.mockReturnValue([instruction])
-      expect(executableNitroFile(settings)).toEqual(
+      expect(executableCortexFile(settings)).toEqual(
        expect.objectContaining({
          enginePath: expect.stringContaining(`win-${instruction}`),
-          executablePath: expect.stringContaining(`cortex-cpp.exe`),
+          executablePath: expect.stringContaining(`/cortex.exe`),
          cudaVisibleDevices: '',
          vkVisibleDevices: '',
        })
@ -313,10 +319,10 @@ describe('test executable nitro file', () => {
    const cpuInstructions = ['avx512', 'avx2', 'avx', 'noavx']
    cpuInstructions.forEach((instruction) => {
      mock.mockReturnValue([instruction])
-      expect(executableNitroFile(settings)).toEqual(
+      expect(executableCortexFile(settings)).toEqual(
        expect.objectContaining({
          enginePath: expect.stringContaining(`win-cuda-12-0`),
-          executablePath: expect.stringContaining(`cortex-cpp.exe`),
+          executablePath: expect.stringContaining(`/cortex.exe`),
          cudaVisibleDevices: '0',
          vkVisibleDevices: '0',
        })
@ -352,10 +358,10 @@ describe('test executable nitro file', () => {
    }
    cpuInstructions.forEach((instruction) => {
      mock.mockReturnValue([instruction])
-      expect(executableNitroFile(settings)).toEqual(
+      expect(executableCortexFile(settings)).toEqual(
        expect.objectContaining({
          enginePath: expect.stringContaining(`linux-cuda-12-0`),
-          executablePath: expect.stringContaining(`cortex-cpp`),
+          executablePath: expect.stringContaining(`/cortex`),
          cudaVisibleDevices: '0',
          vkVisibleDevices: '0',
        })
@ -392,10 +398,10 @@ describe('test executable nitro file', () => {
    }
    cpuInstructions.forEach((instruction) => {
      mock.mockReturnValue([instruction])
-      expect(executableNitroFile(settings)).toEqual(
+      expect(executableCortexFile(settings)).toEqual(
        expect.objectContaining({
          enginePath: expect.stringContaining(`linux-vulkan`),
-          executablePath: expect.stringContaining(`cortex-cpp`),
+          executablePath: expect.stringContaining(`/cortex`),
          cudaVisibleDevices: '0',
          vkVisibleDevices: '0',
        })
@ -418,10 +424,13 @@ describe('test executable nitro file', () => {
        run_mode: 'cpu',
      }
      mock.mockReturnValue([])
-      expect(executableNitroFile(settings)).toEqual(
+      expect(executableCortexFile(settings)).toEqual(
        expect.objectContaining({
          enginePath: expect.stringContaining(`mac-x64`),
-          executablePath: originalPlatform === 'darwin' ? expect.stringContaining(`mac-x64/cortex-cpp`) : expect.anything(),
+          executablePath:
+            originalPlatform === 'darwin'
+              ? expect.stringContaining(`/cortex`)
+              : expect.anything(),
          cudaVisibleDevices: '',
          vkVisibleDevices: '',
        })
--- a/extensions/inference-cortex-extension/src/node/execute.ts
+++ b/extensions/inference-cortex-extension/src/node/execute.ts
@ -2,7 +2,7 @@ import { GpuSetting } from '@janhq/core'
 import * as path from 'path'
 import { cpuInfo } from 'cpu-instructions'

-export interface NitroExecutableOptions {
+export interface CortexExecutableOptions {
  enginePath: string
  executablePath: string
  cudaVisibleDevices: string
@ -81,9 +81,9 @@ const cpuInstructions = () => {
 * Find which executable file to run based on the current platform.
 * @returns The name of the executable file to run.
 */
-export const executableNitroFile = (
+export const executableCortexFile = (
  gpuSetting?: GpuSetting
-): NitroExecutableOptions => {
+): CortexExecutableOptions => {
  let engineFolder = [
    os(),
    ...(gpuSetting?.vulkan
@ -99,7 +99,7 @@ export const executableNitroFile = (
    .join('-')
  let cudaVisibleDevices = gpuSetting?.gpus_in_use.join(',') ?? ''
  let vkVisibleDevices = gpuSetting?.gpus_in_use.join(',') ?? ''
-  let binaryName = `${process.platform === 'darwin' ? `${os()}/` : ''}cortex-cpp${extension()}`
+  let binaryName = `cortex${extension()}`

  return {
    enginePath: path.join(__dirname, '..', 'bin', engineFolder),
--- a/extensions/inference-cortex-extension/src/node/index.test.ts
+++ b/extensions/inference-cortex-extension/src/node/index.test.ts
@ -0,0 +1,94 @@
+jest.mock('@janhq/core/node', () => ({ // keep the real module, stub only the two functions below
+  ...jest.requireActual('@janhq/core/node'),
+  getJanDataFolderPath: () => '',
+  getSystemResourceInfo: () => {
+    return {
+      cpu: {
+        cores: 1,
+        logicalCores: 1,
+        threads: 1,
+        model: 'model',
+        speed: 1,
+      },
+      memory: {
+        total: 1,
+        free: 1,
+      },
+      gpu: {
+        model: 'model',
+        memory: 1,
+        cuda: {
+          version: 'version',
+          devices: 'devices',
+        },
+        vulkan: {
+          version: 'version',
+          devices: 'devices',
+        },
+      },
+    }
+  },
+}))
+
+jest.mock('fs', () => ({ // fs default export reduced to a readdirSync that lists nothing
+  default: {
+    readdirSync: () => [],
+  },
+}))
+
+jest.mock('child_process', () => ({ // exec/spawn return inert fake processes; nothing is launched
+  exec: () => {
+    return {
+      stdout: { on: jest.fn() },
+      stderr: { on: jest.fn() },
+      on: jest.fn(),
+    }
+  },
+  spawn: () => {
+    return {
+      stdout: { on: jest.fn() },
+      stderr: { on: jest.fn() },
+      on: jest.fn(),
+      pid: '111',
+    }
+  },
+}))
+
+jest.mock('./execute', () => ({ // fixed executable options instead of platform detection
+  executableCortexFile: () => {
+    return {
+      enginePath: 'enginePath',
+      executablePath: 'executablePath',
+      cudaVisibleDevices: 'cudaVisibleDevices',
+      vkVisibleDevices: 'vkVisibleDevices',
+    }
+  },
+}))
+
+import index from './index'
+
+describe('dispose', () => {
+  it('should dispose a model successfully on Mac', async () => {
+    Object.defineProperty(process, 'platform', {
+      value: 'darwin',
+    })
+
+    // Call the dispose function (no watchdog was started, so it is a no-op)
+    const result = await index.dispose()
+
+    // Assert that the result is as expected
+    expect(result).toBeUndefined()
+  })
+
+  it('should kill the subprocess successfully on Windows', async () => {
+    Object.defineProperty(process, 'platform', {
+      value: 'win32',
+    })
+
+    // Call the dispose function again with the platform reported as win32
+    const result = await index.dispose()
+
+    // Assert that the result is as expected
+    expect(result).toBeUndefined()
+  })
+})
--- a/extensions/inference-cortex-extension/src/node/index.ts
+++ b/extensions/inference-cortex-extension/src/node/index.ts
@ -0,0 +1,83 @@
+import path from 'path'
+import { log, SystemInformation } from '@janhq/core/node'
+import { executableCortexFile } from './execute'
+import { ProcessWatchdog } from './watchdog'
+
+// The local port the Cortex subprocess listens on (must match the port in CORTEX_API_URL)
+const LOCAL_PORT = '39291'
+let watchdog: ProcessWatchdog | undefined = undefined
+
+/**
+ * Spawns the Cortex server subprocess under a restart watchdog.
+ * @param systemInfo Host information; its `gpuSetting` selects the engine build.
+ * @returns A promise that resolves once the watchdog has been started (the
+ *   server may not be ready yet) and rejects if the setup throws.
+ */
+async function run(systemInfo?: SystemInformation): Promise<any> {
+  log(`[CORTEX]:: Spawning cortex subprocess...`)
+
+  const { executablePath, enginePath, cudaVisibleDevices, vkVisibleDevices } =
+    executableCortexFile(systemInfo?.gpuSetting)
+
+  // Execute the binary
+  log(`[CORTEX]:: Spawn cortex at path: ${executablePath}`)
+  log(`[CORTEX]::Debug: Cortex engine path: ${enginePath}`)
+
+  // Add engine path to the PATH and LD_LIBRARY_PATH
+  process.env.PATH = appendDir(process.env.PATH, enginePath)
+  log(`[CORTEX] PATH: ${process.env.PATH}`)
+  process.env.LD_LIBRARY_PATH = appendDir(
+    process.env.LD_LIBRARY_PATH,
+    enginePath
+  )
+
+  // A repeated run() must not leave the previous process and its restart loop behind
+  watchdog?.terminate()
+  // Vulkan - Support 1 device at a time for now: the first id of the comma-separated list
+  const vulkanDevice = vkVisibleDevices?.split(',')[0]
+  watchdog = new ProcessWatchdog(
+    executablePath,
+    ['--start-server', '--port', LOCAL_PORT],
+    {
+      cwd: enginePath,
+      env: {
+        ...process.env,
+        ENGINE_PATH: enginePath,
+        CUDA_VISIBLE_DEVICES: cudaVisibleDevices,
+        ...(vulkanDevice ? { GGML_VULKAN_DEVICE: vulkanDevice } : {}),
+      },
+    }
+  )
+  watchdog.start()
+}
+
+/**
+ * Appends `dir` to a delimiter-separated path list unless it is already listed.
+ * An unset or empty list yields just `dir`, so no empty leading entry is created.
+ */
+function appendDir(list: string | undefined, dir: string): string {
+  if (!list) return dir
+  const listed = list.split(path.delimiter).includes(dir)
+  return listed ? list : `${list}${path.delimiter}${dir}`
+}
+
+/**
+ * Module cleanup hook: terminates the watchdog, if one was started, which kills
+ * the watched process and stops it from being restarted.
+ * The extension's onUnload invokes it through executeOnMain(NODE, 'dispose').
+ */
+function dispose() {
+  watchdog?.terminate()
+}
+
+/**
+ * Cortex process info. NOTE(review): not referenced by run/dispose in this module — confirm it is still needed.
+ */
+export interface CortexProcessInfo {
+  isRunning: boolean // presumably whether the Cortex subprocess is alive — TODO confirm
+}
+
+export default {
+  run,
+  dispose,
+}
--- a/extensions/inference-cortex-extension/src/node/watchdog.ts
+++ b/extensions/inference-cortex-extension/src/node/watchdog.ts
@ -0,0 +1,84 @@
+import { log } from '@janhq/core/node'
+import { spawn, ChildProcess } from 'child_process'
+import { EventEmitter } from 'events'
+
+interface WatchdogOptions {
+  cwd?: string // working directory; the options object is passed to spawn as-is
+  restartDelay?: number // ms to wait before respawning; 5000 when unset
+  maxRestarts?: number // respawn attempts before giving up; 5 when unset
+  env?: NodeJS.ProcessEnv // environment for the child; passed to spawn as-is
+}
+
+/** Supervises a child process: relays its output as events and respawns it on unexpected exit. */
+export class ProcessWatchdog extends EventEmitter {
+  private process: ChildProcess | null = null
+  private readonly restartDelay: number
+  private readonly maxRestarts: number
+  private restartCount = 0
+  private isTerminating = false
+
+  /** `options` is passed to `spawn`; `restartDelay` (ms) / `maxRestarts` default to 5000 / 5, an explicit 0 is honoured. */
+  constructor(
+    private readonly command: string,
+    private readonly args: string[],
+    private readonly options: WatchdogOptions = {}
+  ) {
+    super()
+    this.restartDelay = options.restartDelay ?? 5000
+    this.maxRestarts = options.maxRestarts ?? 5
+  }
+
+  /** Spawns the process for the first time. */
+  start(): void {
+    this.spawnProcess()
+  }
+
+  /** Spawns the child and wires its events; does nothing once terminated. */
+  private spawnProcess(): void {
+    if (this.isTerminating) return
+
+    log(`Starting process: ${this.command} ${this.args.join(' ')}`)
+    const child = (this.process = spawn(this.command, this.args, this.options))
+
+    child.stdout?.on('data', (data: Buffer) => {
+      log(`Process output: ${data}`)
+      this.emit('output', data.toString())
+    })
+    child.stderr?.on('data', (data: Buffer) => {
+      log(`Process error: ${data}`)
+      // EventEmitter throws on an 'error' event nobody listens to, so only emit to subscribers
+      if (this.listenerCount('error') > 0) this.emit('error', data.toString())
+    })
+    // Spawn failures (e.g. a missing binary) arrive as 'error' and would throw if unhandled
+    child.on('error', (err: Error) => log(`Process failed: ${err.message}`))
+    child.on('close', (code: number | null) => {
+      log(`Process exited with code ${code}`)
+      this.emit('close', code)
+      if (!this.isTerminating) this.restartProcess()
+    })
+  }
+
+  /** Schedules a respawn after `restartDelay` ms, or gives up after `maxRestarts` attempts. */
+  private restartProcess(): void {
+    if (this.restartCount >= this.maxRestarts) {
+      log('Max restart attempts reached. Exiting watchdog.')
+      this.emit('maxRestartsReached')
+      return
+    }
+    this.restartCount++
+    log(
+      `Restarting process in ${this.restartDelay}ms (Attempt ${this.restartCount}/${this.maxRestarts})`
+    )
+    setTimeout(() => this.spawnProcess(), this.restartDelay)
+  }
+
+  /** Stops supervising and kills the running child, if any. */
+  terminate(): void {
+    this.isTerminating = true
+    if (this.process) {
+      log('Terminating watched process...')
+      this.process.kill()
+    }
+    this.emit('terminated')
+  }
+}
--- a/extensions/inference-cortex-extension/tsconfig.json
+++ b/extensions/inference-cortex-extension/tsconfig.json
--- a/extensions/inference-nitro-extension/bin/version.txt
+++ b/extensions/inference-nitro-extension/bin/version.txt
@ -1 +0,0 @@
-0.5.0
--- a/extensions/inference-nitro-extension/download.sh
+++ b/extensions/inference-nitro-extension/download.sh
@ -1,41 +0,0 @@
-#!/bin/bash
-
-# Read CORTEX_VERSION
-CORTEX_VERSION=$(cat ./bin/version.txt)
-CORTEX_RELEASE_URL="https://github.com/janhq/cortex/releases/download"
-
-# Detect platform
-OS_TYPE=$(uname)
-
-if [ "$OS_TYPE" == "Linux" ]; then
-    # Linux downloads
-    download "${CORTEX_RELEASE_URL}/v${CORTEX_VERSION}/cortex-cpp-${CORTEX_VERSION}-linux-amd64.tar.gz"  -e --strip 1 -o "./bin"
-    chmod +x "./bin/cortex-cpp"
-
-    ENGINE_DOWNLOAD_URL="https://github.com/janhq/cortex.llamacpp/releases/download/v0.1.25/cortex.llamacpp-0.1.25-linux-amd64"
-
-    # Download engines for Linux
-    download "${ENGINE_DOWNLOAD_URL}-noavx.tar.gz"  -e --strip 1 -o "./bin/linux-noavx/engines/cortex.llamacpp" 1
-    download "${ENGINE_DOWNLOAD_URL}-avx.tar.gz"  -e --strip 1 -o "./bin/linux-avx/engines/cortex.llamacpp" 1
-    download "${ENGINE_DOWNLOAD_URL}-avx2.tar.gz"  -e --strip 1 -o "./bin/linux-avx2/engines/cortex.llamacpp" 1
-    download "${ENGINE_DOWNLOAD_URL}-avx512.tar.gz"  -e --strip 1 -o "./bin/linux-avx512/engines/cortex.llamacpp" 1
-    download "${ENGINE_DOWNLOAD_URL}-avx2-cuda-12-0.tar.gz"  -e --strip 1 -o "./bin/linux-cuda-12-0/engines/cortex.llamacpp" 1
-    download "${ENGINE_DOWNLOAD_URL}-avx2-cuda-11-7.tar.gz"  -e --strip 1 -o "./bin/linux-cuda-11-7/engines/cortex.llamacpp" 1
-    download "${ENGINE_DOWNLOAD_URL}-vulkan.tar.gz"  -e --strip 1 -o "./bin/linux-vulkan/engines/cortex.llamacpp" 1
-
-elif [ "$OS_TYPE" == "Darwin" ]; then
-    # macOS downloads
-    download "${CORTEX_RELEASE_URL}/v${CORTEX_VERSION}/cortex-cpp-${CORTEX_VERSION}-mac-arm64.tar.gz"  -e --strip 1 -o "./bin/mac-arm64" 1
-    download "${CORTEX_RELEASE_URL}/v${CORTEX_VERSION}/cortex-cpp-${CORTEX_VERSION}-mac-amd64.tar.gz"  -e --strip 1 -o "./bin/mac-x64" 1
-    chmod +x "./bin/mac-arm64/cortex-cpp"
-    chmod +x "./bin/mac-x64/cortex-cpp"
-
-    ENGINE_DOWNLOAD_URL="https://github.com/janhq/cortex.llamacpp/releases/download/v0.1.25/cortex.llamacpp-0.1.25-mac"
-    # Download engines for macOS
-    download "${ENGINE_DOWNLOAD_URL}-arm64.tar.gz" -e --strip 1 -o ./bin/mac-arm64/engines/cortex.llamacpp
-    download "${ENGINE_DOWNLOAD_URL}-amd64.tar.gz" -e --strip 1 -o ./bin/mac-x64/engines/cortex.llamacpp
-
-else
-    echo "Unsupported operating system: $OS_TYPE"
-    exit 1
-fi
--- a/extensions/inference-nitro-extension/src/index.ts
+++ b/extensions/inference-nitro-extension/src/index.ts
@ -1,193 +0,0 @@
-/**
- * @file This file exports a class that implements the InferenceExtension interface from the @janhq/core package.
- * The class provides methods for initializing and stopping a model, and for making inference requests.
- * It also subscribes to events emitted by the @janhq/core package and handles new message requests.
- * @version 1.0.0
- * @module inference-extension/src/index
- */
-
-import {
-  events,
-  executeOnMain,
-  Model,
-  ModelEvent,
-  LocalOAIEngine,
-  InstallationState,
-  systemInformation,
-  fs,
-  getJanDataFolderPath,
-  joinPath,
-  DownloadRequest,
-  baseName,
-  downloadFile,
-  DownloadState,
-  DownloadEvent,
-  ModelFile,
-} from '@janhq/core'
-
-declare const CUDA_DOWNLOAD_URL: string
-/**
- * A class that implements the InferenceExtension interface from the @janhq/core package.
- * The class provides methods for initializing and stopping a model, and for making inference requests.
- * It also subscribes to events emitted by the @janhq/core package and handles new message requests.
- */
-export default class JanInferenceNitroExtension extends LocalOAIEngine {
-  nodeModule: string = NODE
-  provider: string = 'nitro'
-
-  /**
-   * Checking the health for Nitro's process each 5 secs.
-   */
-  private static readonly _intervalHealthCheck = 5 * 1000
-
-  /**
-   * The interval id for the health check. Used to stop the health check.
-   */
-  private getNitroProcessHealthIntervalId: NodeJS.Timeout | undefined = undefined
-
-  /**
-   * Tracking the current state of nitro process.
-   */
-  private nitroProcessInfo: any = undefined
-
-  /**
-   * The URL for making inference requests.
-   */
-  inferenceUrl = ''
-
-  /**
-   * Subscribes to events emitted by the @janhq/core package.
-   */
-  async onLoad() {
-    this.inferenceUrl = INFERENCE_URL
-
-    // If the extension is running in the browser, use the base API URL from the core package.
-    if (!('electronAPI' in window)) {
-      this.inferenceUrl = `${window.core?.api?.baseApiUrl}/v1/chat/completions`
-    }
-
-    this.getNitroProcessHealthIntervalId = setInterval(
-      () => this.periodicallyGetNitroHealth(),
-      JanInferenceNitroExtension._intervalHealthCheck
-    )
-    const models = MODELS as unknown as Model[]
-    this.registerModels(models)
-    super.onLoad()
-
-    // Add additional dependencies PATH to the env
-    executeOnMain(NODE, 'addAdditionalDependencies', {
-      name: this.name,
-      version: this.version,
-    })
-  }
-
-  /**
-   * Periodically check for nitro process's health.
-   */
-  private async periodicallyGetNitroHealth(): Promise<void> {
-    const health = await executeOnMain(NODE, 'getCurrentNitroProcessInfo')
-
-    const isRunning = this.nitroProcessInfo?.isRunning ?? false
-    if (isRunning && health.isRunning === false) {
-      console.debug('Nitro process is stopped')
-      events.emit(ModelEvent.OnModelStopped, {})
-    }
-    this.nitroProcessInfo = health
-  }
-
-  override loadModel(model: ModelFile): Promise<void> {
-    if (model.engine !== this.provider) return Promise.resolve()
-    this.getNitroProcessHealthIntervalId = setInterval(
-      () => this.periodicallyGetNitroHealth(),
-      JanInferenceNitroExtension._intervalHealthCheck
-    )
-    return super.loadModel(model)
-  }
-
-  override async unloadModel(model?: Model): Promise<void> {
-    if (model?.engine && model.engine !== this.provider) return
-
-    // stop the periocally health check
-    if (this.getNitroProcessHealthIntervalId) {
-      clearInterval(this.getNitroProcessHealthIntervalId)
-      this.getNitroProcessHealthIntervalId = undefined
-    }
-    return super.unloadModel(model)
-  }
-
-  override async install(): Promise<void> {
-    const info = await systemInformation()
-
-    const platform = info.osInfo?.platform === 'win32' ? 'windows' : 'linux'
-    const downloadUrl = CUDA_DOWNLOAD_URL
-
-    const url = downloadUrl
-      .replace('<version>', info.gpuSetting?.cuda?.version ?? '12.4')
-      .replace('<platform>', platform)
-
-    console.debug('Downloading Cuda Toolkit Dependency: ', url)
-
-    const janDataFolderPath = await getJanDataFolderPath()
-
-    const executableFolderPath = await joinPath([
-      janDataFolderPath,
-      'engines',
-      this.name ?? 'cortex-cpp',
-      this.version ?? '1.0.0',
-    ])
-
-    if (!(await fs.existsSync(executableFolderPath))) {
-      await fs.mkdir(executableFolderPath)
-    }
-
-    const tarball = await baseName(url)
-    const tarballFullPath = await joinPath([executableFolderPath, tarball])
-
-    const downloadRequest: DownloadRequest = {
-      url,
-      localPath: tarballFullPath,
-      extensionId: this.name,
-      downloadType: 'extension',
-    }
-    downloadFile(downloadRequest)
-
-    const onFileDownloadSuccess = async (state: DownloadState) => {
-      console.log(state)
-      // if other download, ignore
-      if (state.fileName !== tarball) return
-      events.off(DownloadEvent.onFileDownloadSuccess, onFileDownloadSuccess)
-      await executeOnMain(
-        NODE,
-        'decompressRunner',
-        tarballFullPath,
-        executableFolderPath
-      )
-      events.emit(DownloadEvent.onFileUnzipSuccess, state)
-    }
-    events.on(DownloadEvent.onFileDownloadSuccess, onFileDownloadSuccess)
-  }
-
-  override async installationState(): Promise<InstallationState> {
-    const info = await systemInformation()
-    if (
-      info.gpuSetting?.run_mode === 'gpu' &&
-      !info.gpuSetting?.vulkan &&
-      info.osInfo &&
-      info.osInfo.platform !== 'darwin' &&
-      !info.gpuSetting?.cuda?.exist
-    ) {
-      const janDataFolderPath = await getJanDataFolderPath()
-
-      const executableFolderPath = await joinPath([
-        janDataFolderPath,
-        'engines',
-        this.name ?? 'cortex-cpp',
-        this.version ?? '1.0.0',
-      ])
-
-      if (!(await fs.existsSync(executableFolderPath))) return 'NotInstalled'
-      return 'Installed'
-    }
-    return 'NotRequired'
-  }
-}
--- a/extensions/inference-nitro-extension/src/node/index.test.ts
+++ b/extensions/inference-nitro-extension/src/node/index.test.ts
@ -1,465 +0,0 @@
-jest.mock('fetch-retry', () => ({
-  default: () => () => {
-    return Promise.resolve({
-      ok: true,
-      status: 200,
-      json: () =>
-        Promise.resolve({
-          model_loaded: true,
-        }),
-      text: () => Promise.resolve(''),
-    })
-  },
-}))
-
-jest.mock('path', () => ({
-  default: {
-    isAbsolute: jest.fn(),
-    join: jest.fn(),
-    parse: () => {
-      return { dir: 'dir' }
-    },
-    delimiter: { concat: () => '' },
-  },
-}))
-
-jest.mock('decompress', () => ({
-  default: () => {
-    return Promise.resolve()
-  },
-}))
-
-jest.mock('@janhq/core/node', () => ({
-  ...jest.requireActual('@janhq/core/node'),
-  getJanDataFolderPath: () => '',
-  getSystemResourceInfo: () => {
-    return {
-      cpu: {
-        cores: 1,
-        logicalCores: 1,
-        threads: 1,
-        model: 'model',
-        speed: 1,
-      },
-      memory: {
-        total: 1,
-        free: 1,
-      },
-      gpu: {
-        model: 'model',
-        memory: 1,
-        cuda: {
-          version: 'version',
-          devices: 'devices',
-        },
-        vulkan: {
-          version: 'version',
-          devices: 'devices',
-        },
-      },
-    }
-  },
-}))
-
-jest.mock('fs', () => ({
-  default: {
-    readdirSync: () => [],
-  },
-}))
-
-jest.mock('child_process', () => ({
-  exec: () => {
-    return {
-      stdout: { on: jest.fn() },
-      stderr: { on: jest.fn() },
-      on: jest.fn(),
-    }
-  },
-  spawn: () => {
-    return {
-      stdout: { on: jest.fn() },
-      stderr: { on: jest.fn() },
-      on: jest.fn(),
-      pid: '111',
-    }
-  },
-}))
-
-jest.mock('tcp-port-used', () => ({
-  default: {
-    waitUntilFree: () => Promise.resolve(true),
-    waitUntilUsed: () => Promise.resolve(true),
-  },
-}))
-
-jest.mock('./execute', () => ({
-  executableNitroFile: () => {
-    return {
-      enginePath: 'enginePath',
-      executablePath: 'executablePath',
-      cudaVisibleDevices: 'cudaVisibleDevices',
-      vkVisibleDevices: 'vkVisibleDevices',
-    }
-  },
-}))
-
-jest.mock('terminate', () => ({
-  default: (id: String, func: Function) => {
-    console.log(id)
-    func()
-  },
-}))
-
-import * as execute from './execute'
-import index from './index'
-
-let executeMock = execute
-
-const modelInitOptions: any = {
-  modelFolder: '/path/to/model',
-  model: {
-    id: 'test',
-    name: 'test',
-    engine: 'nitro',
-    version: '0.0',
-    format: 'GGUF',
-    object: 'model',
-    sources: [],
-    created: 0,
-    description: 'test',
-    parameters: {},
-    metadata: {
-      author: '',
-      tags: [],
-      size: 0,
-    },
-    settings: {
-      prompt_template: '{prompt}',
-      llama_model_path: 'model.gguf',
-    },
-  },
-}
-
-describe('loadModel', () => {
-  it('should load a model successfully', async () => {
-    // Mock the necessary parameters and system information
-
-    const systemInfo = {
-      // Mock the system information if needed
-    }
-
-    // Call the loadModel function
-    const result = await index.loadModel(modelInitOptions, systemInfo)
-
-    // Assert that the result is as expected
-    expect(result).toBeUndefined()
-  })
-
-  it('should reject with an error message if the model is not a nitro model', async () => {
-    // Mock the necessary parameters and system information
-
-    const systemInfo = {
-      // Mock the system information if needed
-    }
-    modelInitOptions.model.engine = 'not-nitro'
-    // Call the loadModel function
-    try {
-      await index.loadModel(modelInitOptions, systemInfo)
-    } catch (error) {
-      // Assert that the error message is as expected
-      expect(error).toBe('Not a cortex model')
-    }
-    modelInitOptions.model.engine = 'nitro'
-  })
-
-  it('should reject if model load failed with an error message', async () => {
-    // Mock the necessary parameters and system information
-
-    const systemInfo = {
-      // Mock the system information if needed
-    }
-    // Mock the fetch-retry module to return a failed response
-    jest.mock('fetch-retry', () => ({
-      default: () => () => {
-        return Promise.resolve({
-          ok: false,
-          status: 500,
-          json: () =>
-            Promise.resolve({
-              model_loaded: false,
-            }),
-          text: () => Promise.resolve('Failed to load model'),
-        })
-      },
-    }))
-
-    // Call the loadModel function
-    try {
-      await index.loadModel(modelInitOptions, systemInfo)
-    } catch (error) {
-      // Assert that the error message is as expected
-      expect(error).toBe('Failed to load model')
-    }
-  })
-
-  it('should reject if port not available', async () => {
-    // Mock the necessary parameters and system information
-
-    const systemInfo = {
-      // Mock the system information if needed
-    }
-
-    // Mock the tcp-port-used module to return false
-    jest.mock('tcp-port-used', () => ({
-      default: {
-        waitUntilFree: () => Promise.resolve(false),
-        waitUntilUsed: () => Promise.resolve(false),
-      },
-    }))
-
-    // Call the loadModel function
-    try {
-      await index.loadModel(modelInitOptions, systemInfo)
-    } catch (error) {
-      // Assert that the error message is as expected
-      expect(error).toBe('Port not available')
-    }
-  })
-
-  it('should run on GPU model if ngl is set', async () => {
-    const systemInfo: any = {
-      gpuSetting: {
-        run_mode: 'gpu',
-      },
-    }
-    // Spy executableNitroFile
-    jest.spyOn(executeMock, 'executableNitroFile').mockReturnValue({
-      enginePath: '',
-      executablePath: '',
-      cudaVisibleDevices: '',
-      vkVisibleDevices: '',
-    })
-
-    Object.defineProperty(process, 'platform', { value: 'win32' })
-    await index.loadModel(
-      {
-        ...modelInitOptions,
-        model: {
-          ...modelInitOptions.model,
-          settings: {
-            ...modelInitOptions.model.settings,
-            ngl: 40,
-          },
-        },
-      },
-      systemInfo
-    )
-    expect(executeMock.executableNitroFile).toHaveBeenCalledWith({
-      run_mode: 'gpu',
-    })
-  })
-
-  it('should run on correct CPU instructions if ngl is not set', async () => {
-    const systemInfo: any = {
-      gpuSetting: {
-        run_mode: 'gpu',
-      },
-    }
-    // Spy executableNitroFile
-    jest.spyOn(executeMock, 'executableNitroFile').mockReturnValue({
-      enginePath: '',
-      executablePath: '',
-      cudaVisibleDevices: '',
-      vkVisibleDevices: '',
-    })
-
-    Object.defineProperty(process, 'platform', { value: 'win32' })
-    await index.loadModel(
-      {
-        ...modelInitOptions,
-        model: {
-          ...modelInitOptions.model,
-          settings: {
-            ...modelInitOptions.model.settings,
-            ngl: undefined,
-          },
-        },
-      },
-      systemInfo
-    )
-    expect(executeMock.executableNitroFile).toHaveBeenCalledWith({
-      run_mode: 'cpu',
-    })
-  })
-
-  it('should run on correct CPU instructions if ngl is 0', async () => {
-    const systemInfo: any = {
-      gpuSetting: {
-        run_mode: 'gpu',
-      },
-    }
-    // Spy executableNitroFile
-    jest.spyOn(executeMock, 'executableNitroFile').mockReturnValue({
-      enginePath: '',
-      executablePath: '',
-      cudaVisibleDevices: '',
-      vkVisibleDevices: '',
-    })
-
-    Object.defineProperty(process, 'platform', { value: 'win32' })
-    await index.loadModel(
-      {
-        ...modelInitOptions,
-        model: {
-          ...modelInitOptions.model,
-          settings: {
-            ...modelInitOptions.model.settings,
-            ngl: 0,
-          },
-        },
-      },
-      systemInfo
-    )
-    expect(executeMock.executableNitroFile).toHaveBeenCalledWith({
-      run_mode: 'cpu',
-    })
-  })
-})
-
-describe('unloadModel', () => {
-  it('should unload a model successfully', async () => {
-    // Call the unloadModel function
-    const result = await index.unloadModel()
-
-    // Assert that the result is as expected
-    expect(result).toBeUndefined()
-  })
-
-  it('should reject with an error message if the model is not a nitro model', async () => {
-    // Call the unloadModel function
-    try {
-      await index.unloadModel()
-    } catch (error) {
-      // Assert that the error message is as expected
-      expect(error).toBe('Not a cortex model')
-    }
-  })
-
-  it('should reject if model unload failed with an error message', async () => {
-    // Mock the fetch-retry module to return a failed response
-    jest.mock('fetch-retry', () => ({
-      default: () => () => {
-        return Promise.resolve({
-          ok: false,
-          status: 500,
-          json: () =>
-            Promise.resolve({
-              model_unloaded: false,
-            }),
-          text: () => Promise.resolve('Failed to unload model'),
-        })
-      },
-    }))
-
-    // Call the unloadModel function
-    try {
-      await index.unloadModel()
-    } catch (error) {
-      // Assert that the error message is as expected
-      expect(error).toBe('Failed to unload model')
-    }
-  })
-
-  it('should reject if port not available', async () => {
-    // Mock the tcp-port-used module to return false
-    jest.mock('tcp-port-used', () => ({
-      default: {
-        waitUntilFree: () => Promise.resolve(false),
-        waitUntilUsed: () => Promise.resolve(false),
-      },
-    }))
-
-    // Call the unloadModel function
-    try {
-      await index.unloadModel()
-    } catch (error) {
-      // Assert that the error message is as expected
-      expect(error).toBe('Port not available')
-    }
-  })
-})
-describe('dispose', () => {
-  it('should dispose a model successfully on Mac', async () => {
-    Object.defineProperty(process, 'platform', {
-      value: 'darwin',
-    })
-
-    // Call the dispose function
-    const result = await index.dispose()
-
-    // Assert that the result is as expected
-    expect(result).toBeUndefined()
-  })
-
-  it('should kill the subprocess successfully on Windows', async () => {
-    Object.defineProperty(process, 'platform', {
-      value: 'win32',
-    })
-
-    // Call the killSubprocess function
-    const result = await index.dispose()
-
-    // Assert that the result is as expected
-    expect(result).toBeUndefined()
-  })
-})
-
-describe('getCurrentNitroProcessInfo', () => {
-  it('should return the current nitro process info', async () => {
-    // Call the getCurrentNitroProcessInfo function
-    const result = await index.getCurrentNitroProcessInfo()
-
-    // Assert that the result is as expected
-    expect(result).toEqual({
-      isRunning: true,
-    })
-  })
-})
-
-describe('decompressRunner', () => {
-  it('should decompress the runner successfully', async () => {
-    jest.mock('decompress', () => ({
-      default: () => {
-        return Promise.resolve()
-      },
-    }))
-    // Call the decompressRunner function
-    const result = await index.decompressRunner('', '')
-
-    // Assert that the result is as expected
-    expect(result).toBeUndefined()
-  })
-  it('should not reject if decompression failed', async () => {
-    jest.mock('decompress', () => ({
-      default: () => {
-        return Promise.reject('Failed to decompress')
-      },
-    }))
-    // Call the decompressRunner function
-    const result = await index.decompressRunner('', '')
-    expect(result).toBeUndefined()
-  })
-})
-
-describe('addAdditionalDependencies', () => {
-  it('should add additional dependencies successfully', async () => {
-    // Call the addAdditionalDependencies function
-    const result = await index.addAdditionalDependencies({
-      name: 'name',
-      version: 'version',
-    })
-
-    // Assert that the result is as expected
-    expect(result).toBeUndefined()
-  })
-})
--- a/extensions/inference-nitro-extension/src/node/index.ts
+++ b/extensions/inference-nitro-extension/src/node/index.ts
@ -1,501 +0,0 @@
-import fs from 'fs'
-import path from 'path'
-import { ChildProcessWithoutNullStreams, spawn } from 'child_process'
-import tcpPortUsed from 'tcp-port-used'
-import fetchRT from 'fetch-retry'
-import {
-  log,
-  getSystemResourceInfo,
-  InferenceEngine,
-  ModelSettingParams,
-  PromptTemplate,
-  SystemInformation,
-  getJanDataFolderPath,
-  ModelFile,
-} from '@janhq/core/node'
-import { executableNitroFile } from './execute'
-import terminate from 'terminate'
-import decompress from 'decompress'
-
-// Polyfill fetch with retry
-const fetchRetry = fetchRT(fetch)
-
-/**
- * The response object for model init operation.
- */
-interface ModelInitOptions {
-  modelFolder: string
-  model: ModelFile
-}
-// The PORT to use for the Nitro subprocess
-const PORT = 3928
-// The HOST address to use for the Nitro subprocess
-const LOCAL_HOST = '127.0.0.1'
-// The URL for the Nitro subprocess
-const NITRO_HTTP_SERVER_URL = `http://${LOCAL_HOST}:${PORT}`
-// The URL for the Nitro subprocess to load a model
-const NITRO_HTTP_LOAD_MODEL_URL = `${NITRO_HTTP_SERVER_URL}/inferences/server/loadmodel`
-// The URL for the Nitro subprocess to validate a model
-const NITRO_HTTP_VALIDATE_MODEL_URL = `${NITRO_HTTP_SERVER_URL}/inferences/server/modelstatus`
-// The URL for the Nitro subprocess to kill itself
-const NITRO_HTTP_KILL_URL = `${NITRO_HTTP_SERVER_URL}/processmanager/destroy`
-
-const NITRO_PORT_FREE_CHECK_INTERVAL = 100
-
-// The supported model format
-// TODO: Should be an array to support more models
-const SUPPORTED_MODEL_FORMAT = '.gguf'
-
-// The subprocess instance for Nitro
-let subprocess: ChildProcessWithoutNullStreams | undefined = undefined
-
-// The current model settings
-let currentSettings: (ModelSettingParams & { model?: string }) | undefined =
-  undefined
-
-/**
- * Stops a Nitro subprocess.
- * @param wrapper - The model wrapper.
- * @returns A Promise that resolves when the subprocess is terminated successfully, or rejects with an error message if the subprocess fails to terminate.
- */
-function unloadModel(): Promise<void> {
-  return killSubprocess()
-}
-
-/**
- * Initializes a Nitro subprocess to load a machine learning model.
- * @param wrapper - The model wrapper.
- * @returns A Promise that resolves when the model is loaded successfully, or rejects with an error message if the model is not found or fails to load.
- * TODO: Should pass absolute of the model file instead of just the name - So we can modurize the module.ts to npm package
- */
-async function loadModel(
-  params: ModelInitOptions,
-  systemInfo?: SystemInformation
-): Promise<ModelOperationResponse | void> {
-  if (params.model.engine !== InferenceEngine.nitro) {
-    // Not a nitro model
-    return Promise.resolve()
-  }
-
-  if (params.model.engine !== InferenceEngine.nitro) {
-    return Promise.reject('Not a cortex model')
-  } else {
-    const nitroResourceProbe = await getSystemResourceInfo()
-    // Convert settings.prompt_template to system_prompt, user_prompt, ai_prompt
-    if (params.model.settings.prompt_template) {
-      const promptTemplate = params.model.settings.prompt_template
-      const prompt = promptTemplateConverter(promptTemplate)
-      if (prompt?.error) {
-        return Promise.reject(prompt.error)
-      }
-      params.model.settings.system_prompt = prompt.system_prompt
-      params.model.settings.user_prompt = prompt.user_prompt
-      params.model.settings.ai_prompt = prompt.ai_prompt
-    }
-
-    // modelFolder is the absolute path to the running model folder
-    // e.g. ~/jan/models/llama-2
-    let modelFolder = params.modelFolder
-
-    let llama_model_path = params.model.settings.llama_model_path
-
-    // Absolute model path support
-    if (
-      params.model?.sources.length &&
-      params.model.sources.every((e) => fs.existsSync(e.url))
-    ) {
-      llama_model_path =
-        params.model.sources.length === 1
-          ? params.model.sources[0].url
-          : params.model.sources.find((e) =>
-              e.url.includes(llama_model_path ?? params.model.id)
-            )?.url
-    }
-
-    if (!llama_model_path || !path.isAbsolute(llama_model_path)) {
-      // Look for GGUF model file
-      const modelFiles: string[] = fs.readdirSync(modelFolder)
-      const ggufBinFile = modelFiles.find(
-        (file) =>
-          // 1. Prioritize llama_model_path (predefined)
-          (llama_model_path && file === llama_model_path) ||
-          // 2. Prioritize GGUF File (manual import)
-          file.toLowerCase().includes(SUPPORTED_MODEL_FORMAT) ||
-          // 3. Fallback Model ID (for backward compatibility)
-          file === params.model.id
-      )
-      if (ggufBinFile) llama_model_path = path.join(modelFolder, ggufBinFile)
-    }
-
-    // Look for absolute source path for single model
-
-    if (!llama_model_path) return Promise.reject('No GGUF model file found')
-
-    currentSettings = {
-      cpu_threads: Math.max(1, nitroResourceProbe.numCpuPhysicalCore),
-      // model.settings can override the default settings
-      ...params.model.settings,
-      llama_model_path,
-      model: params.model.id,
-      // This is critical and requires real CPU physical core count (or performance core)
-      ...(params.model.settings.mmproj && {
-        mmproj: path.isAbsolute(params.model.settings.mmproj)
-          ? params.model.settings.mmproj
-          : path.join(modelFolder, params.model.settings.mmproj),
-      }),
-    }
-    return runNitroAndLoadModel(params.model.id, systemInfo)
-  }
-}
-
-/**
- * 1. Spawn Nitro process
- * 2. Load model into Nitro subprocess
- * 3. Validate model status
- * @returns
- */
-async function runNitroAndLoadModel(
-  modelId: string,
-  systemInfo?: SystemInformation
-) {
-  // Gather system information for CPU physical cores and memory
-  return killSubprocess()
-    .then(() =>
-      tcpPortUsed.waitUntilFree(PORT, NITRO_PORT_FREE_CHECK_INTERVAL, 5000)
-    )
-    .then(() => spawnNitroProcess(systemInfo))
-    .then(() => loadLLMModel(currentSettings))
-    .then(() => validateModelStatus(modelId))
-    .catch((err) => {
-      // TODO: Broadcast error so app could display proper error message
-      log(`[CORTEX]::Error: ${err}`)
-      return { error: err }
-    })
-}
-
-/**
- * Parse prompt template into agrs settings
- * @param promptTemplate Template as string
- * @returns
- */
-function promptTemplateConverter(promptTemplate: string): PromptTemplate {
-  // Split the string using the markers
-  const systemMarker = '{system_message}'
-  const promptMarker = '{prompt}'
-
-  if (
-    promptTemplate.includes(systemMarker) &&
-    promptTemplate.includes(promptMarker)
-  ) {
-    // Find the indices of the markers
-    const systemIndex = promptTemplate.indexOf(systemMarker)
-    const promptIndex = promptTemplate.indexOf(promptMarker)
-
-    // Extract the parts of the string
-    const system_prompt = promptTemplate.substring(0, systemIndex)
-    const user_prompt = promptTemplate.substring(
-      systemIndex + systemMarker.length,
-      promptIndex
-    )
-    const ai_prompt = promptTemplate.substring(
-      promptIndex + promptMarker.length
-    )
-
-    // Return the split parts
-    return { system_prompt, user_prompt, ai_prompt }
-  } else if (promptTemplate.includes(promptMarker)) {
-    // Extract the parts of the string for the case where only promptMarker is present
-    const promptIndex = promptTemplate.indexOf(promptMarker)
-    const user_prompt = promptTemplate.substring(0, promptIndex)
-    const ai_prompt = promptTemplate.substring(
-      promptIndex + promptMarker.length
-    )
-
-    // Return the split parts
-    return { user_prompt, ai_prompt }
-  }
-
-  // Return an error if none of the conditions are met
-  return { error: 'Cannot split prompt template' }
-}
-
-/**
- * Loads a LLM model into the Nitro subprocess by sending a HTTP POST request.
- * @returns A Promise that resolves when the model is loaded successfully, or rejects with an error message if the model is not found or fails to load.
- */
-function loadLLMModel(settings: any): Promise<Response> {
-  if (!settings?.ngl) {
-    settings.ngl = 100
-  }
-  log(`[CORTEX]:: Loading model with params ${JSON.stringify(settings)}`)
-  return fetchRetry(NITRO_HTTP_LOAD_MODEL_URL, {
-    method: 'POST',
-    headers: {
-      'Content-Type': 'application/json',
-    },
-    body: JSON.stringify(settings),
-    retries: 3,
-    retryDelay: 300,
-  })
-    .then((res) => {
-      log(
-        `[CORTEX]:: Load model success with response ${JSON.stringify(
-          res
-        )}`
-      )
-      return Promise.resolve(res)
-    })
-    .catch((err) => {
-      log(`[CORTEX]::Error: Load model failed with error ${err}`)
-      return Promise.reject(err)
-    })
-}
-
-/**
- * Validates the status of a model.
- * @returns {Promise<ModelOperationResponse>} A promise that resolves to an object.
- * If the model is loaded successfully, the object is empty.
- * If the model is not loaded successfully, the object contains an error message.
- */
-async function validateModelStatus(modelId: string): Promise<void> {
-  // Send a GET request to the validation URL.
-  // Retry the request up to 3 times if it fails, with a delay of 500 milliseconds between retries.
-  log(`[CORTEX]:: Validating model ${modelId}`)
-  return fetchRetry(NITRO_HTTP_VALIDATE_MODEL_URL, {
-    method: 'POST',
-    body: JSON.stringify({
-      model: modelId,
-      // TODO: force to use cortex llamacpp by default
-      engine: 'cortex.llamacpp',
-    }),
-    headers: {
-      'Content-Type': 'application/json',
-    },
-    retries: 5,
-    retryDelay: 300,
-  }).then(async (res: Response) => {
-    log(
-      `[CORTEX]:: Validate model state with response ${JSON.stringify(
-        res.status
-      )}`
-    )
-    // If the response is OK, check model_loaded status.
-    if (res.ok) {
-      const body = await res.json()
-      // If the model is loaded, return an empty object.
-      // Otherwise, return an object with an error message.
-      if (body.model_loaded) {
-        log(
-          `[CORTEX]:: Validate model state success with response ${JSON.stringify(
-            body
-          )}`
-        )
-        return Promise.resolve()
-      }
-    }
-    const errorBody = await res.text()
-    log(
-      `[CORTEX]:: Validate model state failed with response ${errorBody} and status is ${JSON.stringify(
-        res.statusText
-      )}`
-    )
-    return Promise.reject('Validate model status failed')
-  })
-}
-
-/**
- * Terminates the Nitro subprocess.
- * @returns A Promise that resolves when the subprocess is terminated successfully, or rejects with an error message if the subprocess fails to terminate.
- */
-async function killSubprocess(): Promise<void> {
-  const controller = new AbortController()
-  setTimeout(() => controller.abort(), 5000)
-  log(`[CORTEX]:: Request to kill cortex`)
-
-  const killRequest = () => {
-    return fetch(NITRO_HTTP_KILL_URL, {
-      method: 'DELETE',
-      signal: controller.signal,
-    })
-      .catch(() => {}) // Do nothing with this attempt
-      .then(() =>
-        tcpPortUsed.waitUntilFree(PORT, NITRO_PORT_FREE_CHECK_INTERVAL, 5000)
-      )
-      .then(() => log(`[CORTEX]:: cortex process is terminated`))
-      .catch((err) => {
-        log(
-          `[CORTEX]:: Could not kill running process on port ${PORT}. Might be another process running on the same port? ${err}`
-        )
-        throw 'PORT_NOT_AVAILABLE'
-      })
-  }
-
-  if (subprocess?.pid && process.platform !== 'darwin') {
-    log(`[CORTEX]:: Killing PID ${subprocess.pid}`)
-    const pid = subprocess.pid
-    return new Promise((resolve, reject) => {
-      terminate(pid, function (err) {
-        if (err) {
-          log('[CORTEX]::Failed to kill PID - sending request to kill')
-          killRequest().then(resolve).catch(reject)
-        } else {
-          tcpPortUsed
-            .waitUntilFree(PORT, NITRO_PORT_FREE_CHECK_INTERVAL, 5000)
-            .then(() => log(`[CORTEX]:: cortex process is terminated`))
-            .then(() => resolve())
-            .catch(() => {
-              log(
-                '[CORTEX]::Failed to kill PID (Port check timeout) - sending request to kill'
-              )
-              killRequest().then(resolve).catch(reject)
-            })
-        }
-      })
-    })
-  } else {
-    return killRequest()
-  }
-}
-
-/**
- * Spawns a Nitro subprocess.
- * @returns A promise that resolves when the Nitro subprocess is started.
- */
-function spawnNitroProcess(systemInfo?: SystemInformation): Promise<any> {
-  log(`[CORTEX]:: Spawning cortex subprocess...`)
-
-  return new Promise<void>(async (resolve, reject) => {
-    let executableOptions = executableNitroFile(
-      // If ngl is not set or equal to 0, run on CPU with correct instructions
-      systemInfo?.gpuSetting
-        ? {
-            ...systemInfo.gpuSetting,
-            run_mode:
-              currentSettings?.ngl === undefined || currentSettings.ngl === 0
-                ? 'cpu'
-                : systemInfo.gpuSetting.run_mode,
-          }
-        : undefined
-    )
-
-    const args: string[] = ['1', LOCAL_HOST, PORT.toString()]
-    // Execute the binary
-    log(
-      `[CORTEX]:: Spawn cortex at path: ${executableOptions.executablePath}, and args: ${args}`
-    )
-    log(`[CORTEX]::Debug: Cortex engine path: ${executableOptions.enginePath}`)
-
-    // Add engine path to the PATH and LD_LIBRARY_PATH
-    process.env.PATH = (process.env.PATH || '').concat(
-      path.delimiter,
-      executableOptions.enginePath
-    )
-    log(`[CORTEX] PATH: ${process.env.PATH}`)
-    process.env.LD_LIBRARY_PATH = (process.env.LD_LIBRARY_PATH || '').concat(
-      path.delimiter,
-      executableOptions.enginePath
-    )
-
-    subprocess = spawn(
-      executableOptions.executablePath,
-      ['1', LOCAL_HOST, PORT.toString()],
-      {
-        cwd: path.join(path.parse(executableOptions.executablePath).dir),
-        env: {
-          ...process.env,
-          ENGINE_PATH: executableOptions.enginePath,
-          CUDA_VISIBLE_DEVICES: executableOptions.cudaVisibleDevices,
-          // Vulkan - Support 1 device at a time for now
-          ...(executableOptions.vkVisibleDevices?.length > 0 && {
-            GGML_VULKAN_DEVICE: executableOptions.vkVisibleDevices[0],
-          }),
-        },
-      }
-    )
-
-    // Handle subprocess output
-    subprocess.stdout.on('data', (data: any) => {
-      log(`[CORTEX]:: ${data}`)
-    })
-
-    subprocess.stderr.on('data', (data: any) => {
-      log(`[CORTEX]::Error: ${data}`)
-    })
-
-    subprocess.on('close', (code: any) => {
-      log(`[CORTEX]:: cortex exited with code: ${code}`)
-      subprocess = undefined
-      reject(`child process exited with code ${code}`)
-    })
-
-    tcpPortUsed
-      .waitUntilUsed(PORT, NITRO_PORT_FREE_CHECK_INTERVAL, 30000)
-      .then(() => {
-        log(`[CORTEX]:: cortex is ready`)
-        resolve()
-      })
-  })
-}
-
-/**
- * Every module should have a dispose function
- * This will be called when the extension is unloaded and should clean up any resources
- * Also called when app is closed
- */
-function dispose() {
-  // clean other registered resources here
-  killSubprocess()
-}
-
-/**
- * Nitro process info
- */
-export interface NitroProcessInfo {
-  isRunning: boolean
-}
-
-/**
- * Retrieve current nitro process
- */
-const getCurrentNitroProcessInfo = (): NitroProcessInfo => {
-  return {
-    isRunning: subprocess != null,
-  }
-}
-
-const addAdditionalDependencies = (data: { name: string; version: string }) => {
-  log(
-    `[CORTEX]::Debug: Adding additional dependencies for ${data.name} ${data.version}`
-  )
-  const additionalPath = path.delimiter.concat(
-    path.join(getJanDataFolderPath(), 'engines', data.name, data.version)
-  )
-  // Set the updated PATH
-  process.env.PATH = (process.env.PATH || '').concat(
-    path.delimiter,
-    additionalPath
-  )
-  process.env.LD_LIBRARY_PATH = (process.env.LD_LIBRARY_PATH || '').concat(
-    path.delimiter,
-    additionalPath
-  )
-}
-
-const decompressRunner = async (zipPath: string, output: string) => {
-  console.debug(`Decompressing ${zipPath} to ${output}...`)
-  try {
-    const files = await decompress(zipPath, output)
-    console.debug('Decompress finished!', files)
-  } catch (err) {
-    console.error(`Decompress ${zipPath} failed: ${err}`)
-  }
-}
-
-export default {
-  loadModel,
-  unloadModel,
-  dispose,
-  getCurrentNitroProcessInfo,
-  addAdditionalDependencies,
-  decompressRunner,
-}
--- a/extensions/model-extension/package.json
+++ b/extensions/model-extension/package.json
@ -4,7 +4,6 @@
  "version": "1.0.34",
  "description": "Model Management Extension provides model exploration and seamless downloads",
  "main": "dist/index.js",
-  "node": "dist/node/index.cjs.js",
  "author": "Jan <service@jan.ai>",
  "license": "AGPL-3.0",
  "scripts": {
@ -36,15 +35,9 @@
    "README.md"
  ],
  "dependencies": {
-    "@huggingface/gguf": "^0.0.11",
-    "@huggingface/jinja": "^0.3.0",
    "@janhq/core": "file:../../core",
-    "hyllama": "^0.2.2",
-    "python-shell": "^5.0.0"
+    "ky": "^1.7.2",
+    "p-queue": "^8.0.1"
  },
-  "bundleDependencies": [
-    "hyllama",
-    "@huggingface/gguf",
-    "@huggingface/jinja"
-  ]
+  "bundleDependencies": []
 }
--- a/extensions/model-extension/resources/default-model.json
+++ b/extensions/model-extension/resources/default-model.json
@ -1,36 +0,0 @@
-{
-  "object": "model",
-  "version": "1.0",
-  "format": "gguf",
-  "sources": [
-    {
-      "url": "N/A",
-      "filename": "N/A"
-    }
-  ],
-  "id": "N/A",
-  "name": "N/A",
-  "created": 0,
-  "description": "User self import model",
-  "settings": {
-    "ctx_len": 2048,
-    "embedding": false,
-    "prompt_template": "{system_message}\n### Instruction: {prompt}\n### Response:",
-    "llama_model_path": "N/A"
-  },
-  "parameters": {
-    "temperature": 0.7,
-    "top_p": 0.95,
-    "stream": true,
-    "max_tokens": 2048,
-    "stop": ["<|END_OF_TURN_TOKEN|>", "<end_of_turn>", "[/INST]", "<|end_of_text|>", "<|eot_id|>", "<|im_end|>", "<|end|>"],
-    "frequency_penalty": 0,
-    "presence_penalty": 0
-  },
-  "metadata": {
-    "author": "User",
-    "tags": [],
-    "size": 0
-  },
-  "engine": "nitro"
-}
--- a/extensions/model-extension/rollup.config.ts
+++ b/extensions/model-extension/rollup.config.ts
@ -6,7 +6,6 @@ import replace from '@rollup/plugin-replace'
 import commonjs from '@rollup/plugin-commonjs'
 const settingJson = require('./resources/settings.json')
 const packageJson = require('./package.json')
-const defaultModelJson = require('./resources/default-model.json')

 export default [
  {
@ -20,17 +19,18 @@ export default [
    plugins: [
      replace({
        preventAssignment: true,
-        DEFAULT_MODEL: JSON.stringify(defaultModelJson),
        SETTINGS: JSON.stringify(settingJson),
-        NODE: JSON.stringify(`${packageJson.name}/${packageJson.node}`),
      }),
      // Allow json resolution
      json(),
      //     Compile TypeScript files
-      typescript({ useTsconfigDeclarationDir: true, exclude: ['**/__tests__', '**/*.test.ts'], }),
+      typescript({
+        useTsconfigDeclarationDir: true,
+        exclude: ['**/__tests__', '**/*.test.ts'],
+      }),
      // Compile TypeScript files
      // Allow bundling cjs modules (unlike webpack, rollup doesn't understand cjs)
-      // commonjs(),
+      commonjs(),
      // Allow node_modules resolution, so you can use 'external' to control
      // which external modules to include in the bundle
      // https://github.com/rollup/rollup-plugin-node-resolve#usage
@ -39,39 +39,6 @@ export default [
        browser: true,
      }),

-      // Resolve source maps to the original source
-      sourceMaps(),
-    ],
-  },
-  {
-    input: `src/node/index.ts`,
-    output: [
-      {
-        file: 'dist/node/index.cjs.js',
-        format: 'cjs',
-        sourcemap: true,
-        inlineDynamicImports: true,
-      },
-    ],
-    // Indicate here external modules you don't wanna include in your bundle (i.e.: 'lodash')
-    external: ['@janhq/core/node'],
-    watch: {
-      include: 'src/node/**',
-    },
-    plugins: [
-      // Allow json resolution
-      json(),
-      // Compile TypeScript files
-      typescript({ useTsconfigDeclarationDir: true, exclude: ['**/__tests__', '**/*.test.ts'], }),
-      // Allow bundling cjs modules (unlike webpack, rollup doesn't understand cjs)
-      commonjs(),
-      // Allow node_modules resolution, so you can use 'external' to control
-      // which external modules to include in the bundle
-      // https://github.com/rollup/rollup-plugin-node-resolve#usage
-      resolve({
-        extensions: ['.ts', '.js', '.json'],
-      }),
-
      // Resolve source maps to the original source
      sourceMaps(),
    ],
--- a/extensions/model-extension/src/@types/InvalidHostError.ts
+++ b/extensions/model-extension/src/@types/InvalidHostError.ts
@ -1,6 +0,0 @@
-export class InvalidHostError extends Error {
-  constructor(message: string) {
-    super(message)
-    this.name = 'InvalidHostError'
-  }
-}
--- a/extensions/model-extension/src/@types/NotSupportModelError.ts
+++ b/extensions/model-extension/src/@types/NotSupportModelError.ts
@ -1,6 +0,0 @@
-export class NotSupportedModelError extends Error {
-  constructor(message: string) {
-    super(message)
-    this.name = 'NotSupportedModelError'
-  }
-}
--- a/extensions/model-extension/src/@types/global.d.ts
+++ b/extensions/model-extension/src/@types/global.d.ts
@ -1,6 +1,5 @@
 export {}
 declare global {
-  declare const DEFAULT_MODEL: object
  declare const NODE: string

  interface Core {
--- a/extensions/model-extension/src/cortex.ts
+++ b/extensions/model-extension/src/cortex.ts
@ -0,0 +1,166 @@
+import PQueue from 'p-queue'
+import ky from 'ky'
+import {
+  DownloadEvent,
+  events,
+  Model,
+  ModelRuntimeParams,
+  ModelSettingParams,
+} from '@janhq/core'
+/**
+ * cortex.cpp Model APIs interface
+ */
+interface ICortexAPI {
+  /** Fetch a single model by its id. */
+  getModel(model: string): Promise<Model>
+  /** List the models known to the server. */
+  getModels(): Promise<Model[]>
+  /** Ask the server to pull (download) the given model. */
+  pullModel(model: string): Promise<void>
+  /**
+   * Import a model from a file.
+   * @param model id to register the imported model under
+   * @param modelPath path of the model file to import
+   */
+  importModel(model: string, modelPath: string): Promise<void>
+  /** Delete the model with the given id. */
+  deleteModel(model: string): Promise<void>
+  /** Update a model; the object is expected to carry the model's `id`. */
+  updateModel(model: object): Promise<void>
+  /** Cancel an in-progress pull; `model` is sent as the pull task id. */
+  cancelModelPull(model: string): Promise<void>
+}
+/**
+ * Base URLs of the cortex.cpp server on the loopback interface:
+ * `API_URL` for HTTP requests, `SOCKET_URL` for the WebSocket event stream.
+ * This simple CortexAPI service could be replaced by the cortex client sdk later on.
+ */
+const API_URL = 'http://127.0.0.1:39291'
+const SOCKET_URL = 'ws://127.0.0.1:39291'
+
+/** Shape of the model list response: the entries are carried in `data`. */
+type ModelList = {
+  data: any[]
+}
+
+/**
+ * Client for the cortex.cpp model-management HTTP API and its WebSocket
+ * event stream.
+ *
+ * Requests are submitted through a queue created with `concurrency: 1`. The
+ * constructor enqueues a health check first, then the event subscription.
+ */
+export class CortexAPI implements ICortexAPI {
+  queue = new PQueue({ concurrency: 1 })
+  socket?: WebSocket = undefined
+
+  constructor() {
+    // Log a failed health check instead of leaving the rejection unhandled.
+    this.queue
+      .add(() => this.healthz())
+      .catch((e) => console.error('Cortex health check failed', e))
+    this.subscribeToEvents()
+  }
+
+  /**
+   * Fetches a single model.
+   * @param model id of the model
+   * @returns the server's model after `transformModel` has been applied
+   */
+  getModel(model: string): Promise<any> {
+    return this.queue.add(() =>
+      ky
+        .get(`${API_URL}/v1/models/${model}`)
+        .json()
+        .then((e) => this.transformModel(e))
+    )
+  }
+
+  /**
+   * Lists models.
+   * @returns the transformed entries of the response's `data` array, or an
+   * empty array when the response carries no such array
+   */
+  getModels(): Promise<Model[]> {
+    return this.queue
+      .add(() => ky.get(`${API_URL}/models`).json<ModelList>())
+      .then((e) =>
+        // `typeof null` is also 'object', and the payload may lack `data`.
+        typeof e === 'object' && e !== null && Array.isArray(e.data)
+          ? e.data.map((item) => this.transformModel(item))
+          : []
+      )
+  }
+
+  /**
+   * Requests a model pull.
+   * @param model id of the model to pull
+   * @throws the JSON error body returned by the server when there is one,
+   * otherwise the original request error
+   */
+  pullModel(model: string): Promise<void> {
+    return this.queue.add(() =>
+      ky
+        .post(`${API_URL}/v1/models/pull`, { json: { model } })
+        .json()
+        .catch(async (e) => {
+          // If the error body is missing or is not JSON, keep the original
+          // error rather than replacing it with a parse failure.
+          const details = await e.response?.json().catch(() => undefined)
+          throw details ?? e
+        })
+        .then()
+    )
+  }
+
+  /**
+   * Imports a model file. Failures are deliberately not propagated: they are
+   * only written to the debug console.
+   * @param model id to register the imported model under
+   * @param modelPath path of the model file
+   */
+  importModel(model: string, modelPath: string): Promise<void> {
+    return this.queue.add(() =>
+      ky
+        .post(`${API_URL}/v1/models/import`, { json: { model, modelPath } })
+        .json()
+        .catch((e) => console.debug(e)) // Ignore error
+        .then()
+    )
+  }
+
+  /**
+   * Deletes a model.
+   * @param model id of the model
+   */
+  deleteModel(model: string): Promise<void> {
+    return this.queue.add(() =>
+      ky.delete(`${API_URL}/models/${model}`).json().then()
+    )
+  }
+
+  /**
+   * Updates a model. The request URL is built from the model's `id`.
+   * @param model object carrying the model's `id` plus the fields to update
+   * @returns a promise that rejects when `model` has no non-empty string `id`
+   */
+  updateModel(model: object): Promise<void> {
+    // Interpolating the object itself would produce ".../[object Object]".
+    const id = (model as { id?: unknown }).id
+    if (typeof id !== 'string' || id.length === 0) {
+      return Promise.reject(
+        new Error('updateModel requires a model with a string id')
+      )
+    }
+    // NOTE(review): the payload is still wrapped as `{ model }`; confirm the
+    // server expects that shape rather than the bare fields.
+    return this.queue.add(() =>
+      ky
+        .patch(`${API_URL}/v1/models/${id}`, { json: { model } })
+        .json()
+        .then()
+    )
+  }
+
+  /**
+   * Cancels a model pull.
+   * @param model sent to the server as `taskId`
+   */
+  cancelModelPull(model: string): Promise<void> {
+    return this.queue.add(() =>
+      ky
+        .delete(`${API_URL}/models/pull`, { json: { taskId: model } })
+        .json()
+        .then()
+    )
+  }
+
+  /**
+   * Calls the server's `/healthz` endpoint with a retry limit of 10.
+   */
+  healthz(): Promise<void> {
+    return ky
+      .get(`${API_URL}/healthz`, {
+        retry: {
+          limit: 10,
+          methods: ['get'],
+        },
+      })
+      .then(() => {})
+  }
+
+  /**
+   * Enqueues the creation of the WebSocket on `/events`. The queued task
+   * resolves as soon as the socket object exists; it does not wait for the
+   * connection to open.
+   */
+  subscribeToEvents() {
+    this.queue.add(
+      () =>
+        new Promise<void>((resolve) => {
+          this.socket = new WebSocket(`${SOCKET_URL}/events`)
+          // Report the connection only once it is actually open.
+          this.socket.addEventListener('open', () =>
+            console.log('Socket connected')
+          )
+          this.socket.addEventListener('message', (event) =>
+            this.handleSocketMessage(event)
+          )
+          resolve()
+        })
+    )
+  }
+
+  /**
+   * Converts one socket message into an app event named after the message's
+   * `type`, carrying the task id and the summed byte counts of its items.
+   * Messages that are not JSON or have no `task.items` array are skipped.
+   */
+  private handleSocketMessage(event: MessageEvent) {
+    let data: any
+    try {
+      data = JSON.parse(event.data)
+    } catch (e) {
+      console.debug(e)
+      return
+    }
+
+    const items = data?.task?.items
+    if (!Array.isArray(items)) return
+
+    const transferred = items.reduce(
+      (sum: number, item: any) => sum + (item?.downloadedBytes ?? 0),
+      0
+    )
+    const total = items.reduce(
+      (sum: number, item: any) => sum + (item?.bytes ?? 0),
+      0
+    )
+    // A zero total would otherwise yield NaN (0 / 0) or Infinity.
+    const percent = total > 0 ? (transferred / total) * 100 : 0
+
+    events.emit(data.type, {
+      modelId: data.task.id,
+      percent: percent,
+      size: {
+        transferred: transferred,
+        total: total,
+      },
+    })
+  }
+
+  /**
+   * Mutates and returns the given server model: `parameters` and then
+   * `settings` are each set to a shallow copy of the whole model object as it
+   * stands at that point, and `metadata` is replaced with `{ tags: [] }`.
+   */
+  private transformModel(model: any) {
+    model.parameters = setParameters<ModelRuntimeParams>(model)
+    model.settings = setParameters<ModelSettingParams>(model)
+    model.metadata = {
+      tags: [],
+    }
+    return model as Model
+  }
+}
+
+/** Mapped type that mirrors every property of `T` with its type unchanged. */
+type FilteredParams<T> = { [Key in keyof T]: T[Key] }
+
+/**
+ * Produces a shallow copy of `params`.
+ * @param params object to copy
+ * @returns a new object holding the same own enumerable properties
+ */
+function setParameters<T>(params: T): T {
+  const clone: FilteredParams<T> = { ...params }
+  return clone
+}
--- a/extensions/model-extension/src/helpers/path.test.ts
+++ b/extensions/model-extension/src/helpers/path.test.ts
@ -1,87 +0,0 @@
-import { extractFileName } from './path';
-
-describe('extractFileName Function', () => {
-  it('should correctly extract the file name with the provided file extension', () => {
-    const url = 'http://example.com/some/path/to/file.ext';
-    const fileExtension = '.ext';
-    const fileName = extractFileName(url, fileExtension);
-    expect(fileName).toBe('file.ext');
-  });
-
-  it('should correctly append the file extension if it does not already exist in the file name', () => {
-    const url = 'http://example.com/some/path/to/file';
-    const fileExtension = '.txt';
-    const fileName = extractFileName(url, fileExtension);
-    expect(fileName).toBe('file.txt');
-  });
-
-  it('should handle cases where the URL does not have a file extension correctly', () => {
-    const url = 'http://example.com/some/path/to/file';
-    const fileExtension = '.jpg';
-    const fileName = extractFileName(url, fileExtension);
-    expect(fileName).toBe('file.jpg');
-  });
-
-  it('should correctly handle URLs without a trailing slash', () => {
-    const url = 'http://example.com/some/path/tofile';
-    const fileExtension = '.txt';
-    const fileName = extractFileName(url, fileExtension);
-    expect(fileName).toBe('tofile.txt');
-  });
-
-  it('should correctly handle URLs with multiple file extensions', () => {
-    const url = 'http://example.com/some/path/tofile.tar.gz';
-    const fileExtension = '.gz';
-    const fileName = extractFileName(url, fileExtension);
-    expect(fileName).toBe('tofile.tar.gz');
-  });
-
-  it('should correctly handle URLs with special characters', () => {
-    const url = 'http://example.com/some/path/tófílë.extë';
-    const fileExtension = '.extë';
-    const fileName = extractFileName(url, fileExtension);
-    expect(fileName).toBe('tófílë.extë');
-  });
-
-  it('should correctly handle URLs that are just a file with no path', () => {
-    const url = 'http://example.com/file.txt';
-    const fileExtension = '.txt';
-    const fileName = extractFileName(url, fileExtension);
-    expect(fileName).toBe('file.txt');
-  });
-
-  it('should correctly handle URLs that have special query parameters', () => {
-    const url = 'http://example.com/some/path/tofile.ext?query=1';
-    const fileExtension = '.ext';
-    const fileName = extractFileName(url.split('?')[0], fileExtension);
-    expect(fileName).toBe('tofile.ext');
-  });
-
-  it('should correctly handle URLs that have uppercase characters', () => {
-    const url = 'http://EXAMPLE.COM/PATH/TO/FILE.EXT';
-    const fileExtension = '.ext';
-    const fileName = extractFileName(url, fileExtension);
-    expect(fileName).toBe('FILE.EXT');
-  });
-
-  it('should correctly handle invalid URLs', () => {
-    const url = 'invalid-url';
-    const fileExtension = '.txt';
-    const fileName = extractFileName(url, fileExtension);
-    expect(fileName).toBe('invalid-url.txt');
-  });
-
-  it('should correctly handle empty URLs', () => {
-    const url = '';
-    const fileExtension = '.txt';
-    const fileName = extractFileName(url, fileExtension);
-    expect(fileName).toBe('.txt');
-  });
-
-  it('should correctly handle undefined URLs', () => {
-    const url = undefined;
-    const fileExtension = '.txt';
-    const fileName = extractFileName(url as any, fileExtension);
-    expect(fileName).toBe('.txt');
-  });
-});
--- a/extensions/model-extension/src/helpers/path.ts
+++ b/extensions/model-extension/src/helpers/path.ts
@ -1,13 +0,0 @@
-/**
- *  try to retrieve the download file name from the source url
- */
-
-export function extractFileName(url: string, fileExtension: string): string {
-  if(!url) return fileExtension
-
-  const extractedFileName = url.split('/').pop()
-  const fileName = extractedFileName.toLowerCase().endsWith(fileExtension)
-    ? extractedFileName
-    : extractedFileName + fileExtension
-  return fileName
-}
--- a/extensions/model-extension/src/index.test.ts
+++ b/extensions/model-extension/src/index.test.ts
@ -1,846 +1,90 @@
-/**
- * @jest-environment jsdom
- */
-const readDirSyncMock = jest.fn()
-const existMock = jest.fn()
-const readFileSyncMock = jest.fn()
-const downloadMock = jest.fn()
-const mkdirMock = jest.fn()
-const writeFileSyncMock = jest.fn()
-const copyFileMock = jest.fn()
-const dirNameMock = jest.fn()
-const executeMock = jest.fn()
+import JanModelExtension from './index'
+import { Model } from '@janhq/core'
+
+let SETTINGS = []
+// @ts-ignore
+global.SETTINGS = SETTINGS

 jest.mock('@janhq/core', () => ({
  ...jest.requireActual('@janhq/core/node'),
  events: {
    emit: jest.fn(),
  },
-  fs: {
-    existsSync: existMock,
-    readdirSync: readDirSyncMock,
-    readFileSync: readFileSyncMock,
-    writeFileSync: writeFileSyncMock,
-    mkdir: mkdirMock,
-    copyFile: copyFileMock,
-    fileStat: () => ({
-      isDirectory: false,
-    }),
-  },
-  dirName: dirNameMock,
  joinPath: (paths) => paths.join('/'),
-  ModelExtension: jest.fn(),
-  downloadFile: downloadMock,
-  executeOnMain: executeMock,
+  ModelExtension: jest.fn().mockImplementation(function () {
+    // @ts-ignore
+    this.registerSettings = () => {
+      return Promise.resolve()
+    }
+    // @ts-ignore
+    return this
+  }),
 }))

-jest.mock('@huggingface/gguf')
-
-global.fetch = jest.fn(() =>
-  Promise.resolve({
-    json: () => Promise.resolve({ test: 100 }),
-    arrayBuffer: jest.fn(),
-  })
-) as jest.Mock
-
-import JanModelExtension from '.'
-import { fs, dirName } from '@janhq/core'
-import { gguf } from '@huggingface/gguf'
-
 describe('JanModelExtension', () => {
-  let sut: JanModelExtension
-
-  beforeAll(() => {
-    // @ts-ignore
-    sut = new JanModelExtension()
-  })
+  let extension: JanModelExtension
+  let mockCortexAPI: any

  beforeEach(() => {
-    jest.clearAllMocks()
-  })
-
-  describe('getConfiguredModels', () => {
-    describe("when there's no models are pre-populated", () => {
-      it('should return empty array', async () => {
-        // Mock configured models data
-        const configuredModels = []
-        existMock.mockReturnValue(true)
-        readDirSyncMock.mockReturnValue([])
-
-        const result = await sut.getConfiguredModels()
-        expect(result).toEqual([])
-      })
-    })
-
-    describe("when there's are pre-populated models - all flattened", () => {
-      it('returns configured models data - flatten folder - with correct file_path and model id', async () => {
-        // Mock configured models data
-        const configuredModels = [
-          {
-            id: '1',
-            name: 'Model 1',
-            version: '1.0.0',
-            description: 'Model 1 description',
-            object: {
-              type: 'model',
-              uri: 'http://localhost:5000/models/model1',
-            },
-            format: 'onnx',
-            sources: [],
-            created: new Date(),
-            updated: new Date(),
-            parameters: {},
-            settings: {},
-            metadata: {},
-            engine: 'test',
-          } as any,
-          {
-            id: '2',
-            name: 'Model 2',
-            version: '2.0.0',
-            description: 'Model 2 description',
-            object: {
-              type: 'model',
-              uri: 'http://localhost:5000/models/model2',
-            },
-            format: 'onnx',
-            sources: [],
-            parameters: {},
-            settings: {},
-            metadata: {},
-            engine: 'test',
-          } as any,
-        ]
-        existMock.mockReturnValue(true)
-
-        readDirSyncMock.mockImplementation((path) => {
-          if (path === 'file://models') return ['model1', 'model2']
-          else return ['model.json']
-        })
-
-        readFileSyncMock.mockImplementation((path) => {
-          if (path.includes('model1'))
-            return JSON.stringify(configuredModels[0])
-          else return JSON.stringify(configuredModels[1])
-        })
-
-        const result = await sut.getConfiguredModels()
-        expect(result).toEqual(
-          expect.arrayContaining([
-            expect.objectContaining({
-              file_path: 'file://models/model1/model.json',
-              id: '1',
-            }),
-            expect.objectContaining({
-              file_path: 'file://models/model2/model.json',
-              id: '2',
-            }),
-          ])
-        )
-      })
-    })
-
-    describe("when there's are pre-populated models - there are nested folders", () => {
-      it('returns configured models data - flatten folder - with correct file_path and model id', async () => {
-        // Mock configured models data
-        const configuredModels = [
-          {
-            id: '1',
-            name: 'Model 1',
-            version: '1.0.0',
-            description: 'Model 1 description',
-            object: {
-              type: 'model',
-              uri: 'http://localhost:5000/models/model1',
-            },
-            format: 'onnx',
-            sources: [],
-            created: new Date(),
-            updated: new Date(),
-            parameters: {},
-            settings: {},
-            metadata: {},
-            engine: 'test',
-          } as any,
-          {
-            id: '2',
-            name: 'Model 2',
-            version: '2.0.0',
-            description: 'Model 2 description',
-            object: {
-              type: 'model',
-              uri: 'http://localhost:5000/models/model2',
-            },
-            format: 'onnx',
-            sources: [],
-            parameters: {},
-            settings: {},
-            metadata: {},
-            engine: 'test',
-          } as any,
-        ]
-        existMock.mockReturnValue(true)
-
-        readDirSyncMock.mockImplementation((path) => {
-          if (path === 'file://models') return ['model1', 'model2/model2-1']
-          else return ['model.json']
-        })
-
-        readFileSyncMock.mockImplementation((path) => {
-          if (path.includes('model1'))
-            return JSON.stringify(configuredModels[0])
-          else if (path.includes('model2/model2-1'))
-            return JSON.stringify(configuredModels[1])
-        })
-
-        const result = await sut.getConfiguredModels()
-        expect(result).toEqual(
-          expect.arrayContaining([
-            expect.objectContaining({
-              file_path: 'file://models/model1/model.json',
-              id: '1',
-            }),
-            expect.objectContaining({
-              file_path: 'file://models/model2/model2-1/model.json',
-              id: '2',
-            }),
-          ])
-        )
-      })
-    })
-  })
-
-  describe('getDownloadedModels', () => {
-    describe('no models downloaded', () => {
-      it('should return empty array', async () => {
-        // Mock downloaded models data
-        existMock.mockReturnValue(true)
-        readDirSyncMock.mockReturnValue([])
-
-        const result = await sut.getDownloadedModels()
-        expect(result).toEqual([])
-      })
-    })
-    describe('only one model is downloaded', () => {
-      describe('flatten folder', () => {
-        it('returns downloaded models - with correct file_path and model id', async () => {
-          // Mock configured models data
-          const configuredModels = [
-            {
-              id: '1',
-              name: 'Model 1',
-              version: '1.0.0',
-              description: 'Model 1 description',
-              object: {
-                type: 'model',
-                uri: 'http://localhost:5000/models/model1',
-              },
-              format: 'onnx',
-              sources: [],
-              created: new Date(),
-              updated: new Date(),
-              parameters: {},
-              settings: {},
-              metadata: {},
-              engine: 'test',
-            } as any,
-            {
-              id: '2',
-              name: 'Model 2',
-              version: '2.0.0',
-              description: 'Model 2 description',
-              object: {
-                type: 'model',
-                uri: 'http://localhost:5000/models/model2',
-              },
-              format: 'onnx',
-              sources: [],
-              parameters: {},
-              settings: {},
-              metadata: {},
-              engine: 'test',
-            } as any,
-          ]
-          existMock.mockReturnValue(true)
-
-          readDirSyncMock.mockImplementation((path) => {
-            if (path === 'file://models') return ['model1', 'model2']
-            else if (path === 'file://models/model1')
-              return ['model.json', 'test.gguf']
-            else return ['model.json']
-          })
-
-          readFileSyncMock.mockImplementation((path) => {
-            if (path.includes('model1'))
-              return JSON.stringify(configuredModels[0])
-            else return JSON.stringify(configuredModels[1])
-          })
-
-          const result = await sut.getDownloadedModels()
-          expect(result).toEqual(
-            expect.arrayContaining([
-              expect.objectContaining({
-                file_path: 'file://models/model1/model.json',
-                id: '1',
-              }),
-            ])
-          )
-        })
-      })
-    })
-
-    describe('all models are downloaded', () => {
-      describe('nested folders', () => {
-        it('returns downloaded models - with correct file_path and model id', async () => {
-          // Mock configured models data
-          const configuredModels = [
-            {
-              id: '1',
-              name: 'Model 1',
-              version: '1.0.0',
-              description: 'Model 1 description',
-              object: {
-                type: 'model',
-                uri: 'http://localhost:5000/models/model1',
-              },
-              format: 'onnx',
-              sources: [],
-              created: new Date(),
-              updated: new Date(),
-              parameters: {},
-              settings: {},
-              metadata: {},
-              engine: 'test',
-            } as any,
-            {
-              id: '2',
-              name: 'Model 2',
-              version: '2.0.0',
-              description: 'Model 2 description',
-              object: {
-                type: 'model',
-                uri: 'http://localhost:5000/models/model2',
-              },
-              format: 'onnx',
-              sources: [],
-              parameters: {},
-              settings: {},
-              metadata: {},
-              engine: 'test',
-            } as any,
-          ]
-          existMock.mockReturnValue(true)
-
-          readDirSyncMock.mockImplementation((path) => {
-            if (path === 'file://models') return ['model1', 'model2/model2-1']
-            else return ['model.json', 'test.gguf']
-          })
-
-          readFileSyncMock.mockImplementation((path) => {
-            if (path.includes('model1'))
-              return JSON.stringify(configuredModels[0])
-            else return JSON.stringify(configuredModels[1])
-          })
-
-          const result = await sut.getDownloadedModels()
-          expect(result).toEqual(
-            expect.arrayContaining([
-              expect.objectContaining({
-                file_path: 'file://models/model1/model.json',
-                id: '1',
-              }),
-              expect.objectContaining({
-                file_path: 'file://models/model2/model2-1/model.json',
-                id: '2',
-              }),
-            ])
-          )
-        })
-      })
-    })
-
-    describe('all models are downloaded with uppercased GGUF files', () => {
-      it('returns downloaded models - with correct file_path and model id', async () => {
-        // Mock configured models data
-        const configuredModels = [
-          {
-            id: '1',
-            name: 'Model 1',
-            version: '1.0.0',
-            description: 'Model 1 description',
-            object: {
-              type: 'model',
-              uri: 'http://localhost:5000/models/model1',
-            },
-            format: 'onnx',
-            sources: [],
-            created: new Date(),
-            updated: new Date(),
-            parameters: {},
-            settings: {},
-            metadata: {},
-            engine: 'test',
-          } as any,
-          {
-            id: '2',
-            name: 'Model 2',
-            version: '2.0.0',
-            description: 'Model 2 description',
-            object: {
-              type: 'model',
-              uri: 'http://localhost:5000/models/model2',
-            },
-            format: 'onnx',
-            sources: [],
-            parameters: {},
-            settings: {},
-            metadata: {},
-            engine: 'test',
-          } as any,
-        ]
-        existMock.mockReturnValue(true)
-
-        readDirSyncMock.mockImplementation((path) => {
-          if (path === 'file://models') return ['model1', 'model2/model2-1']
-          else if (path === 'file://models/model1')
-            return ['model.json', 'test.GGUF']
-          else return ['model.json', 'test.gguf']
-        })
-
-        readFileSyncMock.mockImplementation((path) => {
-          if (path.includes('model1'))
-            return JSON.stringify(configuredModels[0])
-          else return JSON.stringify(configuredModels[1])
-        })
-
-        const result = await sut.getDownloadedModels()
-        expect(result).toEqual(
-          expect.arrayContaining([
-            expect.objectContaining({
-              file_path: 'file://models/model1/model.json',
-              id: '1',
-            }),
-            expect.objectContaining({
-              file_path: 'file://models/model2/model2-1/model.json',
-              id: '2',
-            }),
-          ])
-        )
-      })
-    })
-
-    describe('all models are downloaded - GGUF & Tensort RT', () => {
-      it('returns downloaded models - with correct file_path and model id', async () => {
-        // Mock configured models data
-        const configuredModels = [
-          {
-            id: '1',
-            name: 'Model 1',
-            version: '1.0.0',
-            description: 'Model 1 description',
-            object: {
-              type: 'model',
-              uri: 'http://localhost:5000/models/model1',
-            },
-            format: 'onnx',
-            sources: [],
-            created: new Date(),
-            updated: new Date(),
-            parameters: {},
-            settings: {},
-            metadata: {},
-            engine: 'test',
-          } as any,
-          {
-            id: '2',
-            name: 'Model 2',
-            version: '2.0.0',
-            description: 'Model 2 description',
-            object: {
-              type: 'model',
-              uri: 'http://localhost:5000/models/model2',
-            },
-            format: 'onnx',
-            sources: [],
-            parameters: {},
-            settings: {},
-            metadata: {},
-            engine: 'test',
-          } as any,
-        ]
-        existMock.mockReturnValue(true)
-
-        readDirSyncMock.mockImplementation((path) => {
-          if (path === 'file://models') return ['model1', 'model2/model2-1']
-          else if (path === 'file://models/model1')
-            return ['model.json', 'test.gguf']
-          else return ['model.json', 'test.engine']
-        })
-
-        readFileSyncMock.mockImplementation((path) => {
-          if (path.includes('model1'))
-            return JSON.stringify(configuredModels[0])
-          else return JSON.stringify(configuredModels[1])
-        })
-
-        const result = await sut.getDownloadedModels()
-        expect(result).toEqual(
-          expect.arrayContaining([
-            expect.objectContaining({
-              file_path: 'file://models/model1/model.json',
-              id: '1',
-            }),
-            expect.objectContaining({
-              file_path: 'file://models/model2/model2-1/model.json',
-              id: '2',
-            }),
-          ])
-        )
-      })
-    })
-  })
-
-  describe('deleteModel', () => {
-    describe('model is a GGUF model', () => {
-      it('should delete the GGUF file', async () => {
-        fs.unlinkSync = jest.fn()
-        const dirMock = dirName as jest.Mock
-        dirMock.mockReturnValue('file://models/model1')
-
-        fs.readFileSync = jest.fn().mockReturnValue(JSON.stringify({}))
-
-        readDirSyncMock.mockImplementation((path) => {
-          return ['model.json', 'test.gguf']
-        })
-
-        existMock.mockReturnValue(true)
-
-        await sut.deleteModel({
-          file_path: 'file://models/model1/model.json',
-        } as any)
-
-        expect(fs.unlinkSync).toHaveBeenCalledWith(
-          'file://models/model1/test.gguf'
-        )
-      })
-
-      it('no gguf file presented', async () => {
-        fs.unlinkSync = jest.fn()
-        const dirMock = dirName as jest.Mock
-        dirMock.mockReturnValue('file://models/model1')
-
-        fs.readFileSync = jest.fn().mockReturnValue(JSON.stringify({}))
-
-        readDirSyncMock.mockReturnValue(['model.json'])
-
-        existMock.mockReturnValue(true)
-
-        await sut.deleteModel({
-          file_path: 'file://models/model1/model.json',
-        } as any)
-
-        expect(fs.unlinkSync).toHaveBeenCalledTimes(0)
-      })
-
-      it('delete an imported model', async () => {
-        fs.rm = jest.fn()
-        const dirMock = dirName as jest.Mock
-        dirMock.mockReturnValue('file://models/model1')
-
-        readDirSyncMock.mockReturnValue(['model.json', 'test.gguf'])
-
-        // MARK: This is a tricky logic implement?
-        // I will just add test for now but will align on the legacy implementation
-        fs.readFileSync = jest.fn().mockReturnValue(
-          JSON.stringify({
-            metadata: {
-              author: 'user',
-            },
-          })
-        )
-
-        existMock.mockReturnValue(true)
-
-        await sut.deleteModel({
-          file_path: 'file://models/model1/model.json',
-        } as any)
-
-        expect(fs.rm).toHaveBeenCalledWith('file://models/model1')
-      })
-
-      it('delete tensorrt-models', async () => {
-        fs.rm = jest.fn()
-        const dirMock = dirName as jest.Mock
-        dirMock.mockReturnValue('file://models/model1')
-
-        readDirSyncMock.mockReturnValue(['model.json', 'test.engine'])
-
-        fs.readFileSync = jest.fn().mockReturnValue(JSON.stringify({}))
-
-        existMock.mockReturnValue(true)
-
-        await sut.deleteModel({
-          file_path: 'file://models/model1/model.json',
-        } as any)
-
-        expect(fs.unlinkSync).toHaveBeenCalledWith(
-          'file://models/model1/test.engine'
-        )
-      })
-    })
-  })
-
-  describe('downloadModel', () => {
-    const model: any = {
-      id: 'model-id',
-      name: 'Test Model',
-      sources: [
-        { url: 'http://example.com/model.gguf', filename: 'model.gguf' },
-      ],
-      engine: 'test-engine',
+    mockCortexAPI = {
+      getModels: jest.fn().mockResolvedValue([]),
+      pullModel: jest.fn().mockResolvedValue(undefined),
+      importModel: jest.fn().mockResolvedValue(undefined),
+      deleteModel: jest.fn().mockResolvedValue(undefined),
+      updateModel: jest.fn().mockResolvedValue({}),
+      cancelModelPull: jest.fn().mockResolvedValue(undefined),
    }

-    const network = {
-      ignoreSSL: true,
-      proxy: 'http://proxy.example.com',
-    }
+    // @ts-ignore
+    extension = new JanModelExtension()
+    extension.cortexAPI = mockCortexAPI
+  })

-    const gpuSettings: any = {
-      gpus: [{ name: 'nvidia-rtx-3080', arch: 'ampere' }],
-    }
+  it('should register settings on load', async () => {
+    // @ts-ignore
+    const registerSettingsSpy = jest.spyOn(extension, 'registerSettings')
+    await extension.onLoad()
+    expect(registerSettingsSpy).toHaveBeenCalledWith(SETTINGS)
+  })

-    it('should reject with invalid gguf metadata', async () => {
-      existMock.mockImplementation(() => false)
+  it('should pull a model', async () => {
+    const model = 'test-model'
+    await extension.pullModel(model)
+    expect(mockCortexAPI.pullModel).toHaveBeenCalledWith(model)
+  })

-      expect(
-        sut.downloadModel(model, gpuSettings, network)
-      ).rejects.toBeTruthy()
-    })
+  it('should cancel model download', async () => {
+    const model = 'test-model'
+    await extension.cancelModelPull(model)
+    expect(mockCortexAPI.cancelModelPull).toHaveBeenCalledWith(model)
+  })

-    it('should download corresponding ID', async () => {
-      existMock.mockImplementation(() => true)
-      dirNameMock.mockImplementation(() => 'file://models/model1')
-      downloadMock.mockImplementation(() => {
-        return Promise.resolve({})
-      })
+  it('should delete a model', async () => {
+    const model = 'test-model'
+    await extension.deleteModel(model)
+    expect(mockCortexAPI.deleteModel).toHaveBeenCalledWith(model)
+  })

-      expect(
-        await sut.downloadModel(
-          { ...model, file_path: 'file://models/model1/model.json' },
-          gpuSettings,
-          network
-        )
-      ).toBeUndefined()
+  it('should get all models', async () => {
+    const models = await extension.getModels()
+    expect(models).toEqual([])
+    expect(mockCortexAPI.getModels).toHaveBeenCalled()
+  })

-      expect(downloadMock).toHaveBeenCalledWith(
-        {
-          localPath: 'file://models/model1/model.gguf',
-          modelId: 'model-id',
-          url: 'http://example.com/model.gguf',
-        },
-        { ignoreSSL: true, proxy: 'http://proxy.example.com' }
-      )
-    })
+  it('should update a model', async () => {
+    const model: Partial<Model> = { id: 'test-model' }
+    const updatedModel = await extension.updateModel(model)
+    expect(updatedModel).toEqual({})
+    expect(mockCortexAPI.updateModel).toHaveBeenCalledWith(model)
+  })

-    it('should handle invalid model file', async () => {
-      executeMock.mockResolvedValue({})
-
-      fs.readFileSync = jest.fn(() => {
-        return JSON.stringify({ metadata: { author: 'user' } })
-      })
-
-      expect(
-        sut.downloadModel(
-          { ...model, file_path: 'file://models/model1/model.json' },
-          gpuSettings,
-          network
-        )
-      ).resolves.not.toThrow()
-
-      expect(downloadMock).not.toHaveBeenCalled()
-    })
-    it('should handle model file with no sources', async () => {
-      executeMock.mockResolvedValue({})
-      const modelWithoutSources = { ...model, sources: [] }
-
-      expect(
-        sut.downloadModel(
-          {
-            ...modelWithoutSources,
-            file_path: 'file://models/model1/model.json',
-          },
-          gpuSettings,
-          network
-        )
-      ).resolves.toBe(undefined)
-
-      expect(downloadMock).not.toHaveBeenCalled()
-    })
-
-    it('should handle model file with multiple sources', async () => {
-      const modelWithMultipleSources = {
-        ...model,
-        sources: [
-          { url: 'http://example.com/model1.gguf', filename: 'model1.gguf' },
-          { url: 'http://example.com/model2.gguf', filename: 'model2.gguf' },
-        ],
-      }
-
-      executeMock.mockResolvedValue({
-        metadata: { 'tokenizer.ggml.eos_token_id': 0 },
-      })
-      ;(gguf as jest.Mock).mockResolvedValue({
-        metadata: { 'tokenizer.ggml.eos_token_id': 0 },
-      })
-      // @ts-ignore
-      global.NODE = 'node'
-      // @ts-ignore
-      global.DEFAULT_MODEL = {
-        parameters: { stop: [] },
-      }
-      downloadMock.mockImplementation(() => {
-        return Promise.resolve({})
-      })
-
-      expect(
-        await sut.downloadModel(
-          {
-            ...modelWithMultipleSources,
-            file_path: 'file://models/model1/model.json',
-          },
-          gpuSettings,
-          network
-        )
-      ).toBeUndefined()
-
-      expect(downloadMock).toHaveBeenCalledWith(
-        {
-          localPath: 'file://models/model1/model1.gguf',
-          modelId: 'model-id',
-          url: 'http://example.com/model1.gguf',
-        },
-        { ignoreSSL: true, proxy: 'http://proxy.example.com' }
-      )
-
-      expect(downloadMock).toHaveBeenCalledWith(
-        {
-          localPath: 'file://models/model1/model2.gguf',
-          modelId: 'model-id',
-          url: 'http://example.com/model2.gguf',
-        },
-        { ignoreSSL: true, proxy: 'http://proxy.example.com' }
-      )
-    })
-
-    it('should handle model file with no file_path', async () => {
-      executeMock.mockResolvedValue({
-        metadata: { 'tokenizer.ggml.eos_token_id': 0 },
-      })
-      ;(gguf as jest.Mock).mockResolvedValue({
-        metadata: { 'tokenizer.ggml.eos_token_id': 0 },
-      })
-      // @ts-ignore
-      global.NODE = 'node'
-      // @ts-ignore
-      global.DEFAULT_MODEL = {
-        parameters: { stop: [] },
-      }
-      const modelWithoutFilepath = { ...model, file_path: undefined }
-
-      await sut.downloadModel(modelWithoutFilepath, gpuSettings, network)
-
-      expect(downloadMock).toHaveBeenCalledWith(
-        expect.objectContaining({
-          localPath: 'file://models/model-id/model.gguf',
-        }),
-        expect.anything()
-      )
-    })
-
-    it('should handle model file with invalid file_path', async () => {
-      executeMock.mockResolvedValue({
-        metadata: { 'tokenizer.ggml.eos_token_id': 0 },
-      })
-      ;(gguf as jest.Mock).mockResolvedValue({
-        metadata: { 'tokenizer.ggml.eos_token_id': 0 },
-      })
-      // @ts-ignore
-      global.NODE = 'node'
-      // @ts-ignore
-      global.DEFAULT_MODEL = {
-        parameters: { stop: [] },
-      }
-      const modelWithInvalidFilepath = {
-        ...model,
-        file_path: 'file://models/invalid-model.json',
-      }
-
-      await sut.downloadModel(modelWithInvalidFilepath, gpuSettings, network)
-
-      expect(downloadMock).toHaveBeenCalledWith(
-        expect.objectContaining({
-          localPath: 'file://models/model1/model.gguf',
-        }),
-        expect.anything()
-      )
-    })
-
-    it('should handle model with valid chat_template', async () => {
-      executeMock.mockResolvedValue('{prompt}')
-      ;(gguf as jest.Mock).mockResolvedValue({
-        metadata: {},
-      })
-      // @ts-ignore
-      global.NODE = 'node'
-      // @ts-ignore
-      global.DEFAULT_MODEL = {
-        parameters: { stop: [] },
-        settings: {
-          prompt_template: '<|im-start|>{prompt}<|im-end|>',
-        },
-      }
-
-      const result = await sut.retrieveGGUFMetadata({})
-
-      expect(result).toEqual({
-        parameters: {
-          stop: [],
-        },
-        settings: {
-          ctx_len: 4096,
-          ngl: 33,
-          prompt_template: '{prompt}',
-        },
-      })
-    })
-
-    it('should handle model without chat_template', async () => {
-      executeMock.mockRejectedValue({})
-      ;(gguf as jest.Mock).mockResolvedValue({
-        metadata: {},
-      })
-      // @ts-ignore
-      global.NODE = 'node'
-      // @ts-ignore
-      global.DEFAULT_MODEL = {
-        parameters: { stop: [] },
-        settings: {
-          prompt_template: '<|im-start|>{prompt}<|im-end|>',
-        },
-      }
-
-      const result = await sut.retrieveGGUFMetadata({})
-
-      expect(result).toEqual({
-        parameters: {
-          stop: [],
-        },
-        settings: {
-          ctx_len: 4096,
-          ngl: 33,
-          prompt_template: '<|im-start|>{prompt}<|im-end|>',
-        },
-      })
-    })
+  it('should import a model', async () => {
+    const model: any = { path: 'test-path' }
+    const optionType: any = 'test-option'
+    await extension.importModel(model, optionType)
+    expect(mockCortexAPI.importModel).toHaveBeenCalledWith(
+      model.path,
+      optionType
+    )
  })
 })
--- a/extensions/model-extension/src/index.ts
+++ b/extensions/model-extension/src/index.ts
--- a/extensions/model-extension/src/node/index.ts
+++ b/extensions/model-extension/src/node/index.ts
@ -1,54 +0,0 @@
-import { closeSync, openSync, readSync } from 'fs'
-import { Template } from '@huggingface/jinja'
-/**
- * This is to retrieve the metadata from a GGUF file
- * It uses hyllama and jinja from @huggingface module
- */
-export const retrieveGGUFMetadata = async (ggufPath: string) => {
-  try {
-    const { ggufMetadata } = await import('hyllama')
-    // Read first 10mb of gguf file
-    const fd = openSync(ggufPath, 'r')
-    const buffer = new Uint8Array(10_000_000)
-    readSync(fd, buffer, 0, 10_000_000, 0)
-    closeSync(fd)
-
-    // Parse metadata and tensor info
-    const { metadata } = ggufMetadata(buffer.buffer)
-
-    return metadata
-  } catch (e) {
-    console.log('[MODEL_EXT]', e)
-  }
-}
-
-/**
- * Convert metadata to jinja template
- * @param metadata
- */
-export const renderJinjaTemplate = (metadata: any): string => {
-  const template = new Template(metadata['tokenizer.chat_template'])
-  const eos_id = metadata['tokenizer.ggml.eos_token_id']
-  const bos_id = metadata['tokenizer.ggml.bos_token_id']
-  if (eos_id === undefined || bos_id === undefined) {
-    return ''
-  }
-  const eos_token = metadata['tokenizer.ggml.tokens'][eos_id]
-  const bos_token = metadata['tokenizer.ggml.tokens'][bos_id]
-  // Parse jinja template
-  return template.render({
-    add_generation_prompt: true,
-    eos_token,
-    bos_token,
-    messages: [
-      {
-        role: 'system',
-        content: '{system_message}',
-      },
-      {
-        role: 'user',
-        content: '{prompt}',
-      },
-    ],
-  })
-}
--- a/extensions/model-extension/src/node/node.test.ts
+++ b/extensions/model-extension/src/node/node.test.ts
@ -1,53 +0,0 @@
-import { renderJinjaTemplate } from './index'
-import { Template } from '@huggingface/jinja'
-
-jest.mock('@huggingface/jinja', () => ({
-  Template: jest.fn((template: string) => ({
-    render: jest.fn(() => `${template}_rendered`),
-  })),
-}))
-
-describe('renderJinjaTemplate', () => {
-  beforeEach(() => {
-    jest.clearAllMocks() // Clear mocks between tests
-  })
-
-  it('should render the template with correct parameters', () => {
-    const metadata = {
-      'tokenizer.chat_template': 'Hello, {{ messages }}!',
-      'tokenizer.ggml.eos_token_id': 0,
-      'tokenizer.ggml.bos_token_id': 1,
-      'tokenizer.ggml.tokens': ['EOS', 'BOS'],
-    }
-
-    const renderedTemplate = renderJinjaTemplate(metadata)
-
-    expect(Template).toHaveBeenCalledWith('Hello, {{ messages }}!')
-
-    expect(renderedTemplate).toBe('Hello, {{ messages }}!_rendered')
-  })
-
-  it('should handle missing token IDs gracefully', () => {
-    const metadata = {
-      'tokenizer.chat_template': 'Hello, {{ messages }}!',
-      'tokenizer.ggml.eos_token_id': 0,
-      'tokenizer.ggml.tokens': ['EOS'],
-    }
-
-    const renderedTemplate = renderJinjaTemplate(metadata)
-
-    expect(Template).toHaveBeenCalledWith('Hello, {{ messages }}!')
-
-    expect(renderedTemplate).toBe('')
-  })
-
-  it('should handle empty template gracefully', () => {
-    const metadata = {}
-
-    const renderedTemplate = renderJinjaTemplate(metadata)
-
-    expect(Template).toHaveBeenCalledWith(undefined)
-
-    expect(renderedTemplate).toBe("")
-  })
-})
--- a/extensions/tensorrt-llm-extension/src/index.ts
+++ b/extensions/tensorrt-llm-extension/src/index.ts
@ -7,9 +7,7 @@ import {
  DownloadEvent,
  DownloadRequest,
  DownloadState,
-  GpuSetting,
  InstallationState,
-  Model,
  baseName,
  downloadFile,
  events,
@ -23,7 +21,7 @@ import {
  ModelEvent,
  getJanDataFolderPath,
  SystemInformation,
-  ModelFile,
+  Model,
 } from '@janhq/core'

 /**
@ -137,7 +135,7 @@ export default class TensorRTLLMExtension extends LocalOAIEngine {
    events.emit(ModelEvent.OnModelsUpdate, {})
  }

-  override async loadModel(model: ModelFile): Promise<void> {
+  override async loadModel(model: Model): Promise<void> {
    if ((await this.installationState()) === 'Installed')
      return super.loadModel(model)

@ -177,7 +175,7 @@ export default class TensorRTLLMExtension extends LocalOAIEngine {
  override async inference(data: MessageRequest) {
    if (!this.loadedModel) return
    // TensorRT LLM Extension supports streaming only
-    if (data.model) data.model.parameters.stream = true
+    if (data.model && data.model.parameters) data.model.parameters.stream = true
    super.inference(data)
  }

--- a/extensions/tensorrt-llm-extension/src/node/index.ts
+++ b/extensions/tensorrt-llm-extension/src/node/index.ts
@ -41,7 +41,7 @@ async function loadModel(
  // e.g. ~/jan/models/llama-2
  let modelFolder = params.modelFolder

-  if (params.model.settings.prompt_template) {
+  if (params.model.settings?.prompt_template) {
    const promptTemplate = params.model.settings.prompt_template
    const prompt = promptTemplateConverter(promptTemplate)
    if (prompt?.error) {
--- a/Show More
+++ b/Show More