diff --git a/core/src/browser/extension.ts b/core/src/browser/extension.ts
index 603445745..d934e1c06 100644
--- a/core/src/browser/extension.ts
+++ b/core/src/browser/extension.ts
@@ -1,6 +1,8 @@
-import { SettingComponentProps } from '../types'
+import { Model, ModelEvent, SettingComponentProps } from '../types'
 import { getJanDataFolderPath, joinPath } from './core'
+import { events } from './events'
 import { fs } from './fs'
+import { ModelManager } from './models'
 
 export enum ExtensionTypeEnum {
   Assistant = 'assistant',
@@ -103,6 +105,22 @@ export abstract class BaseExtension implements ExtensionType {
     return undefined
   }
 
+  /**
+   * Registers models. The models are persisted in the data map of the in-memory shared ModelManager instance.
+   * @param models
+   */
+  async registerModels(models: Model[]): Promise<void> {
+    for (const model of models) {
+      ModelManager.instance().register(model)
+    }
+    events.emit(ModelEvent.OnModelsUpdate, {})
+  }
+
+  /**
+   * Register settings for the extension.
+   * @param settings
+   * @returns
+   */
   async registerSettings(settings: SettingComponentProps[]): Promise<void> {
     if (!this.name) {
       console.error('Extension name is not defined')
@@ -139,6 +157,12 @@ export abstract class BaseExtension implements ExtensionType {
     }
   }
 
+  /**
+   * Get the setting value for the key.
+   * @param key
+   * @param defaultValue
+   * @returns
+   */
   async getSetting<T>(key: string, defaultValue: T) {
     const keySetting = (await this.getSettings()).find((setting) => setting.key === key)
 
@@ -168,6 +192,10 @@ export abstract class BaseExtension implements ExtensionType {
     return
   }
 
+  /**
+   * Get the settings for the extension.
+   * @returns
+   */
   async getSettings(): Promise<SettingComponentProps[]> {
     if (!this.name) return []
 
@@ -189,6 +217,11 @@ export abstract class BaseExtension implements ExtensionType {
     }
   }
 
+  /**
+   * Update the settings for the extension.
+   * @param componentProps
+   * @returns
+   */
   async updateSettings(componentProps: Partial<SettingComponentProps>[]): Promise<void> {
     if (!this.name) return
diff --git a/core/src/browser/extensions/engines/AIEngine.test.ts b/core/src/browser/extensions/engines/AIEngine.test.ts
index 59dad280f..ab3280e1c 100644
--- a/core/src/browser/extensions/engines/AIEngine.test.ts
+++ b/core/src/browser/extensions/engines/AIEngine.test.ts
@@ -1,8 +1,6 @@
 import { AIEngine } from './AIEngine'
 import { events } from '../../events'
-import { ModelEvent, Model, ModelFile, InferenceEngine } from '../../../types'
-import { EngineManager } from './EngineManager'
-import { fs } from '../../fs'
+import { ModelEvent, Model } from '../../../types'
 
 jest.mock('../../events')
 jest.mock('./EngineManager')
@@ -26,7 +24,7 @@ describe('AIEngine', () => {
   })
 
   it('should load model if provider matches', async () => {
-    const model: ModelFile = { id: 'model1', engine: 'test-provider' } as any
+    const model: any = { id: 'model1', engine: 'test-provider' } as any
 
     await engine.loadModel(model)
 
@@ -34,7 +32,7 @@ describe('AIEngine', () => {
   })
 
   it('should not load model if provider does not match', async () => {
-    const model: ModelFile = { id: 'model1', engine: 'other-provider' } as any
+    const model: any = { id: 'model1', engine: 'other-provider' } as any
 
     await engine.loadModel(model)
diff --git a/core/src/browser/extensions/engines/AIEngine.ts b/core/src/browser/extensions/engines/AIEngine.ts
index 75354de88..d0528b0ab 100644
--- a/core/src/browser/extensions/engines/AIEngine.ts
+++ b/core/src/browser/extensions/engines/AIEngine.ts
@@ -1,17 +1,14 @@
-import { getJanDataFolderPath, joinPath } from '../../core'
 import { events } from '../../events'
 import { BaseExtension } from '../../extension'
-import { fs } from '../../fs'
-import { MessageRequest, Model, ModelEvent, ModelFile } from '../../../types'
+import { MessageRequest, Model, ModelEvent } from '../../../types'
 import { EngineManager } from './EngineManager'
+import { ModelManager } from '../../models/manager'
 
 /**
  * Base AIEngine
  * Applicable to all AI Engines
  */
 export abstract class AIEngine extends BaseExtension {
-  private static modelsFolder = 'models'
-
   // The inference engine
   abstract provider: string
 
@@ -21,7 +18,7 @@ export abstract class AIEngine extends BaseExtension {
   override onLoad() {
     this.registerEngine()
 
-    events.on(ModelEvent.OnModelInit, (model: ModelFile) => this.loadModel(model))
+    events.on(ModelEvent.OnModelInit, (model: Model) => this.loadModel(model))
     events.on(ModelEvent.OnModelStop, (model: Model) => this.unloadModel(model))
   }
 
@@ -32,53 +29,10 @@ export abstract class AIEngine extends BaseExtension {
     EngineManager.instance().register(this)
   }
 
-  async registerModels(models: Model[]): Promise<void> {
-    const modelFolderPath = await joinPath([await getJanDataFolderPath(), AIEngine.modelsFolder])
-
-    let shouldNotifyModelUpdate = false
-    for (const model of models) {
-      const modelPath = await joinPath([modelFolderPath, model.id])
-      const isExist = await fs.existsSync(modelPath)
-
-      if (isExist) {
-        await this.migrateModelIfNeeded(model, modelPath)
-        continue
-      }
-
-      await fs.mkdir(modelPath)
-      await fs.writeFileSync(
-        await joinPath([modelPath, 'model.json']),
-        JSON.stringify(model, null, 2)
-      )
-      shouldNotifyModelUpdate = true
-    }
-
-    if (shouldNotifyModelUpdate) {
-      events.emit(ModelEvent.OnModelsUpdate, {})
-    }
-  }
-
-  async migrateModelIfNeeded(model: Model, modelPath: string): Promise<void> {
-    try {
-      const modelJson = await fs.readFileSync(await joinPath([modelPath, 'model.json']), 'utf-8')
-      const currentModel: Model = JSON.parse(modelJson)
-      if (currentModel.version !== model.version) {
-        await fs.writeFileSync(
-          await joinPath([modelPath, 'model.json']),
-          JSON.stringify(model, null, 2)
-        )
-
-        events.emit(ModelEvent.OnModelsUpdate, {})
-      }
-    } catch (error) {
-      console.warn('Error while try to migrating model', error)
-    }
-  }
-
   /**
    * Loads the model.
    */
-  async loadModel(model: ModelFile): Promise<void> {
+  async loadModel(model: Model): Promise<void> {
     if (model.engine.toString() !== this.provider) return Promise.resolve()
     events.emit(ModelEvent.OnModelReady, model)
     return Promise.resolve()
diff --git a/core/src/browser/extensions/engines/EngineManager.ts b/core/src/browser/extensions/engines/EngineManager.ts
index 2980c5c65..90ce75ac5 100644
--- a/core/src/browser/extensions/engines/EngineManager.ts
+++ b/core/src/browser/extensions/engines/EngineManager.ts
@@ -1,3 +1,4 @@
+import { InferenceEngine } from '../../../types'
 import { AIEngine } from './AIEngine'
 
 /**
@@ -20,6 +21,21 @@ export class EngineManager {
   * @returns The engine, if found.
   */
  get<T extends AIEngine>(provider: string): T | undefined {
+    // Backward compatible provider
+    // nitro is migrated to cortex
+    if (
+      [
+        InferenceEngine.nitro,
+        InferenceEngine.cortex,
+        InferenceEngine.cortex_llamacpp,
+        InferenceEngine.cortex_onnx,
+        InferenceEngine.cortex_tensorrtllm,
+      ]
+        .map((e) => e.toString())
+        .includes(provider)
+    )
+      provider = InferenceEngine.cortex
+
     return this.engines.get(provider) as T | undefined
   }
 
@@ -27,6 +44,6 @@ export class EngineManager {
    * The instance of the engine manager.
    */
   static instance(): EngineManager {
-    return window.core?.engineManager as EngineManager ?? new EngineManager()
+    return (window.core?.engineManager as EngineManager) ?? new EngineManager()
  }
}
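With the mapping above, every legacy or variant engine id collapses to the single cortex engine before lookup. A minimal sketch of the resulting behaviour, assuming a cortex engine has already been registered via registerEngine() and that AIEngine, EngineManager, and InferenceEngine are re-exported from @janhq/core as in the browser index:

import { AIEngine, EngineManager, InferenceEngine } from '@janhq/core'

const manager = EngineManager.instance()

// All legacy and variant ids now resolve to the same registered engine:
const viaNitro = manager.get<AIEngine>(InferenceEngine.nitro)             // 'nitro' -> cortex
const viaLlamaCpp = manager.get<AIEngine>(InferenceEngine.cortex_llamacpp) // 'llama-cpp' -> cortex
const viaCortex = manager.get<AIEngine>(InferenceEngine.cortex)            // canonical id
// viaNitro === viaLlamaCpp && viaLlamaCpp === viaCortex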
diff --git a/core/src/browser/extensions/engines/LocalOAIEngine.test.ts b/core/src/browser/extensions/engines/LocalOAIEngine.test.ts
index 4ae81496f..4a36f6b12 100644
--- a/core/src/browser/extensions/engines/LocalOAIEngine.test.ts
+++ b/core/src/browser/extensions/engines/LocalOAIEngine.test.ts
@@ -3,7 +3,7 @@
  */
 import { LocalOAIEngine } from './LocalOAIEngine'
 import { events } from '../../events'
-import { ModelEvent, ModelFile, Model } from '../../../types'
+import { ModelEvent, Model } from '../../../types'
 import { executeOnMain, systemInformation, dirName } from '../../core'
 
 jest.mock('../../core', () => ({
@@ -43,7 +43,7 @@ describe('LocalOAIEngine', () => {
   })
 
   it('should load model correctly', async () => {
-    const model: ModelFile = { engine: 'testProvider', file_path: 'path/to/model' } as any
+    const model: Model = { engine: 'testProvider', file_path: 'path/to/model' } as any
     const modelFolder = 'path/to'
     const systemInfo = { os: 'testOS' }
     const res = { error: null }
@@ -54,7 +54,6 @@ describe('LocalOAIEngine', () => {
 
     await engine.loadModel(model)
 
-    expect(dirName).toHaveBeenCalledWith(model.file_path)
     expect(systemInformation).toHaveBeenCalled()
     expect(executeOnMain).toHaveBeenCalledWith(
       engine.nodeModule,
@@ -66,7 +65,7 @@ describe('LocalOAIEngine', () => {
   })
 
   it('should handle load model error', async () => {
-    const model: ModelFile = { engine: 'testProvider', file_path: 'path/to/model' } as any
+    const model: any = { engine: 'testProvider', file_path: 'path/to/model' } as any
     const modelFolder = 'path/to'
     const systemInfo = { os: 'testOS' }
     const res = { error: 'load error' }
diff --git a/core/src/browser/extensions/engines/LocalOAIEngine.ts b/core/src/browser/extensions/engines/LocalOAIEngine.ts
index 123b9a593..6c70fa186 100644
--- a/core/src/browser/extensions/engines/LocalOAIEngine.ts
+++ b/core/src/browser/extensions/engines/LocalOAIEngine.ts
@@ -1,6 +1,6 @@
 import { executeOnMain, systemInformation, dirName } from '../../core'
 import { events } from '../../events'
-import { Model, ModelEvent, ModelFile } from '../../../types'
+import { Model, ModelEvent } from '../../../types'
 import { OAIEngine } from './OAIEngine'
 
 /**
@@ -22,35 +22,36 @@ export abstract class LocalOAIEngine extends OAIEngine {
   override onLoad() {
     super.onLoad()
     // These events are applicable to local inference providers
-    events.on(ModelEvent.OnModelInit, (model: ModelFile) => this.loadModel(model))
+    events.on(ModelEvent.OnModelInit, (model: Model) => this.loadModel(model))
     events.on(ModelEvent.OnModelStop, (model: Model) => this.unloadModel(model))
   }
 
   /**
    * Load the model.
    */
-  override async loadModel(model: ModelFile): Promise<void> {
+  override async loadModel(model: Model): Promise<void> {
     if (model.engine.toString() !== this.provider) return
 
-    const modelFolder = await dirName(model.file_path)
-    const systemInfo = await systemInformation()
-    const res = await executeOnMain(
-      this.nodeModule,
-      this.loadModelFunctionName,
-      {
-        modelFolder,
-        model,
-      },
-      systemInfo
-    )
+    // const modelFolder = await dirName(model.file_path)
+    // const systemInfo = await systemInformation()
+    // const res = await executeOnMain(
+    //   this.nodeModule,
+    //   this.loadModelFunctionName,
+    //   {
+    //     modelFolder,
+    //     model,
+    //   },
+    //   systemInfo
+    // )
 
-    if (res?.error) {
-      events.emit(ModelEvent.OnModelFail, { error: res.error })
-      return Promise.reject(res.error)
-    } else {
-      this.loadedModel = model
-      events.emit(ModelEvent.OnModelReady, model)
-      return Promise.resolve()
-    }
+    // if (res?.error) {
+    //   events.emit(ModelEvent.OnModelFail, { error: res.error })
+    //   return Promise.reject(res.error)
+    // } else {
+    //   this.loadedModel = model
+    //   events.emit(ModelEvent.OnModelReady, model)
+    //   return Promise.resolve()
+    // }
+    return Promise.resolve()
   }
 
   /**
    * Stops the model.
diff --git a/core/src/browser/extensions/engines/OAIEngine.ts b/core/src/browser/extensions/engines/OAIEngine.ts
index a8dde4677..694a87264 100644
--- a/core/src/browser/extensions/engines/OAIEngine.ts
+++ b/core/src/browser/extensions/engines/OAIEngine.ts
@@ -55,7 +55,21 @@ export abstract class OAIEngine extends AIEngine {
    * Inference request
    */
   override async inference(data: MessageRequest) {
-    if (data.model?.engine?.toString() !== this.provider) return
+    if (!data.model?.id) {
+      events.emit(MessageEvent.OnMessageResponse, {
+        status: MessageStatus.Error,
+        content: [
+          {
+            type: ContentType.Text,
+            text: {
+              value: 'No model ID provided',
+              annotations: [],
+            },
+          },
+        ],
+      })
+      return
+    }
 
     const timestamp = Date.now()
     const message: ThreadMessage = {
@@ -89,7 +103,6 @@ export abstract class OAIEngine extends AIEngine {
       model: model.id,
       stream: true,
       ...model.parameters,
-      ...(this.provider === 'nitro' ? { engine: 'cortex.llamacpp'} : {}),
     }
     if (this.transformPayload) {
       requestBody = this.transformPayload(requestBody)
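Since routing to an engine now happens through EngineManager rather than a per-provider guard, the only precondition inference() checks is a model id, and a request without one surfaces an error message instead of being dropped silently. A rough sketch of how a caller observes this (payload abbreviated; field names follow the emit above, and events, MessageEvent, and MessageStatus are the standard exports from @janhq/core):

import { events, MessageEvent, MessageStatus } from '@janhq/core'

events.on(MessageEvent.OnMessageResponse, (message: any) => {
  if (message.status === MessageStatus.Error) {
    // Logs 'No model ID provided' for a request sent without model.id
    console.error(message.content?.[0]?.text?.value)
  }
})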
diff --git a/core/src/browser/extensions/engines/helpers/sse.ts b/core/src/browser/extensions/engines/helpers/sse.ts
index 024ced470..9713256b3 100644
--- a/core/src/browser/extensions/engines/helpers/sse.ts
+++ b/core/src/browser/extensions/engines/helpers/sse.ts
@@ -10,7 +10,7 @@ export function requestInference(
   requestBody: any,
   model: {
     id: string
-    parameters: ModelRuntimeParams
+    parameters?: ModelRuntimeParams
   },
   controller?: AbortController,
   headers?: HeadersInit,
@@ -22,7 +22,7 @@ export function requestInference(
       headers: {
         'Content-Type': 'application/json',
         'Access-Control-Allow-Origin': '*',
-        'Accept': model.parameters.stream ? 'text/event-stream' : 'application/json',
+        'Accept': model.parameters?.stream ? 'text/event-stream' : 'application/json',
         ...headers,
       },
       body: JSON.stringify(requestBody),
@@ -45,7 +45,7 @@ export function requestInference(
         subscriber.complete()
         return
       }
-      if (model.parameters.stream === false) {
+      if (model.parameters?.stream === false) {
         const data = await response.json()
         if (transformResponse) {
           subscriber.next(transformResponse(data))
diff --git a/core/src/browser/extensions/model.ts b/core/src/browser/extensions/model.ts
index 040542927..d111c1d3a 100644
--- a/core/src/browser/extensions/model.ts
+++ b/core/src/browser/extensions/model.ts
@@ -1,13 +1,5 @@
 import { BaseExtension, ExtensionTypeEnum } from '../extension'
-import {
-  GpuSetting,
-  HuggingFaceRepoData,
-  ImportingModel,
-  Model,
-  ModelFile,
-  ModelInterface,
-  OptionType,
-} from '../../types'
+import { Model, ModelInterface, OptionType } from '../../types'
 
 /**
  * Model extension for managing models.
@@ -20,17 +12,10 @@ export abstract class ModelExtension extends BaseExtension implements ModelInter
     return ExtensionTypeEnum.Model
   }
 
-  abstract downloadModel(
-    model: Model,
-    gpuSettings?: GpuSetting,
-    network?: { proxy: string; ignoreSSL?: boolean }
-  ): Promise<void>
-  abstract cancelModelDownload(modelId: string): Promise<void>
-  abstract deleteModel(model: ModelFile): Promise<void>
-  abstract getDownloadedModels(): Promise<ModelFile[]>
-  abstract getConfiguredModels(): Promise<ModelFile[]>
-  abstract importModels(models: ImportingModel[], optionType: OptionType): Promise<void>
-  abstract updateModelInfo(modelInfo: Partial<ModelFile>): Promise<ModelFile>
-  abstract fetchHuggingFaceRepoData(repoId: string): Promise<HuggingFaceRepoData>
-  abstract getDefaultModel(): Promise<Model>
+  abstract getModels(): Promise<Model[]>
+  abstract pullModel(model: string): Promise<void>
+  abstract cancelModelPull(modelId: string): Promise<void>
+  abstract importModel(model: string, modelPath: string): Promise<void>
+  abstract updateModel(modelInfo: Partial<Model>): Promise<Model>
+  abstract deleteModel(model: string): Promise<void>
 }
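The download-oriented surface (downloadModel, importModels, the HuggingFace helpers) is gone; extensions now implement a pull-based API keyed by model id strings. A hypothetical outline of a conforming subclass, shown only to illustrate the new shape (class name and method bodies are placeholders, not the real cortex extension):

import { Model, ModelExtension } from '@janhq/core'

class ExampleModelExtension extends ModelExtension {
  onLoad() {}
  onUnload() {}

  async getModels(): Promise<Model[]> { return [] }
  async pullModel(model: string): Promise<void> { /* start a pull via the engine API */ }
  async cancelModelPull(modelId: string): Promise<void> { /* abort the pull */ }
  async importModel(model: string, modelPath: string): Promise<void> { /* register a local file */ }
  async updateModel(modelInfo: Partial<Model>): Promise<Model> { return modelInfo as Model }
  async deleteModel(model: string): Promise<void> { /* remove the model */ }
}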
diff --git a/core/src/browser/index.test.ts b/core/src/browser/index.test.ts
index 339cd9046..c8cabbb0b 100644
--- a/core/src/browser/index.test.ts
+++ b/core/src/browser/index.test.ts
@@ -1,32 +1,37 @@
-import * as Core from './core';
-import * as Events from './events';
-import * as FileSystem from './fs';
-import * as Extension from './extension';
-import * as Extensions from './extensions';
-import * as Tools from './tools';
+import * as Core from './core'
+import * as Events from './events'
+import * as FileSystem from './fs'
+import * as Extension from './extension'
+import * as Extensions from './extensions'
+import * as Tools from './tools'
+import * as Models from './models'
 
 describe('Module Tests', () => {
-  it('should export Core module', () => {
-    expect(Core).toBeDefined();
-  });
+  it('should export Core module', () => {
+    expect(Core).toBeDefined()
+  })
 
-  it('should export Event module', () => {
-    expect(Events).toBeDefined();
-  });
+  it('should export Event module', () => {
+    expect(Events).toBeDefined()
+  })
 
-  it('should export Filesystem module', () => {
-    expect(FileSystem).toBeDefined();
-  });
+  it('should export Filesystem module', () => {
+    expect(FileSystem).toBeDefined()
+  })
 
-  it('should export Extension module', () => {
-    expect(Extension).toBeDefined();
-  });
+  it('should export Extension module', () => {
+    expect(Extension).toBeDefined()
+  })
 
-  it('should export all base extensions', () => {
-    expect(Extensions).toBeDefined();
-  });
+  it('should export all base extensions', () => {
+    expect(Extensions).toBeDefined()
+  })
 
-  it('should export all base tools', () => {
-    expect(Tools).toBeDefined();
-  });
-});
\ No newline at end of file
+  it('should export all base tools', () => {
+    expect(Tools).toBeDefined()
+  })
+
+  it('should export all base models', () => {
+    expect(Models).toBeDefined()
+  })
+})
diff --git a/core/src/browser/index.ts b/core/src/browser/index.ts
index a7803c7e0..a6ce187ca 100644
--- a/core/src/browser/index.ts
+++ b/core/src/browser/index.ts
@@ -33,3 +33,9 @@ export * from './extensions'
  * @module
  */
 export * from './tools'
+
+/**
+ * Export all base models.
+ * @module
+ */
+export * from './models'
diff --git a/core/src/browser/models/index.ts b/core/src/browser/models/index.ts
new file mode 100644
index 000000000..c16479b2b
--- /dev/null
+++ b/core/src/browser/models/index.ts
@@ -0,0 +1,5 @@
+/**
+ * Export ModelManager
+ * @module
+ */
+export { ModelManager } from './manager'
diff --git a/core/src/browser/models/manager.ts b/core/src/browser/models/manager.ts
new file mode 100644
index 000000000..4853989fe
--- /dev/null
+++ b/core/src/browser/models/manager.ts
@@ -0,0 +1,40 @@
+import { Model, ModelEvent } from '../../types'
+import { events } from '../events'
+
+/**
+ * Manages the registered models across extensions.
+ */
+export class ModelManager {
+  public models = new Map<string, Model>()
+
+  constructor() {
+    if (window) {
+      window.core.modelManager = this
+    }
+  }
+
+  /**
+   * Registers a model.
+   * @param model - The model to register.
+   */
+  register<T extends Model>(model: T) {
+    this.models.set(model.id, model)
+    events.emit(ModelEvent.OnModelsUpdate, {})
+  }
+
+  /**
+   * Retrieves a model by its id.
+   * @param id - The id of the model to retrieve.
+   * @returns The model, if found.
+   */
+  get<T extends Model>(id: string): T | undefined {
+    return this.models.get(id) as T | undefined
+  }
+
+  /**
+   * The instance of the model manager.
+   */
+  static instance(): ModelManager {
+    return (window.core?.modelManager as ModelManager) ?? new ModelManager()
+  }
+}
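Registration and lookup share one map hung off window.core, so every extension and the UI observe the same model set, and each register() broadcasts OnModelsUpdate. A small usage sketch (the model literal is illustrative and cast loosely, since a full Model carries many more fields):

import { ModelManager } from '@janhq/core'

// Typically invoked through BaseExtension.registerModels(...)
ModelManager.instance().register({ id: 'example-model', engine: 'llama-cpp' } as any)

// Resolve anywhere else by id.
const model = ModelManager.instance().get('example-model')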
diff --git a/core/src/types/api/index.ts b/core/src/types/api/index.ts
index 8f1ff70bf..c0de0f5e8 100644
--- a/core/src/types/api/index.ts
+++ b/core/src/types/api/index.ts
@@ -69,9 +69,11 @@ export enum DownloadRoute {
 }
 
 export enum DownloadEvent {
-  onFileDownloadUpdate = 'onFileDownloadUpdate',
-  onFileDownloadError = 'onFileDownloadError',
-  onFileDownloadSuccess = 'onFileDownloadSuccess',
+  onFileDownloadUpdate = 'DownloadUpdated',
+  onFileDownloadError = 'DownloadError',
+  onFileDownloadSuccess = 'DownloadSuccess',
+  onFileDownloadStopped = 'DownloadStopped',
+  onFileDownloadStarted = 'DownloadStarted',
   onFileUnzipSuccess = 'onFileUnzipSuccess',
 }
 
diff --git a/core/src/types/model/modelEntity.ts b/core/src/types/model/modelEntity.ts
index 933c698c3..ed1db94bd 100644
--- a/core/src/types/model/modelEntity.ts
+++ b/core/src/types/model/modelEntity.ts
@@ -6,8 +6,8 @@ import { FileMetadata } from '../file'
  */
 export type ModelInfo = {
   id: string
-  settings: ModelSettingParams
-  parameters: ModelRuntimeParams
+  settings?: ModelSettingParams
+  parameters?: ModelRuntimeParams
   engine?: InferenceEngine
 }
 
@@ -28,9 +28,10 @@ export enum InferenceEngine {
   nitro_tensorrt_llm = 'nitro-tensorrt-llm',
   cohere = 'cohere',
   nvidia = 'nvidia',
-  cortex_llamacpp = 'cortex.llamacpp',
-  cortex_onnx = 'cortex.onnx',
-  cortex_tensorrtllm = 'cortex.tensorrt-llm',
+  cortex = 'cortex',
+  cortex_llamacpp = 'llama-cpp',
+  cortex_onnx = 'onnxruntime',
+  cortex_tensorrtllm = 'tensorrt-llm',
 }
 
 export type ModelArtifact = {
@@ -153,8 +154,3 @@ export type ModelRuntimeParams = {
 export type ModelInitFailed = Model & {
   error: Error
 }
-
-/**
- * ModelFile is the model.json entity and it's file metadata
- */
-export type ModelFile = Model & FileMetadata
diff --git a/core/src/types/model/modelInterface.ts b/core/src/types/model/modelInterface.ts
index 08d456b7e..088118f69 100644
--- a/core/src/types/model/modelInterface.ts
+++ b/core/src/types/model/modelInterface.ts
@@ -1,5 +1,5 @@
-import { GpuSetting } from '../miscellaneous'
-import { Model, ModelFile } from './modelEntity'
+import { Model } from './modelEntity'
+import { OptionType } from './modelImport'
 
 /**
  * Model extension for managing models.
@@ -8,38 +8,41 @@ export interface ModelInterface {
   /**
    * Downloads a model.
    * @param model - The model to download.
-   * @param network - Optional object to specify proxy/whether to ignore SSL certificates.
    * @returns A Promise that resolves when the model has been downloaded.
    */
-  downloadModel(
-    model: ModelFile,
-    gpuSettings?: GpuSetting,
-    network?: { ignoreSSL?: boolean; proxy?: string }
-  ): Promise<void>
+  pullModel(model: string): Promise<void>
 
   /**
    * Cancels the download of a specific model.
    * @param {string} modelId - The ID of the model to cancel the download for.
    * @returns {Promise<void>} A promise that resolves when the download has been cancelled.
    */
-  cancelModelDownload(modelId: string): Promise<void>
+  cancelModelPull(modelId: string): Promise<void>
 
   /**
    * Deletes a model.
    * @param modelId - The ID of the model to delete.
    * @returns A Promise that resolves when the model has been deleted.
    */
-  deleteModel(model: ModelFile): Promise<void>
+  deleteModel(model: string): Promise<void>
 
   /**
-   * Gets a list of downloaded models.
+   * Gets downloaded models.
    * @returns A Promise that resolves with an array of downloaded models.
    */
-  getDownloadedModels(): Promise<ModelFile[]>
+  getModels(): Promise<Model[]>
 
   /**
-   * Gets a list of configured models.
-   * @returns A Promise that resolves with an array of configured models.
+   * Update a pulled model's metadata.
+   * @param model - The model to update.
+   * @returns A Promise that resolves when the model has been updated.
    */
-  getConfiguredModels(): Promise<ModelFile[]>
+  updateModel(model: Partial<Model>): Promise<Model>
+
+  /**
+   * Import an existing model file.
+   * @param model - id of the model to import
+   * @param modelPath - path of the model file
+   */
+  importModel(model: string, modelPath: string): Promise<void>
 }
diff --git a/docs/src/pages/docs/built-in/llama-cpp.mdx b/docs/src/pages/docs/built-in/llama-cpp.mdx
index 5b7b0453a..8e2fa8498 100644
--- a/docs/src/pages/docs/built-in/llama-cpp.mdx
+++ b/docs/src/pages/docs/built-in/llama-cpp.mdx
@@ -102,7 +102,7 @@ Enable the GPU acceleration option within the Jan application by following the [
     ],
     "size": 669000000
   },
-  "engine": "nitro"
+  "engine": "llama-cpp"
 }
 ```
 ### Step 2: Modify the `model.json`
diff --git a/extensions/assistant-extension/src/node/retrieval.ts b/extensions/assistant-extension/src/node/retrieval.ts
index 28d629aa8..3386029fa 100644
--- a/extensions/assistant-extension/src/node/retrieval.ts
+++ b/extensions/assistant-extension/src/node/retrieval.ts
@@ -10,8 +10,6 @@ import { HNSWLib } from 'langchain/vectorstores/hnswlib'
 import { OpenAIEmbeddings } from 'langchain/embeddings/openai'
 import { readEmbeddingEngine } from './engine'
 
-import path from 'path'
-
 export class Retrieval {
   public chunkSize: number = 100
   public chunkOverlap?: number = 0
diff --git a/extensions/inference-nitro-extension/.gitignore b/extensions/inference-cortex-extension/.gitignore
similarity index 100%
rename from extensions/inference-nitro-extension/.gitignore
rename to extensions/inference-cortex-extension/.gitignore
diff --git a/extensions/inference-nitro-extension/README.md b/extensions/inference-cortex-extension/README.md
similarity index 100%
rename from extensions/inference-nitro-extension/README.md
rename to extensions/inference-cortex-extension/README.md
diff --git a/extensions/inference-nitro-extension/download.bat b/extensions/inference-cortex-extension/download.bat
similarity index 93%
rename from extensions/inference-nitro-extension/download.bat
rename to extensions/inference-cortex-extension/download.bat
index 7acd385d5..d764b6df8 100644
--- a/extensions/inference-nitro-extension/download.bat
+++ b/extensions/inference-cortex-extension/download.bat
@@ -4,10 +4,10 @@
 set /p CORTEX_VERSION=<./bin/version.txt
 
 @REM Download cortex.llamacpp binaries
-set VERSION=v0.1.25
-set DOWNLOAD_URL=https://github.com/janhq/cortex.llamacpp/releases/download/%VERSION%/cortex.llamacpp-0.1.25-windows-amd64
+set VERSION=v0.1.34
+set DOWNLOAD_URL=https://github.com/janhq/cortex.llamacpp/releases/download/%VERSION%/cortex.llamacpp-0.1.34-windows-amd64
 set SUBFOLDERS=win-cuda-12-0 win-cuda-11-7 win-noavx win-avx win-avx2 win-avx512 win-vulkan
-call .\node_modules\.bin\download -e --strip 1 -o %BIN_PATH% https://github.com/janhq/cortex/releases/download/v%CORTEX_VERSION%/cortex-cpp-%CORTEX_VERSION%-windows-amd64.tar.gz
+call .\node_modules\.bin\download -e --strip 1 -o %BIN_PATH% https://github.com/janhq/cortex/releases/download/v%CORTEX_VERSION%/cortex-%CORTEX_VERSION%-windows-amd64.tar.gz
 call .\node_modules\.bin\download %DOWNLOAD_URL%-avx2-cuda-12-0.tar.gz -e --strip 1 -o %BIN_PATH%/win-cuda-12-0/engines/cortex.llamacpp
 call .\node_modules\.bin\download %DOWNLOAD_URL%-avx2-cuda-11-7.tar.gz -e --strip 1 -o %BIN_PATH%/win-cuda-11-7/engines/cortex.llamacpp
 call .\node_modules\.bin\download %DOWNLOAD_URL%-noavx.tar.gz -e --strip 1 -o %BIN_PATH%/win-noavx/engines/cortex.llamacpp
diff --git a/extensions/inference-cortex-extension/download.sh b/extensions/inference-cortex-extension/download.sh new file mode 100755 index 000000000..fe1f8af9f --- /dev/null +++ b/extensions/inference-cortex-extension/download.sh @@ -0,0 +1,36 @@ +#!/bin/bash + +# Read CORTEX_VERSION +CORTEX_VERSION=$(cat ./bin/version.txt) +CORTEX_RELEASE_URL="https://github.com/janhq/cortex/releases/download" +ENGINE_DOWNLOAD_URL="https://github.com/janhq/cortex.llamacpp/releases/download/v0.1.34/cortex.llamacpp-0.1.34" +# Detect platform +OS_TYPE=$(uname) + +if [ "$OS_TYPE" == "Linux" ]; then + # Linux downloads + download "${CORTEX_RELEASE_URL}/v${CORTEX_VERSION}/cortex-${CORTEX_VERSION}-linux-amd64.tar.gz" -e --strip 1 -o "./bin" + chmod +x "./bin/cortex" + + # Download engines for Linux + download "${ENGINE_DOWNLOAD_URL}-linux-amd64-noavx.tar.gz" -e --strip 1 -o "./bin/linux-noavx/engines/cortex.llamacpp" 1 + download "${ENGINE_DOWNLOAD_URL}-linux-amd64-avx.tar.gz" -e --strip 1 -o "./bin/linux-avx/engines/cortex.llamacpp" 1 + download "${ENGINE_DOWNLOAD_URL}-linux-amd64-avx2.tar.gz" -e --strip 1 -o "./bin/linux-avx2/engines/cortex.llamacpp" 1 + download "${ENGINE_DOWNLOAD_URL}-linux-amd64-avx512.tar.gz" -e --strip 1 -o "./bin/linux-avx512/engines/cortex.llamacpp" 1 + download "${ENGINE_DOWNLOAD_URL}-linux-amd64-avx2-cuda-12-0.tar.gz" -e --strip 1 -o "./bin/linux-cuda-12-0/engines/cortex.llamacpp" 1 + download "${ENGINE_DOWNLOAD_URL}-linux-amd64-avx2-cuda-11-7.tar.gz" -e --strip 1 -o "./bin/linux-cuda-11-7/engines/cortex.llamacpp" 1 + download "${ENGINE_DOWNLOAD_URL}-linux-amd64-vulkan.tar.gz" -e --strip 1 -o "./bin/linux-vulkan/engines/cortex.llamacpp" 1 + +elif [ "$OS_TYPE" == "Darwin" ]; then + # macOS downloads + download "${CORTEX_RELEASE_URL}/v${CORTEX_VERSION}/cortex-${CORTEX_VERSION}-mac-universal.tar.gz" -e --strip 1 -o "./bin" 1 + chmod +x "./bin/cortex" + + # Download engines for macOS + download "${ENGINE_DOWNLOAD_URL}-mac-arm64.tar.gz" -e --strip 1 -o ./bin/mac-arm64/engines/cortex.llamacpp + download "${ENGINE_DOWNLOAD_URL}-mac-amd64.tar.gz" -e --strip 1 -o ./bin/mac-x64/engines/cortex.llamacpp + +else + echo "Unsupported operating system: $OS_TYPE" + exit 1 +fi diff --git a/extensions/inference-nitro-extension/jest.config.js b/extensions/inference-cortex-extension/jest.config.js similarity index 100% rename from extensions/inference-nitro-extension/jest.config.js rename to extensions/inference-cortex-extension/jest.config.js diff --git a/extensions/inference-nitro-extension/package.json b/extensions/inference-cortex-extension/package.json similarity index 82% rename from extensions/inference-nitro-extension/package.json rename to extensions/inference-cortex-extension/package.json index 15ceaf566..920989f3b 100644 --- a/extensions/inference-nitro-extension/package.json +++ b/extensions/inference-cortex-extension/package.json @@ -10,12 +10,12 @@ "scripts": { "test": "jest", "build": "tsc --module commonjs && rollup -c rollup.config.ts", - "downloadnitro:linux:darwin": "./download.sh", - "downloadnitro:win32": "download.bat", - "downloadnitro": "run-script-os", - "build:publish:darwin": "rimraf *.tgz --glob && yarn build && npm run downloadnitro && ../../.github/scripts/auto-sign.sh && cpx \"bin/**\" \"dist/bin\" && npm pack && cpx *.tgz ../../pre-install", - "build:publish:win32:linux": "rimraf *.tgz --glob && yarn build && npm run downloadnitro && cpx \"bin/**\" \"dist/bin\" && npm pack && cpx *.tgz ../../pre-install", - "build:publish": "yarn test && run-script-os" + 
"downloadcortex:linux:darwin": "./download.sh", + "downloadcortex:win32": "download.bat", + "downloadcortex": "run-script-os", + "build:publish:darwin": "rimraf *.tgz --glob && yarn build && npm run downloadcortex && ../../.github/scripts/auto-sign.sh && cpx \"bin/**\" \"dist/bin\" && npm pack && cpx *.tgz ../../pre-install", + "build:publish:win32:linux": "rimraf *.tgz --glob && yarn build && npm run downloadcortex && cpx \"bin/**\" \"dist/bin\" && npm pack && cpx *.tgz ../../pre-install", + "build:publish": "run-script-os" }, "exports": { ".": "./dist/index.js", @@ -50,6 +50,7 @@ "cpu-instructions": "^0.0.13", "decompress": "^4.2.1", "fetch-retry": "^5.0.6", + "ky": "^1.7.2", "rxjs": "^7.8.1", "tcp-port-used": "^1.0.2", "terminate": "2.6.1", diff --git a/extensions/inference-nitro-extension/resources/default_settings.json b/extensions/inference-cortex-extension/resources/default_settings.json similarity index 100% rename from extensions/inference-nitro-extension/resources/default_settings.json rename to extensions/inference-cortex-extension/resources/default_settings.json diff --git a/extensions/inference-nitro-extension/resources/models/aya-23-35b/model.json b/extensions/inference-cortex-extension/resources/models/aya-23-35b/model.json similarity index 97% rename from extensions/inference-nitro-extension/resources/models/aya-23-35b/model.json rename to extensions/inference-cortex-extension/resources/models/aya-23-35b/model.json index 8c3029be0..f6e3d08e9 100644 --- a/extensions/inference-nitro-extension/resources/models/aya-23-35b/model.json +++ b/extensions/inference-cortex-extension/resources/models/aya-23-35b/model.json @@ -31,5 +31,5 @@ "tags": ["34B", "Finetuned"], "size": 21556982144 }, - "engine": "nitro" + "engine": "llama-cpp" } diff --git a/extensions/inference-nitro-extension/resources/models/aya-23-8b/model.json b/extensions/inference-cortex-extension/resources/models/aya-23-8b/model.json similarity index 97% rename from extensions/inference-nitro-extension/resources/models/aya-23-8b/model.json rename to extensions/inference-cortex-extension/resources/models/aya-23-8b/model.json index 163373014..463f7eec7 100644 --- a/extensions/inference-nitro-extension/resources/models/aya-23-8b/model.json +++ b/extensions/inference-cortex-extension/resources/models/aya-23-8b/model.json @@ -31,5 +31,5 @@ "tags": ["7B", "Finetuned"], "size": 5056982144 }, - "engine": "nitro" + "engine": "llama-cpp" } diff --git a/extensions/inference-nitro-extension/resources/models/bakllava-1/model.json b/extensions/inference-cortex-extension/resources/models/bakllava-1/model.json similarity index 97% rename from extensions/inference-nitro-extension/resources/models/bakllava-1/model.json rename to extensions/inference-cortex-extension/resources/models/bakllava-1/model.json index 93f87c7f4..391c93990 100644 --- a/extensions/inference-nitro-extension/resources/models/bakllava-1/model.json +++ b/extensions/inference-cortex-extension/resources/models/bakllava-1/model.json @@ -31,5 +31,5 @@ "tags": ["Vision"], "size": 5750000000 }, - "engine": "nitro" + "engine": "llama-cpp" } diff --git a/extensions/inference-nitro-extension/resources/models/codeninja-1.0-7b/model.json b/extensions/inference-cortex-extension/resources/models/codeninja-1.0-7b/model.json similarity index 97% rename from extensions/inference-nitro-extension/resources/models/codeninja-1.0-7b/model.json rename to extensions/inference-cortex-extension/resources/models/codeninja-1.0-7b/model.json index fb2a5f346..7bd5bf3a4 100644 --- 
a/extensions/inference-nitro-extension/resources/models/codeninja-1.0-7b/model.json +++ b/extensions/inference-cortex-extension/resources/models/codeninja-1.0-7b/model.json @@ -30,5 +30,5 @@ "tags": ["7B", "Finetuned"], "size": 4370000000 }, - "engine": "nitro" + "engine": "llama-cpp" } diff --git a/extensions/inference-nitro-extension/resources/models/codestral-22b/model.json b/extensions/inference-cortex-extension/resources/models/codestral-22b/model.json similarity index 97% rename from extensions/inference-nitro-extension/resources/models/codestral-22b/model.json rename to extensions/inference-cortex-extension/resources/models/codestral-22b/model.json index f90f848dd..2cce063e6 100644 --- a/extensions/inference-nitro-extension/resources/models/codestral-22b/model.json +++ b/extensions/inference-cortex-extension/resources/models/codestral-22b/model.json @@ -31,6 +31,6 @@ "tags": ["22B", "Finetuned", "Featured"], "size": 13341237440 }, - "engine": "nitro" + "engine": "llama-cpp" } diff --git a/extensions/inference-nitro-extension/resources/models/command-r-34b/model.json b/extensions/inference-cortex-extension/resources/models/command-r-34b/model.json similarity index 97% rename from extensions/inference-nitro-extension/resources/models/command-r-34b/model.json rename to extensions/inference-cortex-extension/resources/models/command-r-34b/model.json index 6b166eea5..13518604c 100644 --- a/extensions/inference-nitro-extension/resources/models/command-r-34b/model.json +++ b/extensions/inference-cortex-extension/resources/models/command-r-34b/model.json @@ -31,6 +31,6 @@ "tags": ["34B", "Finetuned"], "size": 21500000000 }, - "engine": "nitro" + "engine": "llama-cpp" } diff --git a/extensions/inference-nitro-extension/resources/models/deepseek-coder-1.3b/model.json b/extensions/inference-cortex-extension/resources/models/deepseek-coder-1.3b/model.json similarity index 97% rename from extensions/inference-nitro-extension/resources/models/deepseek-coder-1.3b/model.json rename to extensions/inference-cortex-extension/resources/models/deepseek-coder-1.3b/model.json index 4d825cfeb..6722d253d 100644 --- a/extensions/inference-nitro-extension/resources/models/deepseek-coder-1.3b/model.json +++ b/extensions/inference-cortex-extension/resources/models/deepseek-coder-1.3b/model.json @@ -31,5 +31,5 @@ "tags": ["Tiny"], "size": 1430000000 }, - "engine": "nitro" + "engine": "llama-cpp" } diff --git a/extensions/inference-nitro-extension/resources/models/deepseek-coder-34b/model.json b/extensions/inference-cortex-extension/resources/models/deepseek-coder-34b/model.json similarity index 97% rename from extensions/inference-nitro-extension/resources/models/deepseek-coder-34b/model.json rename to extensions/inference-cortex-extension/resources/models/deepseek-coder-34b/model.json index e87d6a643..8a2e271cd 100644 --- a/extensions/inference-nitro-extension/resources/models/deepseek-coder-34b/model.json +++ b/extensions/inference-cortex-extension/resources/models/deepseek-coder-34b/model.json @@ -31,5 +31,5 @@ "tags": ["33B"], "size": 19940000000 }, - "engine": "nitro" + "engine": "llama-cpp" } diff --git a/extensions/inference-nitro-extension/resources/models/gemma-1.1-2b/model.json b/extensions/inference-cortex-extension/resources/models/gemma-1.1-2b/model.json similarity index 97% rename from extensions/inference-nitro-extension/resources/models/gemma-1.1-2b/model.json rename to extensions/inference-cortex-extension/resources/models/gemma-1.1-2b/model.json index 837b10ce3..3278c9a81 100644 --- 
a/extensions/inference-nitro-extension/resources/models/gemma-1.1-2b/model.json +++ b/extensions/inference-cortex-extension/resources/models/gemma-1.1-2b/model.json @@ -31,5 +31,5 @@ "tags": ["2B", "Finetuned", "Tiny"], "size": 1630000000 }, - "engine": "nitro" + "engine": "llama-cpp" } diff --git a/extensions/inference-nitro-extension/resources/models/gemma-1.1-7b/model.json b/extensions/inference-cortex-extension/resources/models/gemma-1.1-7b/model.json similarity index 97% rename from extensions/inference-nitro-extension/resources/models/gemma-1.1-7b/model.json rename to extensions/inference-cortex-extension/resources/models/gemma-1.1-7b/model.json index b29043483..9a57f9b37 100644 --- a/extensions/inference-nitro-extension/resources/models/gemma-1.1-7b/model.json +++ b/extensions/inference-cortex-extension/resources/models/gemma-1.1-7b/model.json @@ -31,5 +31,5 @@ "tags": ["7B", "Finetuned"], "size": 5330000000 }, - "engine": "nitro" + "engine": "llama-cpp" } diff --git a/extensions/inference-nitro-extension/resources/models/gemma-2-27b/model.json b/extensions/inference-cortex-extension/resources/models/gemma-2-27b/model.json similarity index 97% rename from extensions/inference-nitro-extension/resources/models/gemma-2-27b/model.json rename to extensions/inference-cortex-extension/resources/models/gemma-2-27b/model.json index 4db74ac6f..66eaff7c2 100644 --- a/extensions/inference-nitro-extension/resources/models/gemma-2-27b/model.json +++ b/extensions/inference-cortex-extension/resources/models/gemma-2-27b/model.json @@ -37,5 +37,5 @@ ], "size": 16600000000 }, - "engine": "nitro" + "engine": "llama-cpp" } diff --git a/extensions/inference-nitro-extension/resources/models/gemma-2-2b/model.json b/extensions/inference-cortex-extension/resources/models/gemma-2-2b/model.json similarity index 97% rename from extensions/inference-nitro-extension/resources/models/gemma-2-2b/model.json rename to extensions/inference-cortex-extension/resources/models/gemma-2-2b/model.json index d85759f9b..60be558b8 100644 --- a/extensions/inference-nitro-extension/resources/models/gemma-2-2b/model.json +++ b/extensions/inference-cortex-extension/resources/models/gemma-2-2b/model.json @@ -38,5 +38,5 @@ ], "size": 1710000000 }, - "engine": "nitro" + "engine": "llama-cpp" } diff --git a/extensions/inference-nitro-extension/resources/models/gemma-2-9b/model.json b/extensions/inference-cortex-extension/resources/models/gemma-2-9b/model.json similarity index 97% rename from extensions/inference-nitro-extension/resources/models/gemma-2-9b/model.json rename to extensions/inference-cortex-extension/resources/models/gemma-2-9b/model.json index 8f6af15d9..67acaad09 100644 --- a/extensions/inference-nitro-extension/resources/models/gemma-2-9b/model.json +++ b/extensions/inference-cortex-extension/resources/models/gemma-2-9b/model.json @@ -37,5 +37,5 @@ ], "size": 5760000000 }, - "engine": "nitro" + "engine": "llama-cpp" } diff --git a/extensions/inference-nitro-extension/resources/models/llama2-chat-70b/model.json b/extensions/inference-cortex-extension/resources/models/llama2-chat-70b/model.json similarity index 97% rename from extensions/inference-nitro-extension/resources/models/llama2-chat-70b/model.json rename to extensions/inference-cortex-extension/resources/models/llama2-chat-70b/model.json index 0c770b189..c91a0a73b 100644 --- a/extensions/inference-nitro-extension/resources/models/llama2-chat-70b/model.json +++ b/extensions/inference-cortex-extension/resources/models/llama2-chat-70b/model.json @@ -31,5 +31,5 @@ 
"tags": ["70B", "Foundational Model"], "size": 43920000000 }, - "engine": "nitro" + "engine": "llama-cpp" } diff --git a/extensions/inference-nitro-extension/resources/models/llama2-chat-7b/model.json b/extensions/inference-cortex-extension/resources/models/llama2-chat-7b/model.json similarity index 97% rename from extensions/inference-nitro-extension/resources/models/llama2-chat-7b/model.json rename to extensions/inference-cortex-extension/resources/models/llama2-chat-7b/model.json index 9efd634b5..4a28f6004 100644 --- a/extensions/inference-nitro-extension/resources/models/llama2-chat-7b/model.json +++ b/extensions/inference-cortex-extension/resources/models/llama2-chat-7b/model.json @@ -31,5 +31,5 @@ "tags": ["7B", "Foundational Model"], "size": 4080000000 }, - "engine": "nitro" + "engine": "llama-cpp" } diff --git a/extensions/inference-nitro-extension/resources/models/llama3-8b-instruct/model.json b/extensions/inference-cortex-extension/resources/models/llama3-8b-instruct/model.json similarity index 97% rename from extensions/inference-nitro-extension/resources/models/llama3-8b-instruct/model.json rename to extensions/inference-cortex-extension/resources/models/llama3-8b-instruct/model.json index 4d84b9967..3456a185e 100644 --- a/extensions/inference-nitro-extension/resources/models/llama3-8b-instruct/model.json +++ b/extensions/inference-cortex-extension/resources/models/llama3-8b-instruct/model.json @@ -31,5 +31,5 @@ "tags": ["8B"], "size": 4920000000 }, - "engine": "nitro" + "engine": "llama-cpp" } diff --git a/extensions/inference-nitro-extension/resources/models/llama3-hermes-8b/model.json b/extensions/inference-cortex-extension/resources/models/llama3-hermes-8b/model.json similarity index 97% rename from extensions/inference-nitro-extension/resources/models/llama3-hermes-8b/model.json rename to extensions/inference-cortex-extension/resources/models/llama3-hermes-8b/model.json index a3601c8cd..718629fb0 100644 --- a/extensions/inference-nitro-extension/resources/models/llama3-hermes-8b/model.json +++ b/extensions/inference-cortex-extension/resources/models/llama3-hermes-8b/model.json @@ -34,5 +34,5 @@ ], "size": 4920000000 }, - "engine": "nitro" + "engine": "llama-cpp" } diff --git a/extensions/inference-nitro-extension/resources/models/llama3.1-70b-instruct/model.json b/extensions/inference-cortex-extension/resources/models/llama3.1-70b-instruct/model.json similarity index 98% rename from extensions/inference-nitro-extension/resources/models/llama3.1-70b-instruct/model.json rename to extensions/inference-cortex-extension/resources/models/llama3.1-70b-instruct/model.json index 1f4931e11..aec73719e 100644 --- a/extensions/inference-nitro-extension/resources/models/llama3.1-70b-instruct/model.json +++ b/extensions/inference-cortex-extension/resources/models/llama3.1-70b-instruct/model.json @@ -37,5 +37,5 @@ ], "size": 42500000000 }, - "engine": "nitro" + "engine": "llama-cpp" } diff --git a/extensions/inference-nitro-extension/resources/models/llama3.1-8b-instruct/model.json b/extensions/inference-cortex-extension/resources/models/llama3.1-8b-instruct/model.json similarity index 98% rename from extensions/inference-nitro-extension/resources/models/llama3.1-8b-instruct/model.json rename to extensions/inference-cortex-extension/resources/models/llama3.1-8b-instruct/model.json index 4b21534bc..ec9a0284b 100644 --- a/extensions/inference-nitro-extension/resources/models/llama3.1-8b-instruct/model.json +++ 
b/extensions/inference-cortex-extension/resources/models/llama3.1-8b-instruct/model.json @@ -37,5 +37,5 @@ ], "size": 4920000000 }, - "engine": "nitro" + "engine": "llama-cpp" } diff --git a/extensions/inference-nitro-extension/resources/models/llama3.2-1b-instruct/model.json b/extensions/inference-cortex-extension/resources/models/llama3.2-1b-instruct/model.json similarity index 97% rename from extensions/inference-nitro-extension/resources/models/llama3.2-1b-instruct/model.json rename to extensions/inference-cortex-extension/resources/models/llama3.2-1b-instruct/model.json index 5be08409d..0fe7d3316 100644 --- a/extensions/inference-nitro-extension/resources/models/llama3.2-1b-instruct/model.json +++ b/extensions/inference-cortex-extension/resources/models/llama3.2-1b-instruct/model.json @@ -31,5 +31,5 @@ "tags": ["1B", "Featured"], "size": 1320000000 }, - "engine": "nitro" + "engine": "llama-cpp" } diff --git a/extensions/inference-nitro-extension/resources/models/llama3.2-3b-instruct/model.json b/extensions/inference-cortex-extension/resources/models/llama3.2-3b-instruct/model.json similarity index 97% rename from extensions/inference-nitro-extension/resources/models/llama3.2-3b-instruct/model.json rename to extensions/inference-cortex-extension/resources/models/llama3.2-3b-instruct/model.json index aacb3f0f8..299362fbf 100644 --- a/extensions/inference-nitro-extension/resources/models/llama3.2-3b-instruct/model.json +++ b/extensions/inference-cortex-extension/resources/models/llama3.2-3b-instruct/model.json @@ -31,5 +31,5 @@ "tags": ["3B", "Featured"], "size": 3420000000 }, - "engine": "nitro" + "engine": "llama-cpp" } diff --git a/extensions/inference-nitro-extension/resources/models/llamacorn-1.1b/model.json b/extensions/inference-cortex-extension/resources/models/llamacorn-1.1b/model.json similarity index 97% rename from extensions/inference-nitro-extension/resources/models/llamacorn-1.1b/model.json rename to extensions/inference-cortex-extension/resources/models/llamacorn-1.1b/model.json index 94b62ec82..3230df5b0 100644 --- a/extensions/inference-nitro-extension/resources/models/llamacorn-1.1b/model.json +++ b/extensions/inference-cortex-extension/resources/models/llamacorn-1.1b/model.json @@ -34,5 +34,5 @@ ], "size": 1170000000 }, - "engine": "nitro" + "engine": "llama-cpp" } \ No newline at end of file diff --git a/extensions/inference-nitro-extension/resources/models/llava-13b/model.json b/extensions/inference-cortex-extension/resources/models/llava-13b/model.json similarity index 97% rename from extensions/inference-nitro-extension/resources/models/llava-13b/model.json rename to extensions/inference-cortex-extension/resources/models/llava-13b/model.json index 6d94fd272..791c98749 100644 --- a/extensions/inference-nitro-extension/resources/models/llava-13b/model.json +++ b/extensions/inference-cortex-extension/resources/models/llava-13b/model.json @@ -32,5 +32,5 @@ "tags": ["Vision"], "size": 7870000000 }, - "engine": "nitro" + "engine": "llama-cpp" } diff --git a/extensions/inference-nitro-extension/resources/models/llava-7b/model.json b/extensions/inference-cortex-extension/resources/models/llava-7b/model.json similarity index 97% rename from extensions/inference-nitro-extension/resources/models/llava-7b/model.json rename to extensions/inference-cortex-extension/resources/models/llava-7b/model.json index 1fdd75247..b22899c96 100644 --- a/extensions/inference-nitro-extension/resources/models/llava-7b/model.json +++ 
b/extensions/inference-cortex-extension/resources/models/llava-7b/model.json @@ -32,5 +32,5 @@ "tags": ["Vision"], "size": 4370000000 }, - "engine": "nitro" + "engine": "llama-cpp" } diff --git a/extensions/inference-nitro-extension/resources/models/mistral-ins-7b-q4/model.json b/extensions/inference-cortex-extension/resources/models/mistral-ins-7b-q4/model.json similarity index 97% rename from extensions/inference-nitro-extension/resources/models/mistral-ins-7b-q4/model.json rename to extensions/inference-cortex-extension/resources/models/mistral-ins-7b-q4/model.json index 88f701466..9b568e468 100644 --- a/extensions/inference-nitro-extension/resources/models/mistral-ins-7b-q4/model.json +++ b/extensions/inference-cortex-extension/resources/models/mistral-ins-7b-q4/model.json @@ -32,5 +32,5 @@ "size": 4370000000, "cover": "https://raw.githubusercontent.com/janhq/jan/dev/models/mistral-ins-7b-q4/cover.png" }, - "engine": "nitro" + "engine": "llama-cpp" } diff --git a/extensions/inference-nitro-extension/resources/models/mixtral-8x7b-instruct/model.json b/extensions/inference-cortex-extension/resources/models/mixtral-8x7b-instruct/model.json similarity index 97% rename from extensions/inference-nitro-extension/resources/models/mixtral-8x7b-instruct/model.json rename to extensions/inference-cortex-extension/resources/models/mixtral-8x7b-instruct/model.json index 4413b415c..c711065ff 100644 --- a/extensions/inference-nitro-extension/resources/models/mixtral-8x7b-instruct/model.json +++ b/extensions/inference-cortex-extension/resources/models/mixtral-8x7b-instruct/model.json @@ -30,5 +30,5 @@ "tags": ["70B", "Foundational Model"], "size": 26440000000 }, - "engine": "nitro" + "engine": "llama-cpp" } diff --git a/extensions/inference-nitro-extension/resources/models/noromaid-7b/model.json b/extensions/inference-cortex-extension/resources/models/noromaid-7b/model.json similarity index 97% rename from extensions/inference-nitro-extension/resources/models/noromaid-7b/model.json rename to extensions/inference-cortex-extension/resources/models/noromaid-7b/model.json index 10c17c310..1999035aa 100644 --- a/extensions/inference-nitro-extension/resources/models/noromaid-7b/model.json +++ b/extensions/inference-cortex-extension/resources/models/noromaid-7b/model.json @@ -31,5 +31,5 @@ "tags": ["7B", "Finetuned"], "size": 4370000000 }, - "engine": "nitro" + "engine": "llama-cpp" } diff --git a/extensions/inference-nitro-extension/resources/models/openchat-3.5-7b/model.json b/extensions/inference-cortex-extension/resources/models/openchat-3.5-7b/model.json similarity index 97% rename from extensions/inference-nitro-extension/resources/models/openchat-3.5-7b/model.json rename to extensions/inference-cortex-extension/resources/models/openchat-3.5-7b/model.json index e743a74c9..05371b69e 100644 --- a/extensions/inference-nitro-extension/resources/models/openchat-3.5-7b/model.json +++ b/extensions/inference-cortex-extension/resources/models/openchat-3.5-7b/model.json @@ -31,5 +31,5 @@ "tags": ["Recommended", "7B", "Finetuned"], "size": 4370000000 }, - "engine": "nitro" + "engine": "llama-cpp" } diff --git a/extensions/inference-nitro-extension/resources/models/phi3-3.8b/model.json b/extensions/inference-cortex-extension/resources/models/phi3-3.8b/model.json similarity index 97% rename from extensions/inference-nitro-extension/resources/models/phi3-3.8b/model.json rename to extensions/inference-cortex-extension/resources/models/phi3-3.8b/model.json index 6459b049d..a2197dab2 100644 --- 
a/extensions/inference-nitro-extension/resources/models/phi3-3.8b/model.json +++ b/extensions/inference-cortex-extension/resources/models/phi3-3.8b/model.json @@ -34,5 +34,5 @@ ], "size": 2320000000 }, - "engine": "nitro" + "engine": "llama-cpp" } \ No newline at end of file diff --git a/extensions/inference-nitro-extension/resources/models/phi3-medium/model.json b/extensions/inference-cortex-extension/resources/models/phi3-medium/model.json similarity index 97% rename from extensions/inference-nitro-extension/resources/models/phi3-medium/model.json rename to extensions/inference-cortex-extension/resources/models/phi3-medium/model.json index 7331b2fd8..f7131ee98 100644 --- a/extensions/inference-nitro-extension/resources/models/phi3-medium/model.json +++ b/extensions/inference-cortex-extension/resources/models/phi3-medium/model.json @@ -34,5 +34,5 @@ ], "size": 8366000000 }, - "engine": "nitro" + "engine": "llama-cpp" } \ No newline at end of file diff --git a/extensions/inference-nitro-extension/resources/models/phind-34b/model.json b/extensions/inference-cortex-extension/resources/models/phind-34b/model.json similarity index 97% rename from extensions/inference-nitro-extension/resources/models/phind-34b/model.json rename to extensions/inference-cortex-extension/resources/models/phind-34b/model.json index 14099a635..f6e302173 100644 --- a/extensions/inference-nitro-extension/resources/models/phind-34b/model.json +++ b/extensions/inference-cortex-extension/resources/models/phind-34b/model.json @@ -31,5 +31,5 @@ "tags": ["34B", "Finetuned"], "size": 20220000000 }, - "engine": "nitro" + "engine": "llama-cpp" } diff --git a/extensions/inference-nitro-extension/resources/models/qwen-7b/model.json b/extensions/inference-cortex-extension/resources/models/qwen-7b/model.json similarity index 97% rename from extensions/inference-nitro-extension/resources/models/qwen-7b/model.json rename to extensions/inference-cortex-extension/resources/models/qwen-7b/model.json index 85081a605..be37cac0d 100644 --- a/extensions/inference-nitro-extension/resources/models/qwen-7b/model.json +++ b/extensions/inference-cortex-extension/resources/models/qwen-7b/model.json @@ -31,5 +31,5 @@ "tags": ["7B", "Finetuned"], "size": 4770000000 }, - "engine": "nitro" + "engine": "llama-cpp" } diff --git a/extensions/inference-nitro-extension/resources/models/qwen2-7b/model.json b/extensions/inference-cortex-extension/resources/models/qwen2-7b/model.json similarity index 97% rename from extensions/inference-nitro-extension/resources/models/qwen2-7b/model.json rename to extensions/inference-cortex-extension/resources/models/qwen2-7b/model.json index a7613982c..210848a43 100644 --- a/extensions/inference-nitro-extension/resources/models/qwen2-7b/model.json +++ b/extensions/inference-cortex-extension/resources/models/qwen2-7b/model.json @@ -31,6 +31,6 @@ "tags": ["7B", "Finetuned"], "size": 4680000000 }, - "engine": "nitro" + "engine": "llama-cpp" } \ No newline at end of file diff --git a/extensions/inference-nitro-extension/resources/models/qwen2.5-14b-instruct/model.json b/extensions/inference-cortex-extension/resources/models/qwen2.5-14b-instruct/model.json similarity index 97% rename from extensions/inference-nitro-extension/resources/models/qwen2.5-14b-instruct/model.json rename to extensions/inference-cortex-extension/resources/models/qwen2.5-14b-instruct/model.json index 04913b874..96e4d214c 100644 --- a/extensions/inference-nitro-extension/resources/models/qwen2.5-14b-instruct/model.json +++ 
b/extensions/inference-cortex-extension/resources/models/qwen2.5-14b-instruct/model.json @@ -31,6 +31,6 @@ "tags": ["14B", "Featured"], "size": 8990000000 }, - "engine": "nitro" + "engine": "llama-cpp" } \ No newline at end of file diff --git a/extensions/inference-nitro-extension/resources/models/qwen2.5-32b-instruct/model.json b/extensions/inference-cortex-extension/resources/models/qwen2.5-32b-instruct/model.json similarity index 97% rename from extensions/inference-nitro-extension/resources/models/qwen2.5-32b-instruct/model.json rename to extensions/inference-cortex-extension/resources/models/qwen2.5-32b-instruct/model.json index 43ba30c56..20681dff4 100644 --- a/extensions/inference-nitro-extension/resources/models/qwen2.5-32b-instruct/model.json +++ b/extensions/inference-cortex-extension/resources/models/qwen2.5-32b-instruct/model.json @@ -31,6 +31,6 @@ "tags": ["32B"], "size": 19900000000 }, - "engine": "nitro" + "engine": "llama-cpp" } \ No newline at end of file diff --git a/extensions/inference-nitro-extension/resources/models/qwen2.5-72b-instruct/model.json b/extensions/inference-cortex-extension/resources/models/qwen2.5-72b-instruct/model.json similarity index 97% rename from extensions/inference-nitro-extension/resources/models/qwen2.5-72b-instruct/model.json rename to extensions/inference-cortex-extension/resources/models/qwen2.5-72b-instruct/model.json index 1852a0909..b741539eb 100644 --- a/extensions/inference-nitro-extension/resources/models/qwen2.5-72b-instruct/model.json +++ b/extensions/inference-cortex-extension/resources/models/qwen2.5-72b-instruct/model.json @@ -31,6 +31,6 @@ "tags": ["72B"], "size": 47400000000 }, - "engine": "nitro" + "engine": "llama-cpp" } \ No newline at end of file diff --git a/extensions/inference-nitro-extension/resources/models/qwen2.5-7b-instruct/model.json b/extensions/inference-cortex-extension/resources/models/qwen2.5-7b-instruct/model.json similarity index 97% rename from extensions/inference-nitro-extension/resources/models/qwen2.5-7b-instruct/model.json rename to extensions/inference-cortex-extension/resources/models/qwen2.5-7b-instruct/model.json index b47511f96..6741aef64 100644 --- a/extensions/inference-nitro-extension/resources/models/qwen2.5-7b-instruct/model.json +++ b/extensions/inference-cortex-extension/resources/models/qwen2.5-7b-instruct/model.json @@ -31,6 +31,6 @@ "tags": ["7B", "Featured"], "size": 4680000000 }, - "engine": "nitro" + "engine": "llama-cpp" } \ No newline at end of file diff --git a/extensions/inference-nitro-extension/resources/models/qwen2.5-coder-7b-instruct/model.json b/extensions/inference-cortex-extension/resources/models/qwen2.5-coder-7b-instruct/model.json similarity index 97% rename from extensions/inference-nitro-extension/resources/models/qwen2.5-coder-7b-instruct/model.json rename to extensions/inference-cortex-extension/resources/models/qwen2.5-coder-7b-instruct/model.json index 2f1080b2c..9162c8a43 100644 --- a/extensions/inference-nitro-extension/resources/models/qwen2.5-coder-7b-instruct/model.json +++ b/extensions/inference-cortex-extension/resources/models/qwen2.5-coder-7b-instruct/model.json @@ -31,6 +31,6 @@ "tags": ["7B", "Featured"], "size": 4680000000 }, - "engine": "nitro" + "engine": "llama-cpp" } \ No newline at end of file diff --git a/extensions/inference-nitro-extension/resources/models/stable-zephyr-3b/model.json b/extensions/inference-cortex-extension/resources/models/stable-zephyr-3b/model.json similarity index 97% rename from 
extensions/inference-nitro-extension/resources/models/stable-zephyr-3b/model.json rename to extensions/inference-cortex-extension/resources/models/stable-zephyr-3b/model.json index 938e03fb7..a6c84bd17 100644 --- a/extensions/inference-nitro-extension/resources/models/stable-zephyr-3b/model.json +++ b/extensions/inference-cortex-extension/resources/models/stable-zephyr-3b/model.json @@ -31,5 +31,5 @@ "tags": ["3B", "Finetuned", "Tiny"], "size": 2970000000 }, - "engine": "nitro" + "engine": "llama-cpp" } \ No newline at end of file diff --git a/extensions/inference-nitro-extension/resources/models/stealth-v1.2-7b/model.json b/extensions/inference-cortex-extension/resources/models/stealth-v1.2-7b/model.json similarity index 97% rename from extensions/inference-nitro-extension/resources/models/stealth-v1.2-7b/model.json rename to extensions/inference-cortex-extension/resources/models/stealth-v1.2-7b/model.json index c17d1c35e..ffb32922e 100644 --- a/extensions/inference-nitro-extension/resources/models/stealth-v1.2-7b/model.json +++ b/extensions/inference-cortex-extension/resources/models/stealth-v1.2-7b/model.json @@ -30,5 +30,5 @@ "tags": ["7B", "Finetuned"], "size": 4370000000 }, - "engine": "nitro" + "engine": "llama-cpp" } diff --git a/extensions/inference-nitro-extension/resources/models/tinyllama-1.1b/model.json b/extensions/inference-cortex-extension/resources/models/tinyllama-1.1b/model.json similarity index 97% rename from extensions/inference-nitro-extension/resources/models/tinyllama-1.1b/model.json rename to extensions/inference-cortex-extension/resources/models/tinyllama-1.1b/model.json index a49e79073..b6aeea3e3 100644 --- a/extensions/inference-nitro-extension/resources/models/tinyllama-1.1b/model.json +++ b/extensions/inference-cortex-extension/resources/models/tinyllama-1.1b/model.json @@ -31,5 +31,5 @@ "tags": ["Tiny", "Foundation Model"], "size": 669000000 }, - "engine": "nitro" + "engine": "llama-cpp" } diff --git a/extensions/inference-nitro-extension/resources/models/trinity-v1.2-7b/model.json b/extensions/inference-cortex-extension/resources/models/trinity-v1.2-7b/model.json similarity index 97% rename from extensions/inference-nitro-extension/resources/models/trinity-v1.2-7b/model.json rename to extensions/inference-cortex-extension/resources/models/trinity-v1.2-7b/model.json index 6c9aa2b89..fae5d0ca5 100644 --- a/extensions/inference-nitro-extension/resources/models/trinity-v1.2-7b/model.json +++ b/extensions/inference-cortex-extension/resources/models/trinity-v1.2-7b/model.json @@ -31,5 +31,5 @@ "size": 4370000000, "cover": "https://raw.githubusercontent.com/janhq/jan/dev/models/trinity-v1.2-7b/cover.png" }, - "engine": "nitro" + "engine": "llama-cpp" } diff --git a/extensions/inference-nitro-extension/resources/models/vistral-7b/model.json b/extensions/inference-cortex-extension/resources/models/vistral-7b/model.json similarity index 97% rename from extensions/inference-nitro-extension/resources/models/vistral-7b/model.json rename to extensions/inference-cortex-extension/resources/models/vistral-7b/model.json index b84f2c676..46b6999a6 100644 --- a/extensions/inference-nitro-extension/resources/models/vistral-7b/model.json +++ b/extensions/inference-cortex-extension/resources/models/vistral-7b/model.json @@ -31,6 +31,6 @@ "tags": ["7B", "Finetuned"], "size": 4410000000 }, - "engine": "nitro" + "engine": "llama-cpp" } \ No newline at end of file diff --git a/extensions/inference-nitro-extension/resources/models/wizardcoder-13b/model.json 
b/extensions/inference-cortex-extension/resources/models/wizardcoder-13b/model.json similarity index 97% rename from extensions/inference-nitro-extension/resources/models/wizardcoder-13b/model.json rename to extensions/inference-cortex-extension/resources/models/wizardcoder-13b/model.json index 101eedfd1..cf39ad857 100644 --- a/extensions/inference-nitro-extension/resources/models/wizardcoder-13b/model.json +++ b/extensions/inference-cortex-extension/resources/models/wizardcoder-13b/model.json @@ -31,5 +31,5 @@ "tags": ["Recommended", "13B", "Finetuned"], "size": 7870000000 }, - "engine": "nitro" + "engine": "llama-cpp" } diff --git a/extensions/inference-nitro-extension/resources/models/yi-34b/model.json b/extensions/inference-cortex-extension/resources/models/yi-34b/model.json similarity index 97% rename from extensions/inference-nitro-extension/resources/models/yi-34b/model.json rename to extensions/inference-cortex-extension/resources/models/yi-34b/model.json index db7df9f2d..4f56650d7 100644 --- a/extensions/inference-nitro-extension/resources/models/yi-34b/model.json +++ b/extensions/inference-cortex-extension/resources/models/yi-34b/model.json @@ -31,5 +31,5 @@ "tags": ["34B", "Foundational Model"], "size": 20660000000 }, - "engine": "nitro" + "engine": "llama-cpp" } diff --git a/extensions/inference-nitro-extension/rollup.config.ts b/extensions/inference-cortex-extension/rollup.config.ts similarity index 93% rename from extensions/inference-nitro-extension/rollup.config.ts rename to extensions/inference-cortex-extension/rollup.config.ts index 1a8badb6f..d0e9f5fbe 100644 --- a/extensions/inference-nitro-extension/rollup.config.ts +++ b/extensions/inference-cortex-extension/rollup.config.ts @@ -114,19 +114,7 @@ export default [ ]), NODE: JSON.stringify(`${packageJson.name}/${packageJson.node}`), DEFAULT_SETTINGS: JSON.stringify(defaultSettingJson), - INFERENCE_URL: JSON.stringify( - process.env.INFERENCE_URL || - 'http://127.0.0.1:3928/inferences/server/chat_completion' - ), - TROUBLESHOOTING_URL: JSON.stringify( - 'https://jan.ai/guides/troubleshooting' - ), - JAN_SERVER_INFERENCE_URL: JSON.stringify( - 'http://localhost:1337/v1/chat/completions' - ), - CUDA_DOWNLOAD_URL: JSON.stringify( - 'https://catalog.jan.ai/dist/cuda-dependencies///cuda.tar.gz' - ), + CORTEX_API_URL: JSON.stringify('http://127.0.0.1:39291/v1'), }), // Allow json resolution json(), diff --git a/extensions/inference-nitro-extension/src/@types/global.d.ts b/extensions/inference-cortex-extension/src/@types/global.d.ts similarity index 70% rename from extensions/inference-nitro-extension/src/@types/global.d.ts rename to extensions/inference-cortex-extension/src/@types/global.d.ts index 85c9b939f..64ae5a6e7 100644 --- a/extensions/inference-nitro-extension/src/@types/global.d.ts +++ b/extensions/inference-cortex-extension/src/@types/global.d.ts @@ -1,7 +1,5 @@ declare const NODE: string -declare const INFERENCE_URL: string -declare const TROUBLESHOOTING_URL: string -declare const JAN_SERVER_INFERENCE_URL: string +declare const CORTEX_API_URL: string declare const DEFAULT_SETTINGS: Array declare const MODELS: Array diff --git a/extensions/inference-nitro-extension/src/babel.config.js b/extensions/inference-cortex-extension/src/babel.config.js similarity index 100% rename from extensions/inference-nitro-extension/src/babel.config.js rename to extensions/inference-cortex-extension/src/babel.config.js diff --git a/extensions/inference-cortex-extension/src/index.ts b/extensions/inference-cortex-extension/src/index.ts new 
file mode 100644 index 000000000..93036fc4d --- /dev/null +++ b/extensions/inference-cortex-extension/src/index.ts @@ -0,0 +1,111 @@ +/** + * @file This file exports a class that implements the InferenceExtension interface from the @janhq/core package. + * The class provides methods for initializing and stopping a model, and for making inference requests. + * It also subscribes to events emitted by the @janhq/core package and handles new message requests. + * @version 1.0.0 + * @module inference-extension/src/index + */ + +import { + Model, + executeOnMain, + systemInformation, + log, + joinPath, + dirName, + LocalOAIEngine, + InferenceEngine, +} from '@janhq/core' + +import ky from 'ky' + +/** + * A class that implements the InferenceExtension interface from the @janhq/core package. + * The class provides methods for initializing and stopping a model, and for making inference requests. + * It also subscribes to events emitted by the @janhq/core package and handles new message requests. + */ +export default class JanInferenceCortexExtension extends LocalOAIEngine { + // DEPRECATED + nodeModule: string = 'node' + + provider: string = InferenceEngine.cortex + + /** + * The URL for making inference requests. + */ + inferenceUrl = `${CORTEX_API_URL}/chat/completions` + + /** + * Subscribes to events emitted by the @janhq/core package. + */ + async onLoad() { + const models = MODELS as Model[] + + this.registerModels(models) + + super.onLoad() + + // Run the process watchdog + const systemInfo = await systemInformation() + executeOnMain(NODE, 'run', systemInfo) + } + + onUnload(): void { + executeOnMain(NODE, 'dispose') + super.onUnload() + } + + override async loadModel( + model: Model & { file_path?: string } + ): Promise { + // Legacy model cache - should import + if (model.engine === InferenceEngine.nitro && model.file_path) { + // Try importing the model + await ky + .post(`${CORTEX_API_URL}/models/${model.id}`, { + json: { model: model.id, modelPath: await this.modelPath(model) }, + }) + .json() + .catch((e) => log(e.message ?? e ?? '')) + } + + return ky + .post(`${CORTEX_API_URL}/models/start`, { + json: { + ...model.settings, + model: model.id, + engine: + model.engine === InferenceEngine.nitro // Legacy model cache + ? InferenceEngine.cortex_llamacpp + : model.engine, + }, + }) + .json() + .catch(async (e) => { + throw (await e.response?.json()) ?? e + }) + .then() + } + + override async unloadModel(model: Model): Promise { + return ky + .post(`${CORTEX_API_URL}/models/stop`, { + json: { model: model.id }, + }) + .json() + .then() + } + + private async modelPath( + model: Model & { file_path?: string } + ): Promise { + if (!model.file_path) return model.id + return await joinPath([ + await dirName(model.file_path), + model.sources[0]?.filename ?? + model.settings?.llama_model_path ?? + model.sources[0]?.url.split('/').pop() ?? 
+ model.id, + ]) + } +} diff --git a/extensions/inference-nitro-extension/src/node/execute.test.ts b/extensions/inference-cortex-extension/src/node/execute.test.ts similarity index 83% rename from extensions/inference-nitro-extension/src/node/execute.test.ts rename to extensions/inference-cortex-extension/src/node/execute.test.ts index dfd8b35a9..89110fbd9 100644 --- a/extensions/inference-nitro-extension/src/node/execute.test.ts +++ b/extensions/inference-cortex-extension/src/node/execute.test.ts @@ -1,5 +1,5 @@ import { describe, expect, it } from '@jest/globals' -import { executableNitroFile } from './execute' +import { executableCortexFile } from './execute' import { GpuSetting } from '@janhq/core' import { cpuInfo } from 'cpu-instructions' @@ -30,7 +30,7 @@ jest.mock('cpu-instructions', () => ({ let mock = cpuInfo.cpuInfo as jest.Mock mock.mockReturnValue([]) -describe('test executable nitro file', () => { +describe('test executable cortex file', () => { afterAll(function () { Object.defineProperty(process, 'platform', { value: originalPlatform, @@ -44,10 +44,13 @@ describe('test executable nitro file', () => { Object.defineProperty(process, 'arch', { value: 'arm64', }) - expect(executableNitroFile(testSettings)).toEqual( + expect(executableCortexFile(testSettings)).toEqual( expect.objectContaining({ enginePath: expect.stringContaining(`mac-arm64`), - executablePath: originalPlatform === 'darwin' ? expect.stringContaining(`mac-arm64/cortex-cpp`) : expect.anything(), + executablePath: + originalPlatform === 'darwin' + ? expect.stringContaining(`/cortex`) + : expect.anything(), cudaVisibleDevices: '', vkVisibleDevices: '', }) @@ -55,10 +58,13 @@ describe('test executable nitro file', () => { Object.defineProperty(process, 'arch', { value: 'x64', }) - expect(executableNitroFile(testSettings)).toEqual( + expect(executableCortexFile(testSettings)).toEqual( expect.objectContaining({ enginePath: expect.stringContaining(`mac-x64`), - executablePath: originalPlatform === 'darwin' ? expect.stringContaining(`mac-x64/cortex-cpp`) : expect.anything(), + executablePath: + originalPlatform === 'darwin' + ? 
expect.stringContaining(`/cortex`) + : expect.anything(), cudaVisibleDevices: '', vkVisibleDevices: '', }) @@ -73,10 +79,10 @@ describe('test executable nitro file', () => { ...testSettings, run_mode: 'cpu', } - expect(executableNitroFile(settings)).toEqual( + expect(executableCortexFile(settings)).toEqual( expect.objectContaining({ enginePath: expect.stringContaining(`win`), - executablePath: expect.stringContaining(`cortex-cpp.exe`), + executablePath: expect.stringContaining(`/cortex.exe`), cudaVisibleDevices: '', vkVisibleDevices: '', }) @@ -107,10 +113,10 @@ describe('test executable nitro file', () => { }, ], } - expect(executableNitroFile(settings)).toEqual( + expect(executableCortexFile(settings)).toEqual( expect.objectContaining({ enginePath: expect.stringContaining(`win-cuda-11-7`), - executablePath: expect.stringContaining(`cortex-cpp.exe`), + executablePath: expect.stringContaining(`/cortex.exe`), cudaVisibleDevices: '0', vkVisibleDevices: '0', }) @@ -141,10 +147,10 @@ describe('test executable nitro file', () => { }, ], } - expect(executableNitroFile(settings)).toEqual( + expect(executableCortexFile(settings)).toEqual( expect.objectContaining({ enginePath: expect.stringContaining(`win-cuda-12-0`), - executablePath: expect.stringContaining(`cortex-cpp.exe`), + executablePath: expect.stringContaining(`/cortex.exe`), cudaVisibleDevices: '0', vkVisibleDevices: '0', }) @@ -159,10 +165,10 @@ describe('test executable nitro file', () => { ...testSettings, run_mode: 'cpu', } - expect(executableNitroFile(settings)).toEqual( + expect(executableCortexFile(settings)).toEqual( expect.objectContaining({ enginePath: expect.stringContaining(`linux`), - executablePath: expect.stringContaining(`cortex-cpp`), + executablePath: expect.stringContaining(`/cortex`), cudaVisibleDevices: '', vkVisibleDevices: '', }) @@ -193,10 +199,10 @@ describe('test executable nitro file', () => { }, ], } - expect(executableNitroFile(settings)).toEqual( + expect(executableCortexFile(settings)).toEqual( expect.objectContaining({ enginePath: expect.stringContaining(`linux-cuda-11-7`), - executablePath: expect.stringContaining(`cortex-cpp`), + executablePath: expect.stringContaining(`/cortex`), cudaVisibleDevices: '0', vkVisibleDevices: '0', }) @@ -227,10 +233,10 @@ describe('test executable nitro file', () => { }, ], } - expect(executableNitroFile(settings)).toEqual( + expect(executableCortexFile(settings)).toEqual( expect.objectContaining({ enginePath: expect.stringContaining(`linux-cuda-12-0`), - executablePath: expect.stringContaining(`cortex-cpp`), + executablePath: expect.stringContaining(`/cortex`), cudaVisibleDevices: '0', vkVisibleDevices: '0', }) @@ -251,10 +257,10 @@ describe('test executable nitro file', () => { cpuInstructions.forEach((instruction) => { mock.mockReturnValue([instruction]) - expect(executableNitroFile(settings)).toEqual( + expect(executableCortexFile(settings)).toEqual( expect.objectContaining({ enginePath: expect.stringContaining(`linux-${instruction}`), - executablePath: expect.stringContaining(`cortex-cpp`), + executablePath: expect.stringContaining(`/cortex`), cudaVisibleDevices: '', vkVisibleDevices: '', @@ -274,10 +280,10 @@ describe('test executable nitro file', () => { const cpuInstructions = ['avx512', 'avx2', 'avx', 'noavx'] cpuInstructions.forEach((instruction) => { mock.mockReturnValue([instruction]) - expect(executableNitroFile(settings)).toEqual( + expect(executableCortexFile(settings)).toEqual( expect.objectContaining({ enginePath: 
expect.stringContaining(`win-${instruction}`), - executablePath: expect.stringContaining(`cortex-cpp.exe`), + executablePath: expect.stringContaining(`/cortex.exe`), cudaVisibleDevices: '', vkVisibleDevices: '', }) @@ -313,10 +319,10 @@ describe('test executable nitro file', () => { const cpuInstructions = ['avx512', 'avx2', 'avx', 'noavx'] cpuInstructions.forEach((instruction) => { mock.mockReturnValue([instruction]) - expect(executableNitroFile(settings)).toEqual( + expect(executableCortexFile(settings)).toEqual( expect.objectContaining({ enginePath: expect.stringContaining(`win-cuda-12-0`), - executablePath: expect.stringContaining(`cortex-cpp.exe`), + executablePath: expect.stringContaining(`/cortex.exe`), cudaVisibleDevices: '0', vkVisibleDevices: '0', }) @@ -352,10 +358,10 @@ describe('test executable nitro file', () => { } cpuInstructions.forEach((instruction) => { mock.mockReturnValue([instruction]) - expect(executableNitroFile(settings)).toEqual( + expect(executableCortexFile(settings)).toEqual( expect.objectContaining({ enginePath: expect.stringContaining(`linux-cuda-12-0`), - executablePath: expect.stringContaining(`cortex-cpp`), + executablePath: expect.stringContaining(`/cortex`), cudaVisibleDevices: '0', vkVisibleDevices: '0', }) @@ -392,10 +398,10 @@ describe('test executable nitro file', () => { } cpuInstructions.forEach((instruction) => { mock.mockReturnValue([instruction]) - expect(executableNitroFile(settings)).toEqual( + expect(executableCortexFile(settings)).toEqual( expect.objectContaining({ enginePath: expect.stringContaining(`linux-vulkan`), - executablePath: expect.stringContaining(`cortex-cpp`), + executablePath: expect.stringContaining(`/cortex`), cudaVisibleDevices: '0', vkVisibleDevices: '0', }) @@ -418,10 +424,13 @@ describe('test executable nitro file', () => { run_mode: 'cpu', } mock.mockReturnValue([]) - expect(executableNitroFile(settings)).toEqual( + expect(executableCortexFile(settings)).toEqual( expect.objectContaining({ enginePath: expect.stringContaining(`mac-x64`), - executablePath: originalPlatform === 'darwin' ? expect.stringContaining(`mac-x64/cortex-cpp`) : expect.anything(), + executablePath: + originalPlatform === 'darwin' + ? expect.stringContaining(`/cortex`) + : expect.anything(), cudaVisibleDevices: '', vkVisibleDevices: '', }) diff --git a/extensions/inference-nitro-extension/src/node/execute.ts b/extensions/inference-cortex-extension/src/node/execute.ts similarity index 93% rename from extensions/inference-nitro-extension/src/node/execute.ts rename to extensions/inference-cortex-extension/src/node/execute.ts index 595063ed4..0febe8adf 100644 --- a/extensions/inference-nitro-extension/src/node/execute.ts +++ b/extensions/inference-cortex-extension/src/node/execute.ts @@ -2,7 +2,7 @@ import { GpuSetting } from '@janhq/core' import * as path from 'path' import { cpuInfo } from 'cpu-instructions' -export interface NitroExecutableOptions { +export interface CortexExecutableOptions { enginePath: string executablePath: string cudaVisibleDevices: string @@ -81,9 +81,9 @@ const cpuInstructions = () => { * Find which executable file to run based on the current platform. * @returns The name of the executable file to run. 
*/ -export const executableNitroFile = ( +export const executableCortexFile = ( gpuSetting?: GpuSetting -): NitroExecutableOptions => { +): CortexExecutableOptions => { let engineFolder = [ os(), ...(gpuSetting?.vulkan ? @@ -99,7 +99,7 @@ .join('-') let cudaVisibleDevices = gpuSetting?.gpus_in_use.join(',') ?? '' let vkVisibleDevices = gpuSetting?.gpus_in_use.join(',') ?? '' - let binaryName = `${process.platform === 'darwin' ? `${os()}/` : ''}cortex-cpp${extension()}` + let binaryName = `cortex${extension()}` return { enginePath: path.join(__dirname, '..', 'bin', engineFolder), diff --git a/extensions/inference-cortex-extension/src/node/index.test.ts b/extensions/inference-cortex-extension/src/node/index.test.ts new file mode 100644 index 000000000..ff9d7c2fc --- /dev/null +++ b/extensions/inference-cortex-extension/src/node/index.test.ts @@ -0,0 +1,94 @@ +jest.mock('@janhq/core/node', () => ({ + ...jest.requireActual('@janhq/core/node'), + getJanDataFolderPath: () => '', + getSystemResourceInfo: () => { + return { + cpu: { + cores: 1, + logicalCores: 1, + threads: 1, + model: 'model', + speed: 1, + }, + memory: { + total: 1, + free: 1, + }, + gpu: { + model: 'model', + memory: 1, + cuda: { + version: 'version', + devices: 'devices', + }, + vulkan: { + version: 'version', + devices: 'devices', + }, + }, + } + }, +})) + +jest.mock('fs', () => ({ + default: { + readdirSync: () => [], + }, +})) + +jest.mock('child_process', () => ({ + exec: () => { + return { + stdout: { on: jest.fn() }, + stderr: { on: jest.fn() }, + on: jest.fn(), + } + }, + spawn: () => { + return { + stdout: { on: jest.fn() }, + stderr: { on: jest.fn() }, + on: jest.fn(), + pid: '111', + } + }, +})) + +jest.mock('./execute', () => ({ + executableCortexFile: () => { + return { + enginePath: 'enginePath', + executablePath: 'executablePath', + cudaVisibleDevices: 'cudaVisibleDevices', + vkVisibleDevices: 'vkVisibleDevices', + } + }, +})) + +import index from './index' + +describe('dispose', () => { + it('should dispose a model successfully on Mac', async () => { + Object.defineProperty(process, 'platform', { + value: 'darwin', + }) + + // Call the dispose function + const result = await index.dispose() + + // Assert that the result is as expected + expect(result).toBeUndefined() + }) + + it('should kill the subprocess successfully on Windows', async () => { + Object.defineProperty(process, 'platform', { + value: 'win32', + }) + + // Call the killSubprocess function + const result = await index.dispose() + + // Assert that the result is as expected + expect(result).toBeUndefined() + }) +}) diff --git a/extensions/inference-cortex-extension/src/node/index.ts b/extensions/inference-cortex-extension/src/node/index.ts new file mode 100644 index 000000000..f1c365ade --- /dev/null +++ b/extensions/inference-cortex-extension/src/node/index.ts @@ -0,0 +1,83 @@ +import path from 'path' +import { log, SystemInformation } from '@janhq/core/node' +import { executableCortexFile } from './execute' +import { ProcessWatchdog } from './watchdog' + +// The local port the Cortex API server listens on (matches CORTEX_API_URL) +const LOCAL_PORT = '39291' +let watchdog: ProcessWatchdog | undefined = undefined
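For orientation before the main-process implementation: the renderer-side class added earlier in this diff (`JanInferenceCortexExtension`) drives this module over the `executeOnMain` bridge, so `run()` below only has to supervise the server process. A condensed sketch of that round trip, assuming the rollup-injected `NODE` module id and omitting error handling:

```ts
import { executeOnMain, systemInformation } from '@janhq/core'

declare const NODE: string // injected by rollup; points at this node module

// Hypothetical condensed view of the wiring in src/index.ts above.
async function startServer(): Promise<void> {
  const systemInfo = await systemInformation() // CPU/GPU probe picks the binary variant
  await executeOnMain(NODE, 'run', systemInfo) // invokes run() below in the main process
}

async function stopServer(): Promise<void> {
  await executeOnMain(NODE, 'dispose') // tears down the watchdog and the cortex process
}
```

The split matters: the renderer never touches the child process directly, so everything below runs in the main process.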
+ +/** + * Spawns a Cortex subprocess. + * @returns A promise that resolves when the Cortex subprocess is started. + */ +function run(systemInfo?: SystemInformation): Promise { + log(`[CORTEX]:: Spawning cortex subprocess...`) + + return new Promise(async (resolve, reject) => { + let executableOptions = executableCortexFile( + // Forward GPU settings so the matching binary variant (CPU instruction set, CUDA or Vulkan) is selected + systemInfo?.gpuSetting + ? { + ...systemInfo.gpuSetting, + run_mode: systemInfo.gpuSetting.run_mode, + } + : undefined + ) + + // Execute the binary + log(`[CORTEX]:: Spawn cortex at path: ${executableOptions.executablePath}`) + log(`[CORTEX]::Debug: Cortex engine path: ${executableOptions.enginePath}`) + + // Add engine path to the PATH and LD_LIBRARY_PATH + process.env.PATH = (process.env.PATH || '').concat( + path.delimiter, + executableOptions.enginePath + ) + log(`[CORTEX] PATH: ${process.env.PATH}`) + process.env.LD_LIBRARY_PATH = (process.env.LD_LIBRARY_PATH || '').concat( + path.delimiter, + executableOptions.enginePath + ) + + watchdog = new ProcessWatchdog( + executableOptions.executablePath, + ['--start-server', '--port', LOCAL_PORT.toString()], + { + cwd: executableOptions.enginePath, + env: { + ...process.env, + ENGINE_PATH: executableOptions.enginePath, + CUDA_VISIBLE_DEVICES: executableOptions.cudaVisibleDevices, + // Vulkan - Support 1 device at a time for now + ...(executableOptions.vkVisibleDevices?.length > 0 && { + GGML_VULKAN_DEVICE: executableOptions.vkVisibleDevices[0], + }), + }, + } + ) + watchdog.start() + resolve() + }) +} + +/** + * Every module should have a dispose function + * This will be called when the extension is unloaded and should clean up any resources + * Also called when app is closed + */ +function dispose() { + watchdog?.terminate() +} + +/** + * Cortex process info + */ +export interface CortexProcessInfo { + isRunning: boolean +} + +export default { + run, + dispose, +} diff --git a/extensions/inference-cortex-extension/src/node/watchdog.ts b/extensions/inference-cortex-extension/src/node/watchdog.ts new file mode 100644 index 000000000..3e2b81d70 --- /dev/null +++ b/extensions/inference-cortex-extension/src/node/watchdog.ts @@ -0,0 +1,84 @@ +import { log } from '@janhq/core/node' +import { spawn, ChildProcess } from 'child_process' +import { EventEmitter } from 'events' + +interface WatchdogOptions { + cwd?: string + restartDelay?: number + maxRestarts?: number + env?: NodeJS.ProcessEnv +} + +export class ProcessWatchdog extends EventEmitter { + private command: string + private args: string[] + private options: WatchdogOptions + private process: ChildProcess | null + private restartDelay: number + private maxRestarts: number + private restartCount: number + private isTerminating: boolean + + constructor(command: string, args: string[], options: WatchdogOptions = {}) { + super() + this.command = command + this.args = args + this.options = options + this.process = null + this.restartDelay = options.restartDelay || 5000 + this.maxRestarts = options.maxRestarts || 5 + this.restartCount = 0 + this.isTerminating = false + } + + start(): void { + this.spawnProcess() + } + + private spawnProcess(): void { + if (this.isTerminating) return + + log(`Starting process: ${this.command} ${this.args.join(' ')}`) + this.process = spawn(this.command, this.args, this.options) + + this.process.stdout?.on('data', (data: Buffer) => { + log(`Process output: ${data}`) + this.emit('output', data.toString()) + }) + + this.process.stderr?.on('data', (data: Buffer) => { + log(`Process error: ${data}`) + this.emit('error', data.toString()) + }) + + this.process.on('close', (code: number |
null) => { + log(`Process exited with code ${code}`) + this.emit('close', code) + if (!this.isTerminating) { + this.restartProcess() + } + }) + } + + private restartProcess(): void { + if (this.restartCount < this.maxRestarts) { + this.restartCount++ + log( + `Restarting process in ${this.restartDelay}ms (Attempt ${this.restartCount}/${this.maxRestarts})` + ) + setTimeout(() => this.spawnProcess(), this.restartDelay) + } else { + log('Max restart attempts reached. Exiting watchdog.') + this.emit('maxRestartsReached') + } + } + + terminate(): void { + this.isTerminating = true + if (this.process) { + log('Terminating watched process...') + this.process.kill() + } + this.emit('terminated') + } +} diff --git a/extensions/inference-nitro-extension/tsconfig.json b/extensions/inference-cortex-extension/tsconfig.json similarity index 100% rename from extensions/inference-nitro-extension/tsconfig.json rename to extensions/inference-cortex-extension/tsconfig.json diff --git a/extensions/inference-nitro-extension/bin/version.txt b/extensions/inference-nitro-extension/bin/version.txt deleted file mode 100644 index 8f0916f76..000000000 --- a/extensions/inference-nitro-extension/bin/version.txt +++ /dev/null @@ -1 +0,0 @@ -0.5.0 diff --git a/extensions/inference-nitro-extension/download.sh b/extensions/inference-nitro-extension/download.sh deleted file mode 100755 index 98ed8504a..000000000 --- a/extensions/inference-nitro-extension/download.sh +++ /dev/null @@ -1,41 +0,0 @@ -#!/bin/bash - -# Read CORTEX_VERSION -CORTEX_VERSION=$(cat ./bin/version.txt) -CORTEX_RELEASE_URL="https://github.com/janhq/cortex/releases/download" - -# Detect platform -OS_TYPE=$(uname) - -if [ "$OS_TYPE" == "Linux" ]; then - # Linux downloads - download "${CORTEX_RELEASE_URL}/v${CORTEX_VERSION}/cortex-cpp-${CORTEX_VERSION}-linux-amd64.tar.gz" -e --strip 1 -o "./bin" - chmod +x "./bin/cortex-cpp" - - ENGINE_DOWNLOAD_URL="https://github.com/janhq/cortex.llamacpp/releases/download/v0.1.25/cortex.llamacpp-0.1.25-linux-amd64" - - # Download engines for Linux - download "${ENGINE_DOWNLOAD_URL}-noavx.tar.gz" -e --strip 1 -o "./bin/linux-noavx/engines/cortex.llamacpp" 1 - download "${ENGINE_DOWNLOAD_URL}-avx.tar.gz" -e --strip 1 -o "./bin/linux-avx/engines/cortex.llamacpp" 1 - download "${ENGINE_DOWNLOAD_URL}-avx2.tar.gz" -e --strip 1 -o "./bin/linux-avx2/engines/cortex.llamacpp" 1 - download "${ENGINE_DOWNLOAD_URL}-avx512.tar.gz" -e --strip 1 -o "./bin/linux-avx512/engines/cortex.llamacpp" 1 - download "${ENGINE_DOWNLOAD_URL}-avx2-cuda-12-0.tar.gz" -e --strip 1 -o "./bin/linux-cuda-12-0/engines/cortex.llamacpp" 1 - download "${ENGINE_DOWNLOAD_URL}-avx2-cuda-11-7.tar.gz" -e --strip 1 -o "./bin/linux-cuda-11-7/engines/cortex.llamacpp" 1 - download "${ENGINE_DOWNLOAD_URL}-vulkan.tar.gz" -e --strip 1 -o "./bin/linux-vulkan/engines/cortex.llamacpp" 1 - -elif [ "$OS_TYPE" == "Darwin" ]; then - # macOS downloads - download "${CORTEX_RELEASE_URL}/v${CORTEX_VERSION}/cortex-cpp-${CORTEX_VERSION}-mac-arm64.tar.gz" -e --strip 1 -o "./bin/mac-arm64" 1 - download "${CORTEX_RELEASE_URL}/v${CORTEX_VERSION}/cortex-cpp-${CORTEX_VERSION}-mac-amd64.tar.gz" -e --strip 1 -o "./bin/mac-x64" 1 - chmod +x "./bin/mac-arm64/cortex-cpp" - chmod +x "./bin/mac-x64/cortex-cpp" - - ENGINE_DOWNLOAD_URL="https://github.com/janhq/cortex.llamacpp/releases/download/v0.1.25/cortex.llamacpp-0.1.25-mac" - # Download engines for macOS - download "${ENGINE_DOWNLOAD_URL}-arm64.tar.gz" -e --strip 1 -o ./bin/mac-arm64/engines/cortex.llamacpp - download 
"${ENGINE_DOWNLOAD_URL}-amd64.tar.gz" -e --strip 1 -o ./bin/mac-x64/engines/cortex.llamacpp - -else - echo "Unsupported operating system: $OS_TYPE" - exit 1 -fi diff --git a/extensions/inference-nitro-extension/src/index.ts b/extensions/inference-nitro-extension/src/index.ts deleted file mode 100644 index 6e825e8fd..000000000 --- a/extensions/inference-nitro-extension/src/index.ts +++ /dev/null @@ -1,193 +0,0 @@ -/** - * @file This file exports a class that implements the InferenceExtension interface from the @janhq/core package. - * The class provides methods for initializing and stopping a model, and for making inference requests. - * It also subscribes to events emitted by the @janhq/core package and handles new message requests. - * @version 1.0.0 - * @module inference-extension/src/index - */ - -import { - events, - executeOnMain, - Model, - ModelEvent, - LocalOAIEngine, - InstallationState, - systemInformation, - fs, - getJanDataFolderPath, - joinPath, - DownloadRequest, - baseName, - downloadFile, - DownloadState, - DownloadEvent, - ModelFile, -} from '@janhq/core' - -declare const CUDA_DOWNLOAD_URL: string -/** - * A class that implements the InferenceExtension interface from the @janhq/core package. - * The class provides methods for initializing and stopping a model, and for making inference requests. - * It also subscribes to events emitted by the @janhq/core package and handles new message requests. - */ -export default class JanInferenceNitroExtension extends LocalOAIEngine { - nodeModule: string = NODE - provider: string = 'nitro' - - /** - * Checking the health for Nitro's process each 5 secs. - */ - private static readonly _intervalHealthCheck = 5 * 1000 - - /** - * The interval id for the health check. Used to stop the health check. - */ - private getNitroProcessHealthIntervalId: NodeJS.Timeout | undefined = undefined - - /** - * Tracking the current state of nitro process. - */ - private nitroProcessInfo: any = undefined - - /** - * The URL for making inference requests. - */ - inferenceUrl = '' - - /** - * Subscribes to events emitted by the @janhq/core package. - */ - async onLoad() { - this.inferenceUrl = INFERENCE_URL - - // If the extension is running in the browser, use the base API URL from the core package. - if (!('electronAPI' in window)) { - this.inferenceUrl = `${window.core?.api?.baseApiUrl}/v1/chat/completions` - } - - this.getNitroProcessHealthIntervalId = setInterval( - () => this.periodicallyGetNitroHealth(), - JanInferenceNitroExtension._intervalHealthCheck - ) - const models = MODELS as unknown as Model[] - this.registerModels(models) - super.onLoad() - - // Add additional dependencies PATH to the env - executeOnMain(NODE, 'addAdditionalDependencies', { - name: this.name, - version: this.version, - }) - } - - /** - * Periodically check for nitro process's health. - */ - private async periodicallyGetNitroHealth(): Promise { - const health = await executeOnMain(NODE, 'getCurrentNitroProcessInfo') - - const isRunning = this.nitroProcessInfo?.isRunning ?? 
false - if (isRunning && health.isRunning === false) { - console.debug('Nitro process is stopped') - events.emit(ModelEvent.OnModelStopped, {}) - } - this.nitroProcessInfo = health - } - - override loadModel(model: ModelFile): Promise { - if (model.engine !== this.provider) return Promise.resolve() - this.getNitroProcessHealthIntervalId = setInterval( - () => this.periodicallyGetNitroHealth(), - JanInferenceNitroExtension._intervalHealthCheck - ) - return super.loadModel(model) - } - - override async unloadModel(model?: Model): Promise { - if (model?.engine && model.engine !== this.provider) return - - // stop the periocally health check - if (this.getNitroProcessHealthIntervalId) { - clearInterval(this.getNitroProcessHealthIntervalId) - this.getNitroProcessHealthIntervalId = undefined - } - return super.unloadModel(model) - } - - override async install(): Promise { - const info = await systemInformation() - - const platform = info.osInfo?.platform === 'win32' ? 'windows' : 'linux' - const downloadUrl = CUDA_DOWNLOAD_URL - - const url = downloadUrl - .replace('', info.gpuSetting?.cuda?.version ?? '12.4') - .replace('', platform) - - console.debug('Downloading Cuda Toolkit Dependency: ', url) - - const janDataFolderPath = await getJanDataFolderPath() - - const executableFolderPath = await joinPath([ - janDataFolderPath, - 'engines', - this.name ?? 'cortex-cpp', - this.version ?? '1.0.0', - ]) - - if (!(await fs.existsSync(executableFolderPath))) { - await fs.mkdir(executableFolderPath) - } - - const tarball = await baseName(url) - const tarballFullPath = await joinPath([executableFolderPath, tarball]) - - const downloadRequest: DownloadRequest = { - url, - localPath: tarballFullPath, - extensionId: this.name, - downloadType: 'extension', - } - downloadFile(downloadRequest) - - const onFileDownloadSuccess = async (state: DownloadState) => { - console.log(state) - // if other download, ignore - if (state.fileName !== tarball) return - events.off(DownloadEvent.onFileDownloadSuccess, onFileDownloadSuccess) - await executeOnMain( - NODE, - 'decompressRunner', - tarballFullPath, - executableFolderPath - ) - events.emit(DownloadEvent.onFileUnzipSuccess, state) - } - events.on(DownloadEvent.onFileDownloadSuccess, onFileDownloadSuccess) - } - - override async installationState(): Promise { - const info = await systemInformation() - if ( - info.gpuSetting?.run_mode === 'gpu' && - !info.gpuSetting?.vulkan && - info.osInfo && - info.osInfo.platform !== 'darwin' && - !info.gpuSetting?.cuda?.exist - ) { - const janDataFolderPath = await getJanDataFolderPath() - - const executableFolderPath = await joinPath([ - janDataFolderPath, - 'engines', - this.name ?? 'cortex-cpp', - this.version ?? 
'1.0.0', - ]) - - if (!(await fs.existsSync(executableFolderPath))) return 'NotInstalled' - return 'Installed' - } - return 'NotRequired' - } -} diff --git a/extensions/inference-nitro-extension/src/node/index.test.ts b/extensions/inference-nitro-extension/src/node/index.test.ts deleted file mode 100644 index 6e64b4a06..000000000 --- a/extensions/inference-nitro-extension/src/node/index.test.ts +++ /dev/null @@ -1,465 +0,0 @@ -jest.mock('fetch-retry', () => ({ - default: () => () => { - return Promise.resolve({ - ok: true, - status: 200, - json: () => - Promise.resolve({ - model_loaded: true, - }), - text: () => Promise.resolve(''), - }) - }, -})) - -jest.mock('path', () => ({ - default: { - isAbsolute: jest.fn(), - join: jest.fn(), - parse: () => { - return { dir: 'dir' } - }, - delimiter: { concat: () => '' }, - }, -})) - -jest.mock('decompress', () => ({ - default: () => { - return Promise.resolve() - }, -})) - -jest.mock('@janhq/core/node', () => ({ - ...jest.requireActual('@janhq/core/node'), - getJanDataFolderPath: () => '', - getSystemResourceInfo: () => { - return { - cpu: { - cores: 1, - logicalCores: 1, - threads: 1, - model: 'model', - speed: 1, - }, - memory: { - total: 1, - free: 1, - }, - gpu: { - model: 'model', - memory: 1, - cuda: { - version: 'version', - devices: 'devices', - }, - vulkan: { - version: 'version', - devices: 'devices', - }, - }, - } - }, -})) - -jest.mock('fs', () => ({ - default: { - readdirSync: () => [], - }, -})) - -jest.mock('child_process', () => ({ - exec: () => { - return { - stdout: { on: jest.fn() }, - stderr: { on: jest.fn() }, - on: jest.fn(), - } - }, - spawn: () => { - return { - stdout: { on: jest.fn() }, - stderr: { on: jest.fn() }, - on: jest.fn(), - pid: '111', - } - }, -})) - -jest.mock('tcp-port-used', () => ({ - default: { - waitUntilFree: () => Promise.resolve(true), - waitUntilUsed: () => Promise.resolve(true), - }, -})) - -jest.mock('./execute', () => ({ - executableNitroFile: () => { - return { - enginePath: 'enginePath', - executablePath: 'executablePath', - cudaVisibleDevices: 'cudaVisibleDevices', - vkVisibleDevices: 'vkVisibleDevices', - } - }, -})) - -jest.mock('terminate', () => ({ - default: (id: String, func: Function) => { - console.log(id) - func() - }, -})) - -import * as execute from './execute' -import index from './index' - -let executeMock = execute - -const modelInitOptions: any = { - modelFolder: '/path/to/model', - model: { - id: 'test', - name: 'test', - engine: 'nitro', - version: '0.0', - format: 'GGUF', - object: 'model', - sources: [], - created: 0, - description: 'test', - parameters: {}, - metadata: { - author: '', - tags: [], - size: 0, - }, - settings: { - prompt_template: '{prompt}', - llama_model_path: 'model.gguf', - }, - }, -} - -describe('loadModel', () => { - it('should load a model successfully', async () => { - // Mock the necessary parameters and system information - - const systemInfo = { - // Mock the system information if needed - } - - // Call the loadModel function - const result = await index.loadModel(modelInitOptions, systemInfo) - - // Assert that the result is as expected - expect(result).toBeUndefined() - }) - - it('should reject with an error message if the model is not a nitro model', async () => { - // Mock the necessary parameters and system information - - const systemInfo = { - // Mock the system information if needed - } - modelInitOptions.model.engine = 'not-nitro' - // Call the loadModel function - try { - await index.loadModel(modelInitOptions, systemInfo) - } catch (error) { 
- // Assert that the error message is as expected - expect(error).toBe('Not a cortex model') - } - modelInitOptions.model.engine = 'nitro' - }) - - it('should reject if model load failed with an error message', async () => { - // Mock the necessary parameters and system information - - const systemInfo = { - // Mock the system information if needed - } - // Mock the fetch-retry module to return a failed response - jest.mock('fetch-retry', () => ({ - default: () => () => { - return Promise.resolve({ - ok: false, - status: 500, - json: () => - Promise.resolve({ - model_loaded: false, - }), - text: () => Promise.resolve('Failed to load model'), - }) - }, - })) - - // Call the loadModel function - try { - await index.loadModel(modelInitOptions, systemInfo) - } catch (error) { - // Assert that the error message is as expected - expect(error).toBe('Failed to load model') - } - }) - - it('should reject if port not available', async () => { - // Mock the necessary parameters and system information - - const systemInfo = { - // Mock the system information if needed - } - - // Mock the tcp-port-used module to return false - jest.mock('tcp-port-used', () => ({ - default: { - waitUntilFree: () => Promise.resolve(false), - waitUntilUsed: () => Promise.resolve(false), - }, - })) - - // Call the loadModel function - try { - await index.loadModel(modelInitOptions, systemInfo) - } catch (error) { - // Assert that the error message is as expected - expect(error).toBe('Port not available') - } - }) - - it('should run on GPU model if ngl is set', async () => { - const systemInfo: any = { - gpuSetting: { - run_mode: 'gpu', - }, - } - // Spy executableNitroFile - jest.spyOn(executeMock, 'executableNitroFile').mockReturnValue({ - enginePath: '', - executablePath: '', - cudaVisibleDevices: '', - vkVisibleDevices: '', - }) - - Object.defineProperty(process, 'platform', { value: 'win32' }) - await index.loadModel( - { - ...modelInitOptions, - model: { - ...modelInitOptions.model, - settings: { - ...modelInitOptions.model.settings, - ngl: 40, - }, - }, - }, - systemInfo - ) - expect(executeMock.executableNitroFile).toHaveBeenCalledWith({ - run_mode: 'gpu', - }) - }) - - it('should run on correct CPU instructions if ngl is not set', async () => { - const systemInfo: any = { - gpuSetting: { - run_mode: 'gpu', - }, - } - // Spy executableNitroFile - jest.spyOn(executeMock, 'executableNitroFile').mockReturnValue({ - enginePath: '', - executablePath: '', - cudaVisibleDevices: '', - vkVisibleDevices: '', - }) - - Object.defineProperty(process, 'platform', { value: 'win32' }) - await index.loadModel( - { - ...modelInitOptions, - model: { - ...modelInitOptions.model, - settings: { - ...modelInitOptions.model.settings, - ngl: undefined, - }, - }, - }, - systemInfo - ) - expect(executeMock.executableNitroFile).toHaveBeenCalledWith({ - run_mode: 'cpu', - }) - }) - - it('should run on correct CPU instructions if ngl is 0', async () => { - const systemInfo: any = { - gpuSetting: { - run_mode: 'gpu', - }, - } - // Spy executableNitroFile - jest.spyOn(executeMock, 'executableNitroFile').mockReturnValue({ - enginePath: '', - executablePath: '', - cudaVisibleDevices: '', - vkVisibleDevices: '', - }) - - Object.defineProperty(process, 'platform', { value: 'win32' }) - await index.loadModel( - { - ...modelInitOptions, - model: { - ...modelInitOptions.model, - settings: { - ...modelInitOptions.model.settings, - ngl: 0, - }, - }, - }, - systemInfo - ) - expect(executeMock.executableNitroFile).toHaveBeenCalledWith({ - run_mode: 'cpu', - 
}) - }) -}) - -describe('unloadModel', () => { - it('should unload a model successfully', async () => { - // Call the unloadModel function - const result = await index.unloadModel() - - // Assert that the result is as expected - expect(result).toBeUndefined() - }) - - it('should reject with an error message if the model is not a nitro model', async () => { - // Call the unloadModel function - try { - await index.unloadModel() - } catch (error) { - // Assert that the error message is as expected - expect(error).toBe('Not a cortex model') - } - }) - - it('should reject if model unload failed with an error message', async () => { - // Mock the fetch-retry module to return a failed response - jest.mock('fetch-retry', () => ({ - default: () => () => { - return Promise.resolve({ - ok: false, - status: 500, - json: () => - Promise.resolve({ - model_unloaded: false, - }), - text: () => Promise.resolve('Failed to unload model'), - }) - }, - })) - - // Call the unloadModel function - try { - await index.unloadModel() - } catch (error) { - // Assert that the error message is as expected - expect(error).toBe('Failed to unload model') - } - }) - - it('should reject if port not available', async () => { - // Mock the tcp-port-used module to return false - jest.mock('tcp-port-used', () => ({ - default: { - waitUntilFree: () => Promise.resolve(false), - waitUntilUsed: () => Promise.resolve(false), - }, - })) - - // Call the unloadModel function - try { - await index.unloadModel() - } catch (error) { - // Assert that the error message is as expected - expect(error).toBe('Port not available') - } - }) -}) -describe('dispose', () => { - it('should dispose a model successfully on Mac', async () => { - Object.defineProperty(process, 'platform', { - value: 'darwin', - }) - - // Call the dispose function - const result = await index.dispose() - - // Assert that the result is as expected - expect(result).toBeUndefined() - }) - - it('should kill the subprocess successfully on Windows', async () => { - Object.defineProperty(process, 'platform', { - value: 'win32', - }) - - // Call the killSubprocess function - const result = await index.dispose() - - // Assert that the result is as expected - expect(result).toBeUndefined() - }) -}) - -describe('getCurrentNitroProcessInfo', () => { - it('should return the current nitro process info', async () => { - // Call the getCurrentNitroProcessInfo function - const result = await index.getCurrentNitroProcessInfo() - - // Assert that the result is as expected - expect(result).toEqual({ - isRunning: true, - }) - }) -}) - -describe('decompressRunner', () => { - it('should decompress the runner successfully', async () => { - jest.mock('decompress', () => ({ - default: () => { - return Promise.resolve() - }, - })) - // Call the decompressRunner function - const result = await index.decompressRunner('', '') - - // Assert that the result is as expected - expect(result).toBeUndefined() - }) - it('should not reject if decompression failed', async () => { - jest.mock('decompress', () => ({ - default: () => { - return Promise.reject('Failed to decompress') - }, - })) - // Call the decompressRunner function - const result = await index.decompressRunner('', '') - expect(result).toBeUndefined() - }) -}) - -describe('addAdditionalDependencies', () => { - it('should add additional dependencies successfully', async () => { - // Call the addAdditionalDependencies function - const result = await index.addAdditionalDependencies({ - name: 'name', - version: 'version', - }) - - // Assert that the 
result is as expected - expect(result).toBeUndefined() - }) -}) diff --git a/extensions/inference-nitro-extension/src/node/index.ts b/extensions/inference-nitro-extension/src/node/index.ts deleted file mode 100644 index 98ca4572f..000000000 --- a/extensions/inference-nitro-extension/src/node/index.ts +++ /dev/null @@ -1,501 +0,0 @@ -import fs from 'fs' -import path from 'path' -import { ChildProcessWithoutNullStreams, spawn } from 'child_process' -import tcpPortUsed from 'tcp-port-used' -import fetchRT from 'fetch-retry' -import { - log, - getSystemResourceInfo, - InferenceEngine, - ModelSettingParams, - PromptTemplate, - SystemInformation, - getJanDataFolderPath, - ModelFile, -} from '@janhq/core/node' -import { executableNitroFile } from './execute' -import terminate from 'terminate' -import decompress from 'decompress' - -// Polyfill fetch with retry -const fetchRetry = fetchRT(fetch) - -/** - * The response object for model init operation. - */ -interface ModelInitOptions { - modelFolder: string - model: ModelFile -} -// The PORT to use for the Nitro subprocess -const PORT = 3928 -// The HOST address to use for the Nitro subprocess -const LOCAL_HOST = '127.0.0.1' -// The URL for the Nitro subprocess -const NITRO_HTTP_SERVER_URL = `http://${LOCAL_HOST}:${PORT}` -// The URL for the Nitro subprocess to load a model -const NITRO_HTTP_LOAD_MODEL_URL = `${NITRO_HTTP_SERVER_URL}/inferences/server/loadmodel` -// The URL for the Nitro subprocess to validate a model -const NITRO_HTTP_VALIDATE_MODEL_URL = `${NITRO_HTTP_SERVER_URL}/inferences/server/modelstatus` -// The URL for the Nitro subprocess to kill itself -const NITRO_HTTP_KILL_URL = `${NITRO_HTTP_SERVER_URL}/processmanager/destroy` - -const NITRO_PORT_FREE_CHECK_INTERVAL = 100 - -// The supported model format -// TODO: Should be an array to support more models -const SUPPORTED_MODEL_FORMAT = '.gguf' - -// The subprocess instance for Nitro -let subprocess: ChildProcessWithoutNullStreams | undefined = undefined - -// The current model settings -let currentSettings: (ModelSettingParams & { model?: string }) | undefined = - undefined - -/** - * Stops a Nitro subprocess. - * @param wrapper - The model wrapper. - * @returns A Promise that resolves when the subprocess is terminated successfully, or rejects with an error message if the subprocess fails to terminate. - */ -function unloadModel(): Promise { - return killSubprocess() -} - -/** - * Initializes a Nitro subprocess to load a machine learning model. - * @param wrapper - The model wrapper. - * @returns A Promise that resolves when the model is loaded successfully, or rejects with an error message if the model is not found or fails to load. 
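The deleted Nitro loader below drove a bespoke `/inferences/server/loadmodel` endpoint; its replacement is the pair of `ky` calls in the new `src/index.ts` near the top of this diff. A minimal, self-contained sketch of that new flow (the model id and settings are placeholders, and the `CORTEX_API_URL` constant mirrors the value injected by the new rollup config):

```ts
import ky from 'ky'

const CORTEX_API_URL = 'http://127.0.0.1:39291/v1' // injected via rollup in the real extension

// Hypothetical direct equivalents of loadModel/unloadModel in the new extension.
async function startModel(modelId: string, settings: object = {}): Promise<unknown> {
  return ky
    .post(`${CORTEX_API_URL}/models/start`, {
      // 'llama-cpp' follows the engine renames in the model.json files above
      json: { ...settings, model: modelId, engine: 'llama-cpp' },
    })
    .json()
}

async function stopModel(modelId: string): Promise<unknown> {
  return ky.post(`${CORTEX_API_URL}/models/stop`, { json: { model: modelId } }).json()
}
```

The prompt-template parsing, GGUF discovery, and port polling in the code below all disappear because Cortex keeps model and process state behind its own REST API.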
- * TODO: Should pass absolute of the model file instead of just the name - So we can modurize the module.ts to npm package - */ -async function loadModel( - params: ModelInitOptions, - systemInfo?: SystemInformation -): Promise { - if (params.model.engine !== InferenceEngine.nitro) { - // Not a nitro model - return Promise.resolve() - } - - if (params.model.engine !== InferenceEngine.nitro) { - return Promise.reject('Not a cortex model') - } else { - const nitroResourceProbe = await getSystemResourceInfo() - // Convert settings.prompt_template to system_prompt, user_prompt, ai_prompt - if (params.model.settings.prompt_template) { - const promptTemplate = params.model.settings.prompt_template - const prompt = promptTemplateConverter(promptTemplate) - if (prompt?.error) { - return Promise.reject(prompt.error) - } - params.model.settings.system_prompt = prompt.system_prompt - params.model.settings.user_prompt = prompt.user_prompt - params.model.settings.ai_prompt = prompt.ai_prompt - } - - // modelFolder is the absolute path to the running model folder - // e.g. ~/jan/models/llama-2 - let modelFolder = params.modelFolder - - let llama_model_path = params.model.settings.llama_model_path - - // Absolute model path support - if ( - params.model?.sources.length && - params.model.sources.every((e) => fs.existsSync(e.url)) - ) { - llama_model_path = - params.model.sources.length === 1 - ? params.model.sources[0].url - : params.model.sources.find((e) => - e.url.includes(llama_model_path ?? params.model.id) - )?.url - } - - if (!llama_model_path || !path.isAbsolute(llama_model_path)) { - // Look for GGUF model file - const modelFiles: string[] = fs.readdirSync(modelFolder) - const ggufBinFile = modelFiles.find( - (file) => - // 1. Prioritize llama_model_path (predefined) - (llama_model_path && file === llama_model_path) || - // 2. Prioritize GGUF File (manual import) - file.toLowerCase().includes(SUPPORTED_MODEL_FORMAT) || - // 3. Fallback Model ID (for backward compatibility) - file === params.model.id - ) - if (ggufBinFile) llama_model_path = path.join(modelFolder, ggufBinFile) - } - - // Look for absolute source path for single model - - if (!llama_model_path) return Promise.reject('No GGUF model file found') - - currentSettings = { - cpu_threads: Math.max(1, nitroResourceProbe.numCpuPhysicalCore), - // model.settings can override the default settings - ...params.model.settings, - llama_model_path, - model: params.model.id, - // This is critical and requires real CPU physical core count (or performance core) - ...(params.model.settings.mmproj && { - mmproj: path.isAbsolute(params.model.settings.mmproj) - ? params.model.settings.mmproj - : path.join(modelFolder, params.model.settings.mmproj), - }), - } - return runNitroAndLoadModel(params.model.id, systemInfo) - } -} - -/** - * 1. Spawn Nitro process - * 2. Load model into Nitro subprocess - * 3. 
Validate model status - * @returns - */ -async function runNitroAndLoadModel( - modelId: string, - systemInfo?: SystemInformation -) { - // Gather system information for CPU physical cores and memory - return killSubprocess() - .then(() => - tcpPortUsed.waitUntilFree(PORT, NITRO_PORT_FREE_CHECK_INTERVAL, 5000) - ) - .then(() => spawnNitroProcess(systemInfo)) - .then(() => loadLLMModel(currentSettings)) - .then(() => validateModelStatus(modelId)) - .catch((err) => { - // TODO: Broadcast error so app could display proper error message - log(`[CORTEX]::Error: ${err}`) - return { error: err } - }) -} - -/** - * Parse prompt template into agrs settings - * @param promptTemplate Template as string - * @returns - */ -function promptTemplateConverter(promptTemplate: string): PromptTemplate { - // Split the string using the markers - const systemMarker = '{system_message}' - const promptMarker = '{prompt}' - - if ( - promptTemplate.includes(systemMarker) && - promptTemplate.includes(promptMarker) - ) { - // Find the indices of the markers - const systemIndex = promptTemplate.indexOf(systemMarker) - const promptIndex = promptTemplate.indexOf(promptMarker) - - // Extract the parts of the string - const system_prompt = promptTemplate.substring(0, systemIndex) - const user_prompt = promptTemplate.substring( - systemIndex + systemMarker.length, - promptIndex - ) - const ai_prompt = promptTemplate.substring( - promptIndex + promptMarker.length - ) - - // Return the split parts - return { system_prompt, user_prompt, ai_prompt } - } else if (promptTemplate.includes(promptMarker)) { - // Extract the parts of the string for the case where only promptMarker is present - const promptIndex = promptTemplate.indexOf(promptMarker) - const user_prompt = promptTemplate.substring(0, promptIndex) - const ai_prompt = promptTemplate.substring( - promptIndex + promptMarker.length - ) - - // Return the split parts - return { user_prompt, ai_prompt } - } - - // Return an error if none of the conditions are met - return { error: 'Cannot split prompt template' } -} - -/** - * Loads a LLM model into the Nitro subprocess by sending a HTTP POST request. - * @returns A Promise that resolves when the model is loaded successfully, or rejects with an error message if the model is not found or fails to load. - */ -function loadLLMModel(settings: any): Promise { - if (!settings?.ngl) { - settings.ngl = 100 - } - log(`[CORTEX]:: Loading model with params ${JSON.stringify(settings)}`) - return fetchRetry(NITRO_HTTP_LOAD_MODEL_URL, { - method: 'POST', - headers: { - 'Content-Type': 'application/json', - }, - body: JSON.stringify(settings), - retries: 3, - retryDelay: 300, - }) - .then((res) => { - log( - `[CORTEX]:: Load model success with response ${JSON.stringify( - res - )}` - ) - return Promise.resolve(res) - }) - .catch((err) => { - log(`[CORTEX]::Error: Load model failed with error ${err}`) - return Promise.reject(err) - }) -} - -/** - * Validates the status of a model. - * @returns {Promise} A promise that resolves to an object. - * If the model is loaded successfully, the object is empty. - * If the model is not loaded successfully, the object contains an error message. - */ -async function validateModelStatus(modelId: string): Promise { - // Send a GET request to the validation URL. - // Retry the request up to 3 times if it fails, with a delay of 500 milliseconds between retries. 
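The retry-based validation in this deleted code has no direct successor: liveness is instead delegated to the `ProcessWatchdog` introduced earlier in this diff, which respawns the server on unexpected exits. A small usage sketch (the binary path is a placeholder; the option defaults and event names follow `watchdog.ts`):

```ts
import { ProcessWatchdog } from './watchdog'

// Hypothetical supervision of the Cortex server; the real wiring lives in
// run() in the new node/index.ts.
const dog = new ProcessWatchdog('/path/to/cortex', ['--start-server', '--port', '39291'], {
  restartDelay: 5000, // ms between restart attempts (the watchdog's default)
  maxRestarts: 5, // after which 'maxRestartsReached' is emitted
})
dog.on('close', (code) => console.log(`cortex exited with code ${code}`))
dog.on('maxRestartsReached', () => console.error('cortex kept crashing; giving up'))
dog.start()
```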
- log(`[CORTEX]:: Validating model ${modelId}`) - return fetchRetry(NITRO_HTTP_VALIDATE_MODEL_URL, { - method: 'POST', - body: JSON.stringify({ - model: modelId, - // TODO: force to use cortex llamacpp by default - engine: 'cortex.llamacpp', - }), - headers: { - 'Content-Type': 'application/json', - }, - retries: 5, - retryDelay: 300, - }).then(async (res: Response) => { - log( - `[CORTEX]:: Validate model state with response ${JSON.stringify( - res.status - )}` - ) - // If the response is OK, check model_loaded status. - if (res.ok) { - const body = await res.json() - // If the model is loaded, return an empty object. - // Otherwise, return an object with an error message. - if (body.model_loaded) { - log( - `[CORTEX]:: Validate model state success with response ${JSON.stringify( - body - )}` - ) - return Promise.resolve() - } - } - const errorBody = await res.text() - log( - `[CORTEX]:: Validate model state failed with response ${errorBody} and status is ${JSON.stringify( - res.statusText - )}` - ) - return Promise.reject('Validate model status failed') - }) -} - -/** - * Terminates the Nitro subprocess. - * @returns A Promise that resolves when the subprocess is terminated successfully, or rejects with an error message if the subprocess fails to terminate. - */ -async function killSubprocess(): Promise { - const controller = new AbortController() - setTimeout(() => controller.abort(), 5000) - log(`[CORTEX]:: Request to kill cortex`) - - const killRequest = () => { - return fetch(NITRO_HTTP_KILL_URL, { - method: 'DELETE', - signal: controller.signal, - }) - .catch(() => {}) // Do nothing with this attempt - .then(() => - tcpPortUsed.waitUntilFree(PORT, NITRO_PORT_FREE_CHECK_INTERVAL, 5000) - ) - .then(() => log(`[CORTEX]:: cortex process is terminated`)) - .catch((err) => { - log( - `[CORTEX]:: Could not kill running process on port ${PORT}. Might be another process running on the same port? ${err}` - ) - throw 'PORT_NOT_AVAILABLE' - }) - } - - if (subprocess?.pid && process.platform !== 'darwin') { - log(`[CORTEX]:: Killing PID ${subprocess.pid}`) - const pid = subprocess.pid - return new Promise((resolve, reject) => { - terminate(pid, function (err) { - if (err) { - log('[CORTEX]::Failed to kill PID - sending request to kill') - killRequest().then(resolve).catch(reject) - } else { - tcpPortUsed - .waitUntilFree(PORT, NITRO_PORT_FREE_CHECK_INTERVAL, 5000) - .then(() => log(`[CORTEX]:: cortex process is terminated`)) - .then(() => resolve()) - .catch(() => { - log( - '[CORTEX]::Failed to kill PID (Port check timeout) - sending request to kill' - ) - killRequest().then(resolve).catch(reject) - }) - } - }) - }) - } else { - return killRequest() - } -} - -/** - * Spawns a Nitro subprocess. - * @returns A promise that resolves when the Nitro subprocess is started. - */ -function spawnNitroProcess(systemInfo?: SystemInformation): Promise { - log(`[CORTEX]:: Spawning cortex subprocess...`) - - return new Promise(async (resolve, reject) => { - let executableOptions = executableNitroFile( - // If ngl is not set or equal to 0, run on CPU with correct instructions - systemInfo?.gpuSetting - ? { - ...systemInfo.gpuSetting, - run_mode: - currentSettings?.ngl === undefined || currentSettings.ngl === 0 - ? 
-
-/**
- * Spawns a Nitro subprocess.
- * @returns A promise that resolves when the Nitro subprocess is started.
- */
-function spawnNitroProcess(systemInfo?: SystemInformation): Promise<void> {
-  log(`[CORTEX]:: Spawning cortex subprocess...`)
-
-  return new Promise<void>(async (resolve, reject) => {
-    let executableOptions = executableNitroFile(
-      // If ngl is not set or equal to 0, run on CPU with the correct instructions
-      systemInfo?.gpuSetting
-        ? {
-            ...systemInfo.gpuSetting,
-            run_mode:
-              currentSettings?.ngl === undefined || currentSettings.ngl === 0
-                ? 'cpu'
-                : systemInfo.gpuSetting.run_mode,
-          }
-        : undefined
-    )
-
-    const args: string[] = ['1', LOCAL_HOST, PORT.toString()]
-    // Execute the binary
-    log(
-      `[CORTEX]:: Spawn cortex at path: ${executableOptions.executablePath}, and args: ${args}`
-    )
-    log(`[CORTEX]::Debug: Cortex engine path: ${executableOptions.enginePath}`)
-
-    // Add the engine path to PATH and LD_LIBRARY_PATH
-    process.env.PATH = (process.env.PATH || '').concat(
-      path.delimiter,
-      executableOptions.enginePath
-    )
-    log(`[CORTEX] PATH: ${process.env.PATH}`)
-    process.env.LD_LIBRARY_PATH = (process.env.LD_LIBRARY_PATH || '').concat(
-      path.delimiter,
-      executableOptions.enginePath
-    )
-
-    subprocess = spawn(
-      executableOptions.executablePath,
-      ['1', LOCAL_HOST, PORT.toString()],
-      {
-        cwd: path.join(path.parse(executableOptions.executablePath).dir),
-        env: {
-          ...process.env,
-          ENGINE_PATH: executableOptions.enginePath,
-          CUDA_VISIBLE_DEVICES: executableOptions.cudaVisibleDevices,
-          // Vulkan - Support 1 device at a time for now
-          ...(executableOptions.vkVisibleDevices?.length > 0 && {
-            GGML_VULKAN_DEVICE: executableOptions.vkVisibleDevices[0],
-          }),
-        },
-      }
-    )
-
-    // Handle subprocess output
-    subprocess.stdout.on('data', (data: any) => {
-      log(`[CORTEX]:: ${data}`)
-    })
-
-    subprocess.stderr.on('data', (data: any) => {
-      log(`[CORTEX]::Error: ${data}`)
-    })
-
-    subprocess.on('close', (code: any) => {
-      log(`[CORTEX]:: cortex exited with code: ${code}`)
-      subprocess = undefined
-      reject(`child process exited with code ${code}`)
-    })
-
-    tcpPortUsed
-      .waitUntilUsed(PORT, NITRO_PORT_FREE_CHECK_INTERVAL, 30000)
-      .then(() => {
-        log(`[CORTEX]:: cortex is ready`)
-        resolve()
-      })
-  })
-}
-
-/**
- * Every module should have a dispose function.
- * It is called when the extension is unloaded and should clean up any resources.
- * It is also called when the app is closed.
- */
-function dispose() {
-  // clean up other registered resources here
-  killSubprocess()
-}
-
-/**
- * Nitro process info
- */
-export interface NitroProcessInfo {
-  isRunning: boolean
-}
-
-/**
- * Retrieve the current nitro process info
- */
-const getCurrentNitroProcessInfo = (): NitroProcessInfo => {
-  return {
-    isRunning: subprocess != null,
-  }
-}
-
-const addAdditionalDependencies = (data: { name: string; version: string }) => {
-  log(
-    `[CORTEX]::Debug: Adding additional dependencies for ${data.name} ${data.version}`
-  )
-  const additionalPath = path.delimiter.concat(
-    path.join(getJanDataFolderPath(), 'engines', data.name, data.version)
-  )
-  // Set the updated PATH
-  process.env.PATH = (process.env.PATH || '').concat(
-    path.delimiter,
-    additionalPath
-  )
-  process.env.LD_LIBRARY_PATH = (process.env.LD_LIBRARY_PATH || '').concat(
-    path.delimiter,
-    additionalPath
-  )
-}
-
-const decompressRunner = async (zipPath: string, output: string) => {
-  console.debug(`Decompressing ${zipPath} to ${output}...`)
-  try {
-    const files = await decompress(zipPath, output)
-    console.debug('Decompress finished!', files)
-  } catch (err) {
-    console.error(`Decompress ${zipPath} failed: ${err}`)
-  }
-}
-
-export default {
-  loadModel,
-  unloadModel,
-  dispose,
-  getCurrentNitroProcessInfo,
-  addAdditionalDependencies,
-  decompressRunner,
-}
diff --git a/extensions/model-extension/package.json b/extensions/model-extension/package.json
index 3a694e5a0..bd834454a 100644
--- a/extensions/model-extension/package.json
+++ b/extensions/model-extension/package.json
@@ -4,7 +4,6 @@
   "version": "1.0.34",
   "description": "Model Management Extension provides model exploration and seamless downloads",
   "main": "dist/index.js",
-  "node": "dist/node/index.cjs.js",
   "author": "Jan ",
   "license": "AGPL-3.0",
   "scripts": {
@@ -36,15 +35,9 @@
     "README.md"
   ],
   "dependencies": {
-    "@huggingface/gguf": "^0.0.11",
-    "@huggingface/jinja": "^0.3.0",
     "@janhq/core": "file:../../core",
-    "hyllama": "^0.2.2",
-    "python-shell": "^5.0.0"
+    "ky": "^1.7.2",
+    "p-queue": "^8.0.1"
   },
-  "bundleDependencies": [
-    "hyllama",
-    "@huggingface/gguf",
-    "@huggingface/jinja"
-  ]
+  "bundleDependencies": []
 }
diff --git a/extensions/model-extension/resources/default-model.json b/extensions/model-extension/resources/default-model.json
deleted file mode 100644
index c02008cd6..000000000
--- a/extensions/model-extension/resources/default-model.json
+++ /dev/null
@@ -1,36 +0,0 @@
-{
-  "object": "model",
-  "version": "1.0",
-  "format": "gguf",
-  "sources": [
-    {
-      "url": "N/A",
-      "filename": "N/A"
-    }
-  ],
-  "id": "N/A",
-  "name": "N/A",
-  "created": 0,
-  "description": "User self import model",
-  "settings": {
-    "ctx_len": 2048,
-    "embedding": false,
-    "prompt_template": "{system_message}\n### Instruction: {prompt}\n### Response:",
-    "llama_model_path": "N/A"
-  },
-  "parameters": {
-    "temperature": 0.7,
-    "top_p": 0.95,
-    "stream": true,
-    "max_tokens": 2048,
-    "stop": ["<|END_OF_TURN_TOKEN|>", "", "[/INST]", "<|end_of_text|>", "<|eot_id|>", "<|im_end|>", "<|end|>"],
-    "frequency_penalty": 0,
-    "presence_penalty": 0
-  },
-  "metadata": {
-    "author": "User",
-    "tags": [],
-    "size": 0
-  },
-  "engine": "nitro"
-}
diff --git a/extensions/model-extension/rollup.config.ts b/extensions/model-extension/rollup.config.ts
index d36d8ffac..6e506140f 100644
--- a/extensions/model-extension/rollup.config.ts
+++ b/extensions/model-extension/rollup.config.ts
@@ -6,7 +6,6 @@ import replace from '@rollup/plugin-replace'
 import commonjs from '@rollup/plugin-commonjs'
 const settingJson = require('./resources/settings.json')
 const packageJson = require('./package.json')
-const defaultModelJson = require('./resources/default-model.json')

 export default [
   {
@@ -20,17 +19,18 @@ export default [
     plugins: [
       replace({
         preventAssignment: true,
-        DEFAULT_MODEL: JSON.stringify(defaultModelJson),
         SETTINGS: JSON.stringify(settingJson),
-        NODE: JSON.stringify(`${packageJson.name}/${packageJson.node}`),
       }),
       // Allow json resolution
       json(),
       // Compile TypeScript files
-      typescript({ useTsconfigDeclarationDir: true, exclude: ['**/__tests__', '**/*.test.ts'], }),
+      typescript({
+        useTsconfigDeclarationDir: true,
+        exclude: ['**/__tests__', '**/*.test.ts'],
+      }), // Compile TypeScript files
       // Allow bundling cjs modules (unlike webpack, rollup doesn't understand cjs)
-      // commonjs(),
+      commonjs(),
       // Allow node_modules resolution, so you can use 'external' to control
       // which external modules to include in the bundle
       // https://github.com/rollup/rollup-plugin-node-resolve#usage
@@ -39,39 +39,6 @@ export default [
       resolve({
         browser: true,
       }),
-      // Resolve source maps to the original source
-      sourceMaps(),
-    ],
-  },
-  {
-    input: `src/node/index.ts`,
-    output: [
-      {
-        file: 'dist/node/index.cjs.js',
-        format: 'cjs',
-        sourcemap: true,
-        inlineDynamicImports: true,
-      },
-    ],
-    // Indicate here external modules you don't want to include in your bundle (i.e.: 'lodash')
-    external: ['@janhq/core/node'],
-    watch: {
-      include: 'src/node/**',
-    },
-    plugins: [
-      // Allow json resolution
-      json(),
-      // Compile TypeScript files
-      typescript({ useTsconfigDeclarationDir: true, exclude: ['**/__tests__', '**/*.test.ts'], }),
-      // Allow bundling cjs modules (unlike webpack, rollup doesn't understand cjs)
-      commonjs(),
-      // Allow node_modules resolution, so you can use 'external' to control
-      // which external modules to include in the bundle
-      // https://github.com/rollup/rollup-plugin-node-resolve#usage
-      resolve({
-        extensions: ['.ts', '.js', '.json'],
-      }),
-      // Resolve source maps to the original source
       sourceMaps(),
     ],
diff --git a/extensions/model-extension/src/@types/InvalidHostError.ts b/extensions/model-extension/src/@types/InvalidHostError.ts
deleted file mode 100644
index 47262206e..000000000
--- a/extensions/model-extension/src/@types/InvalidHostError.ts
+++ /dev/null
@@ -1,6 +0,0 @@
-export class InvalidHostError extends Error {
-  constructor(message: string) {
-    super(message)
-    this.name = 'InvalidHostError'
-  }
-}
diff --git a/extensions/model-extension/src/@types/NotSupportModelError.ts b/extensions/model-extension/src/@types/NotSupportModelError.ts
deleted file mode 100644
index 0a1946176..000000000
--- a/extensions/model-extension/src/@types/NotSupportModelError.ts
+++ /dev/null
@@ -1,6 +0,0 @@
-export class NotSupportedModelError extends Error {
-  constructor(message: string) {
-    super(message)
-    this.name = 'NotSupportedModelError'
-  }
-}
diff --git a/extensions/model-extension/src/@types/global.d.ts b/extensions/model-extension/src/@types/global.d.ts
index 3878d4bf2..01bd272f2 100644
--- a/extensions/model-extension/src/@types/global.d.ts
+++ b/extensions/model-extension/src/@types/global.d.ts
@@ -1,6 +1,5 @@
 export {}
 declare global {
-  declare const DEFAULT_MODEL: object
   declare const NODE: string

   interface Core {
diff --git a/extensions/model-extension/src/cortex.ts b/extensions/model-extension/src/cortex.ts
new file mode 100644
index 000000000..685bf3b9f
--- /dev/null
+++ b/extensions/model-extension/src/cortex.ts
@@ -0,0 +1,166 @@
+import PQueue from 'p-queue'
+import ky from 'ky'
+import {
+  DownloadEvent,
+  events,
+  Model,
+  ModelRuntimeParams,
+  ModelSettingParams,
+} from '@janhq/core'
+/**
+ * cortex.cpp Model APIs interface
+ */
+interface ICortexAPI {
+  getModel(model: string): Promise<Model>
+  getModels(): Promise<Model[]>
+  pullModel(model: string): Promise<void>
+  importModel(path: string, modelPath: string): Promise<void>
+  deleteModel(model: string): Promise<void>
+  updateModel(model: object): Promise<void>
+  cancelModelPull(model: string): Promise<void>
+}
+/**
+ * Simple CortexAPI service
+ * It could be replaced by the cortex client SDK later on
+ */
+const API_URL = 'http://127.0.0.1:39291'
+const SOCKET_URL = 'ws://127.0.0.1:39291'
+
+type ModelList = {
+  data: any[]
+}
+
+export class CortexAPI implements ICortexAPI {
+  queue = new PQueue({ concurrency: 1 })
+  socket?: WebSocket = undefined
+
+  constructor() {
+    this.queue.add(() => this.healthz())
+    this.subscribeToEvents()
+  }
+
+  getModel(model: string): Promise<Model> {
+    return this.queue.add(() =>
+      ky
+        .get(`${API_URL}/v1/models/${model}`)
+        .json()
+        .then((e) => this.transformModel(e))
+    )
+  }
+
+  getModels(): Promise<Model[]> {
+    return this.queue
+      .add(() => ky.get(`${API_URL}/models`).json<ModelList>())
+      .then((e) =>
+        typeof e === 'object' ? e.data.map((e) => this.transformModel(e)) : []
+      )
+  }
+
+  pullModel(model: string): Promise<void> {
+    return this.queue.add(() =>
+      ky
+        .post(`${API_URL}/v1/models/pull`, { json: { model } })
+        .json()
+        .catch(async (e) => {
+          throw (await e.response?.json()) ?? e
+        })
+        .then()
+    )
+  }
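// [Editor's note] Standalone sketch, not part of the diff: every CortexAPI call
// above funnels through a single-concurrency PQueue, so requests are serialized
// and nothing runs before the healthz() probe queued in the constructor.
// Minimal reproduction of that pattern (URL mirrors API_URL above):
import PQueue from 'p-queue'
import ky from 'ky'

const q = new PQueue({ concurrency: 1 })
q.add(() => ky.get('http://127.0.0.1:39291/healthz').text()) // runs first
q.add(() => ky.get('http://127.0.0.1:39291/models').json()) // then this one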
+
+  importModel(model: string, modelPath: string): Promise<void> {
+    return this.queue.add(() =>
+      ky
+        .post(`${API_URL}/v1/models/import`, { json: { model, modelPath } })
+        .json()
+        .catch((e) => console.debug(e)) // Ignore error
+        .then()
+    )
+  }
+
+  deleteModel(model: string): Promise<void> {
+    return this.queue.add(() =>
+      ky.delete(`${API_URL}/models/${model}`).json().then()
+    )
+  }
+
+  updateModel(model: object): Promise<void> {
+    return this.queue.add(() =>
+      ky
+        .patch(`${API_URL}/v1/models/${model}`, { json: { model } })
+        .json()
+        .then()
+    )
+  }
+  cancelModelPull(model: string): Promise<void> {
+    return this.queue.add(() =>
+      ky
+        .delete(`${API_URL}/models/pull`, { json: { taskId: model } })
+        .json()
+        .then()
+    )
+  }
+
+  healthz(): Promise<void> {
+    return ky
+      .get(`${API_URL}/healthz`, {
+        retry: {
+          limit: 10,
+          methods: ['get'],
+        },
+      })
+      .then(() => {})
+  }
+
+  subscribeToEvents() {
+    this.queue.add(
+      () =>
+        new Promise<void>((resolve) => {
+          this.socket = new WebSocket(`${SOCKET_URL}/events`)
+          console.log('Socket connected')
+
+          this.socket.addEventListener('message', (event) => {
+            const data = JSON.parse(event.data)
+            const transferred = data.task.items.reduce(
+              (accumulator, currentValue) =>
+                accumulator + currentValue.downloadedBytes,
+              0
+            )
+            const total = data.task.items.reduce(
+              (accumulator, currentValue) => accumulator + currentValue.bytes,
+              0
+            )
+            const percent = ((transferred ?? 1) / (total ?? 1)) * 100
+
+            events.emit(data.type, {
+              modelId: data.task.id,
+              percent: percent,
+              size: {
+                transferred: transferred,
+                total: total,
+              },
+            })
+          })
+          resolve()
+        })
+    )
+  }
+
+  private transformModel(model: any) {
+    model.parameters = setParameters(model)
+    model.settings = setParameters(model)
+    model.metadata = {
+      tags: [],
+    }
+    return model as Model
+  }
+}
+
+type FilteredParams<T> = {
+  [K in keyof T]: T[K]
+}
+
+function setParameters<T>(params: T): T {
+  const filteredParams: FilteredParams<T> = { ...params }
+  return filteredParams
+}
diff --git a/extensions/model-extension/src/helpers/path.test.ts b/extensions/model-extension/src/helpers/path.test.ts
deleted file mode 100644
index 64ca65d8a..000000000
--- a/extensions/model-extension/src/helpers/path.test.ts
+++ /dev/null
@@ -1,87 +0,0 @@
-import { extractFileName } from './path';
-
-describe('extractFileName Function', () => {
-  it('should correctly extract the file name with the provided file extension', () => {
-    const url = 'http://example.com/some/path/to/file.ext';
-    const fileExtension = '.ext';
-    const fileName = extractFileName(url, fileExtension);
-    expect(fileName).toBe('file.ext');
-  });
-
-  it('should correctly append the file extension if it does not already exist in the file name', () => {
-    const url = 'http://example.com/some/path/to/file';
-    const fileExtension = '.txt';
-    const fileName = extractFileName(url, fileExtension);
-    expect(fileName).toBe('file.txt');
-  });
-
-  it('should handle cases where the URL does not have a file extension correctly', () => {
-    const url = 'http://example.com/some/path/to/file';
-    const fileExtension = '.jpg';
-    const fileName = extractFileName(url, fileExtension);
-    expect(fileName).toBe('file.jpg');
-  });
-
-  it('should correctly handle URLs without a trailing slash', () => {
-    const url = 'http://example.com/some/path/tofile';
-    const fileExtension = '.txt';
-    const fileName = extractFileName(url, fileExtension);
-    expect(fileName).toBe('tofile.txt');
-  });
-
-  it('should correctly handle URLs with multiple file extensions', () => {
-    const url =
'http://example.com/some/path/tofile.tar.gz'; - const fileExtension = '.gz'; - const fileName = extractFileName(url, fileExtension); - expect(fileName).toBe('tofile.tar.gz'); - }); - - it('should correctly handle URLs with special characters', () => { - const url = 'http://example.com/some/path/tófílë.extë'; - const fileExtension = '.extë'; - const fileName = extractFileName(url, fileExtension); - expect(fileName).toBe('tófílë.extë'); - }); - - it('should correctly handle URLs that are just a file with no path', () => { - const url = 'http://example.com/file.txt'; - const fileExtension = '.txt'; - const fileName = extractFileName(url, fileExtension); - expect(fileName).toBe('file.txt'); - }); - - it('should correctly handle URLs that have special query parameters', () => { - const url = 'http://example.com/some/path/tofile.ext?query=1'; - const fileExtension = '.ext'; - const fileName = extractFileName(url.split('?')[0], fileExtension); - expect(fileName).toBe('tofile.ext'); - }); - - it('should correctly handle URLs that have uppercase characters', () => { - const url = 'http://EXAMPLE.COM/PATH/TO/FILE.EXT'; - const fileExtension = '.ext'; - const fileName = extractFileName(url, fileExtension); - expect(fileName).toBe('FILE.EXT'); - }); - - it('should correctly handle invalid URLs', () => { - const url = 'invalid-url'; - const fileExtension = '.txt'; - const fileName = extractFileName(url, fileExtension); - expect(fileName).toBe('invalid-url.txt'); - }); - - it('should correctly handle empty URLs', () => { - const url = ''; - const fileExtension = '.txt'; - const fileName = extractFileName(url, fileExtension); - expect(fileName).toBe('.txt'); - }); - - it('should correctly handle undefined URLs', () => { - const url = undefined; - const fileExtension = '.txt'; - const fileName = extractFileName(url as any, fileExtension); - expect(fileName).toBe('.txt'); - }); -}); diff --git a/extensions/model-extension/src/helpers/path.ts b/extensions/model-extension/src/helpers/path.ts deleted file mode 100644 index 6091005b8..000000000 --- a/extensions/model-extension/src/helpers/path.ts +++ /dev/null @@ -1,13 +0,0 @@ -/** - * try to retrieve the download file name from the source url - */ - -export function extractFileName(url: string, fileExtension: string): string { - if(!url) return fileExtension - - const extractedFileName = url.split('/').pop() - const fileName = extractedFileName.toLowerCase().endsWith(fileExtension) - ? 
extractedFileName - : extractedFileName + fileExtension - return fileName -} diff --git a/extensions/model-extension/src/index.test.ts b/extensions/model-extension/src/index.test.ts index 3f804b6d6..05598c30d 100644 --- a/extensions/model-extension/src/index.test.ts +++ b/extensions/model-extension/src/index.test.ts @@ -1,846 +1,90 @@ -/** - * @jest-environment jsdom - */ -const readDirSyncMock = jest.fn() -const existMock = jest.fn() -const readFileSyncMock = jest.fn() -const downloadMock = jest.fn() -const mkdirMock = jest.fn() -const writeFileSyncMock = jest.fn() -const copyFileMock = jest.fn() -const dirNameMock = jest.fn() -const executeMock = jest.fn() +import JanModelExtension from './index' +import { Model } from '@janhq/core' + +let SETTINGS = [] +// @ts-ignore +global.SETTINGS = SETTINGS jest.mock('@janhq/core', () => ({ ...jest.requireActual('@janhq/core/node'), events: { emit: jest.fn(), }, - fs: { - existsSync: existMock, - readdirSync: readDirSyncMock, - readFileSync: readFileSyncMock, - writeFileSync: writeFileSyncMock, - mkdir: mkdirMock, - copyFile: copyFileMock, - fileStat: () => ({ - isDirectory: false, - }), - }, - dirName: dirNameMock, joinPath: (paths) => paths.join('/'), - ModelExtension: jest.fn(), - downloadFile: downloadMock, - executeOnMain: executeMock, + ModelExtension: jest.fn().mockImplementation(function () { + // @ts-ignore + this.registerSettings = () => { + return Promise.resolve() + } + // @ts-ignore + return this + }), })) -jest.mock('@huggingface/gguf') - -global.fetch = jest.fn(() => - Promise.resolve({ - json: () => Promise.resolve({ test: 100 }), - arrayBuffer: jest.fn(), - }) -) as jest.Mock - -import JanModelExtension from '.' -import { fs, dirName } from '@janhq/core' -import { gguf } from '@huggingface/gguf' - describe('JanModelExtension', () => { - let sut: JanModelExtension - - beforeAll(() => { - // @ts-ignore - sut = new JanModelExtension() - }) + let extension: JanModelExtension + let mockCortexAPI: any beforeEach(() => { - jest.clearAllMocks() - }) - - describe('getConfiguredModels', () => { - describe("when there's no models are pre-populated", () => { - it('should return empty array', async () => { - // Mock configured models data - const configuredModels = [] - existMock.mockReturnValue(true) - readDirSyncMock.mockReturnValue([]) - - const result = await sut.getConfiguredModels() - expect(result).toEqual([]) - }) - }) - - describe("when there's are pre-populated models - all flattened", () => { - it('returns configured models data - flatten folder - with correct file_path and model id', async () => { - // Mock configured models data - const configuredModels = [ - { - id: '1', - name: 'Model 1', - version: '1.0.0', - description: 'Model 1 description', - object: { - type: 'model', - uri: 'http://localhost:5000/models/model1', - }, - format: 'onnx', - sources: [], - created: new Date(), - updated: new Date(), - parameters: {}, - settings: {}, - metadata: {}, - engine: 'test', - } as any, - { - id: '2', - name: 'Model 2', - version: '2.0.0', - description: 'Model 2 description', - object: { - type: 'model', - uri: 'http://localhost:5000/models/model2', - }, - format: 'onnx', - sources: [], - parameters: {}, - settings: {}, - metadata: {}, - engine: 'test', - } as any, - ] - existMock.mockReturnValue(true) - - readDirSyncMock.mockImplementation((path) => { - if (path === 'file://models') return ['model1', 'model2'] - else return ['model.json'] - }) - - readFileSyncMock.mockImplementation((path) => { - if (path.includes('model1')) - return 
JSON.stringify(configuredModels[0]) - else return JSON.stringify(configuredModels[1]) - }) - - const result = await sut.getConfiguredModels() - expect(result).toEqual( - expect.arrayContaining([ - expect.objectContaining({ - file_path: 'file://models/model1/model.json', - id: '1', - }), - expect.objectContaining({ - file_path: 'file://models/model2/model.json', - id: '2', - }), - ]) - ) - }) - }) - - describe("when there's are pre-populated models - there are nested folders", () => { - it('returns configured models data - flatten folder - with correct file_path and model id', async () => { - // Mock configured models data - const configuredModels = [ - { - id: '1', - name: 'Model 1', - version: '1.0.0', - description: 'Model 1 description', - object: { - type: 'model', - uri: 'http://localhost:5000/models/model1', - }, - format: 'onnx', - sources: [], - created: new Date(), - updated: new Date(), - parameters: {}, - settings: {}, - metadata: {}, - engine: 'test', - } as any, - { - id: '2', - name: 'Model 2', - version: '2.0.0', - description: 'Model 2 description', - object: { - type: 'model', - uri: 'http://localhost:5000/models/model2', - }, - format: 'onnx', - sources: [], - parameters: {}, - settings: {}, - metadata: {}, - engine: 'test', - } as any, - ] - existMock.mockReturnValue(true) - - readDirSyncMock.mockImplementation((path) => { - if (path === 'file://models') return ['model1', 'model2/model2-1'] - else return ['model.json'] - }) - - readFileSyncMock.mockImplementation((path) => { - if (path.includes('model1')) - return JSON.stringify(configuredModels[0]) - else if (path.includes('model2/model2-1')) - return JSON.stringify(configuredModels[1]) - }) - - const result = await sut.getConfiguredModels() - expect(result).toEqual( - expect.arrayContaining([ - expect.objectContaining({ - file_path: 'file://models/model1/model.json', - id: '1', - }), - expect.objectContaining({ - file_path: 'file://models/model2/model2-1/model.json', - id: '2', - }), - ]) - ) - }) - }) - }) - - describe('getDownloadedModels', () => { - describe('no models downloaded', () => { - it('should return empty array', async () => { - // Mock downloaded models data - existMock.mockReturnValue(true) - readDirSyncMock.mockReturnValue([]) - - const result = await sut.getDownloadedModels() - expect(result).toEqual([]) - }) - }) - describe('only one model is downloaded', () => { - describe('flatten folder', () => { - it('returns downloaded models - with correct file_path and model id', async () => { - // Mock configured models data - const configuredModels = [ - { - id: '1', - name: 'Model 1', - version: '1.0.0', - description: 'Model 1 description', - object: { - type: 'model', - uri: 'http://localhost:5000/models/model1', - }, - format: 'onnx', - sources: [], - created: new Date(), - updated: new Date(), - parameters: {}, - settings: {}, - metadata: {}, - engine: 'test', - } as any, - { - id: '2', - name: 'Model 2', - version: '2.0.0', - description: 'Model 2 description', - object: { - type: 'model', - uri: 'http://localhost:5000/models/model2', - }, - format: 'onnx', - sources: [], - parameters: {}, - settings: {}, - metadata: {}, - engine: 'test', - } as any, - ] - existMock.mockReturnValue(true) - - readDirSyncMock.mockImplementation((path) => { - if (path === 'file://models') return ['model1', 'model2'] - else if (path === 'file://models/model1') - return ['model.json', 'test.gguf'] - else return ['model.json'] - }) - - readFileSyncMock.mockImplementation((path) => { - if (path.includes('model1')) - return 
JSON.stringify(configuredModels[0]) - else return JSON.stringify(configuredModels[1]) - }) - - const result = await sut.getDownloadedModels() - expect(result).toEqual( - expect.arrayContaining([ - expect.objectContaining({ - file_path: 'file://models/model1/model.json', - id: '1', - }), - ]) - ) - }) - }) - }) - - describe('all models are downloaded', () => { - describe('nested folders', () => { - it('returns downloaded models - with correct file_path and model id', async () => { - // Mock configured models data - const configuredModels = [ - { - id: '1', - name: 'Model 1', - version: '1.0.0', - description: 'Model 1 description', - object: { - type: 'model', - uri: 'http://localhost:5000/models/model1', - }, - format: 'onnx', - sources: [], - created: new Date(), - updated: new Date(), - parameters: {}, - settings: {}, - metadata: {}, - engine: 'test', - } as any, - { - id: '2', - name: 'Model 2', - version: '2.0.0', - description: 'Model 2 description', - object: { - type: 'model', - uri: 'http://localhost:5000/models/model2', - }, - format: 'onnx', - sources: [], - parameters: {}, - settings: {}, - metadata: {}, - engine: 'test', - } as any, - ] - existMock.mockReturnValue(true) - - readDirSyncMock.mockImplementation((path) => { - if (path === 'file://models') return ['model1', 'model2/model2-1'] - else return ['model.json', 'test.gguf'] - }) - - readFileSyncMock.mockImplementation((path) => { - if (path.includes('model1')) - return JSON.stringify(configuredModels[0]) - else return JSON.stringify(configuredModels[1]) - }) - - const result = await sut.getDownloadedModels() - expect(result).toEqual( - expect.arrayContaining([ - expect.objectContaining({ - file_path: 'file://models/model1/model.json', - id: '1', - }), - expect.objectContaining({ - file_path: 'file://models/model2/model2-1/model.json', - id: '2', - }), - ]) - ) - }) - }) - }) - - describe('all models are downloaded with uppercased GGUF files', () => { - it('returns downloaded models - with correct file_path and model id', async () => { - // Mock configured models data - const configuredModels = [ - { - id: '1', - name: 'Model 1', - version: '1.0.0', - description: 'Model 1 description', - object: { - type: 'model', - uri: 'http://localhost:5000/models/model1', - }, - format: 'onnx', - sources: [], - created: new Date(), - updated: new Date(), - parameters: {}, - settings: {}, - metadata: {}, - engine: 'test', - } as any, - { - id: '2', - name: 'Model 2', - version: '2.0.0', - description: 'Model 2 description', - object: { - type: 'model', - uri: 'http://localhost:5000/models/model2', - }, - format: 'onnx', - sources: [], - parameters: {}, - settings: {}, - metadata: {}, - engine: 'test', - } as any, - ] - existMock.mockReturnValue(true) - - readDirSyncMock.mockImplementation((path) => { - if (path === 'file://models') return ['model1', 'model2/model2-1'] - else if (path === 'file://models/model1') - return ['model.json', 'test.GGUF'] - else return ['model.json', 'test.gguf'] - }) - - readFileSyncMock.mockImplementation((path) => { - if (path.includes('model1')) - return JSON.stringify(configuredModels[0]) - else return JSON.stringify(configuredModels[1]) - }) - - const result = await sut.getDownloadedModels() - expect(result).toEqual( - expect.arrayContaining([ - expect.objectContaining({ - file_path: 'file://models/model1/model.json', - id: '1', - }), - expect.objectContaining({ - file_path: 'file://models/model2/model2-1/model.json', - id: '2', - }), - ]) - ) - }) - }) - - describe('all models are downloaded - GGUF & 
Tensort RT', () => { - it('returns downloaded models - with correct file_path and model id', async () => { - // Mock configured models data - const configuredModels = [ - { - id: '1', - name: 'Model 1', - version: '1.0.0', - description: 'Model 1 description', - object: { - type: 'model', - uri: 'http://localhost:5000/models/model1', - }, - format: 'onnx', - sources: [], - created: new Date(), - updated: new Date(), - parameters: {}, - settings: {}, - metadata: {}, - engine: 'test', - } as any, - { - id: '2', - name: 'Model 2', - version: '2.0.0', - description: 'Model 2 description', - object: { - type: 'model', - uri: 'http://localhost:5000/models/model2', - }, - format: 'onnx', - sources: [], - parameters: {}, - settings: {}, - metadata: {}, - engine: 'test', - } as any, - ] - existMock.mockReturnValue(true) - - readDirSyncMock.mockImplementation((path) => { - if (path === 'file://models') return ['model1', 'model2/model2-1'] - else if (path === 'file://models/model1') - return ['model.json', 'test.gguf'] - else return ['model.json', 'test.engine'] - }) - - readFileSyncMock.mockImplementation((path) => { - if (path.includes('model1')) - return JSON.stringify(configuredModels[0]) - else return JSON.stringify(configuredModels[1]) - }) - - const result = await sut.getDownloadedModels() - expect(result).toEqual( - expect.arrayContaining([ - expect.objectContaining({ - file_path: 'file://models/model1/model.json', - id: '1', - }), - expect.objectContaining({ - file_path: 'file://models/model2/model2-1/model.json', - id: '2', - }), - ]) - ) - }) - }) - }) - - describe('deleteModel', () => { - describe('model is a GGUF model', () => { - it('should delete the GGUF file', async () => { - fs.unlinkSync = jest.fn() - const dirMock = dirName as jest.Mock - dirMock.mockReturnValue('file://models/model1') - - fs.readFileSync = jest.fn().mockReturnValue(JSON.stringify({})) - - readDirSyncMock.mockImplementation((path) => { - return ['model.json', 'test.gguf'] - }) - - existMock.mockReturnValue(true) - - await sut.deleteModel({ - file_path: 'file://models/model1/model.json', - } as any) - - expect(fs.unlinkSync).toHaveBeenCalledWith( - 'file://models/model1/test.gguf' - ) - }) - - it('no gguf file presented', async () => { - fs.unlinkSync = jest.fn() - const dirMock = dirName as jest.Mock - dirMock.mockReturnValue('file://models/model1') - - fs.readFileSync = jest.fn().mockReturnValue(JSON.stringify({})) - - readDirSyncMock.mockReturnValue(['model.json']) - - existMock.mockReturnValue(true) - - await sut.deleteModel({ - file_path: 'file://models/model1/model.json', - } as any) - - expect(fs.unlinkSync).toHaveBeenCalledTimes(0) - }) - - it('delete an imported model', async () => { - fs.rm = jest.fn() - const dirMock = dirName as jest.Mock - dirMock.mockReturnValue('file://models/model1') - - readDirSyncMock.mockReturnValue(['model.json', 'test.gguf']) - - // MARK: This is a tricky logic implement? 
-      // I will just add a test for now but will align on the legacy implementation
-      fs.readFileSync = jest.fn().mockReturnValue(
-        JSON.stringify({
-          metadata: {
-            author: 'user',
-          },
-        })
-      )
-
-      existMock.mockReturnValue(true)
-
-      await sut.deleteModel({
-        file_path: 'file://models/model1/model.json',
-      } as any)
-
-      expect(fs.rm).toHaveBeenCalledWith('file://models/model1')
-    })
-
-    it('delete tensorrt-models', async () => {
-      fs.rm = jest.fn()
-      const dirMock = dirName as jest.Mock
-      dirMock.mockReturnValue('file://models/model1')
-
-      readDirSyncMock.mockReturnValue(['model.json', 'test.engine'])
-
-      fs.readFileSync = jest.fn().mockReturnValue(JSON.stringify({}))
-
-      existMock.mockReturnValue(true)
-
-      await sut.deleteModel({
-        file_path: 'file://models/model1/model.json',
-      } as any)
-
-      expect(fs.unlinkSync).toHaveBeenCalledWith(
-        'file://models/model1/test.engine'
-      )
-    })
-  })
-})
-
-describe('downloadModel', () => {
-  const model: any = {
-    id: 'model-id',
-    name: 'Test Model',
-    sources: [
-      { url: 'http://example.com/model.gguf', filename: 'model.gguf' },
-    ],
-    engine: 'test-engine',
+    mockCortexAPI = {
+      getModels: jest.fn().mockResolvedValue([]),
+      pullModel: jest.fn().mockResolvedValue(undefined),
+      importModel: jest.fn().mockResolvedValue(undefined),
+      deleteModel: jest.fn().mockResolvedValue(undefined),
+      updateModel: jest.fn().mockResolvedValue({}),
+      cancelModelPull: jest.fn().mockResolvedValue(undefined),
+    }
-  const network = {
-    ignoreSSL: true,
-    proxy: 'http://proxy.example.com',
-  }
+    // @ts-ignore
+    extension = new JanModelExtension()
+    extension.cortexAPI = mockCortexAPI
+  })
-  const gpuSettings: any = {
-    gpus: [{ name: 'nvidia-rtx-3080', arch: 'ampere' }],
-  }
+  it('should register settings on load', async () => {
+    // @ts-ignore
+    const registerSettingsSpy = jest.spyOn(extension, 'registerSettings')
+    await extension.onLoad()
+    expect(registerSettingsSpy).toHaveBeenCalledWith(SETTINGS)
+  })
-  it('should reject with invalid gguf metadata', async () => {
-    existMock.mockImplementation(() => false)
+  it('should pull a model', async () => {
+    const model = 'test-model'
+    await extension.pullModel(model)
+    expect(mockCortexAPI.pullModel).toHaveBeenCalledWith(model)
+  })
-    expect(
-      sut.downloadModel(model, gpuSettings, network)
-    ).rejects.toBeTruthy()
-  })
+  it('should cancel model download', async () => {
+    const model = 'test-model'
+    await extension.cancelModelPull(model)
+    expect(mockCortexAPI.cancelModelPull).toHaveBeenCalledWith(model)
+  })
-  it('should download corresponding ID', async () => {
-    existMock.mockImplementation(() => true)
-    dirNameMock.mockImplementation(() => 'file://models/model1')
-    downloadMock.mockImplementation(() => {
-      return Promise.resolve({})
-    })
+  it('should delete a model', async () => {
+    const model = 'test-model'
+    await extension.deleteModel(model)
+    expect(mockCortexAPI.deleteModel).toHaveBeenCalledWith(model)
+  })
-    expect(
-      await sut.downloadModel(
-        { ...model, file_path: 'file://models/model1/model.json' },
-        gpuSettings,
-        network
-      )
-    ).toBeUndefined()
+  it('should get all models', async () => {
+    const models = await extension.getModels()
+    expect(models).toEqual([])
+    expect(mockCortexAPI.getModels).toHaveBeenCalled()
+  })
-    expect(downloadMock).toHaveBeenCalledWith(
-      {
-        localPath: 'file://models/model1/model.gguf',
-        modelId: 'model-id',
-        url: 'http://example.com/model.gguf',
-      },
-      { ignoreSSL: true, proxy: 'http://proxy.example.com' }
-    )
-  })
+  it('should update a model', async () => {
+    const model: Partial<Model> =
{ id: 'test-model' } + const updatedModel = await extension.updateModel(model) + expect(updatedModel).toEqual({}) + expect(mockCortexAPI.updateModel).toHaveBeenCalledWith(model) + }) - it('should handle invalid model file', async () => { - executeMock.mockResolvedValue({}) - - fs.readFileSync = jest.fn(() => { - return JSON.stringify({ metadata: { author: 'user' } }) - }) - - expect( - sut.downloadModel( - { ...model, file_path: 'file://models/model1/model.json' }, - gpuSettings, - network - ) - ).resolves.not.toThrow() - - expect(downloadMock).not.toHaveBeenCalled() - }) - it('should handle model file with no sources', async () => { - executeMock.mockResolvedValue({}) - const modelWithoutSources = { ...model, sources: [] } - - expect( - sut.downloadModel( - { - ...modelWithoutSources, - file_path: 'file://models/model1/model.json', - }, - gpuSettings, - network - ) - ).resolves.toBe(undefined) - - expect(downloadMock).not.toHaveBeenCalled() - }) - - it('should handle model file with multiple sources', async () => { - const modelWithMultipleSources = { - ...model, - sources: [ - { url: 'http://example.com/model1.gguf', filename: 'model1.gguf' }, - { url: 'http://example.com/model2.gguf', filename: 'model2.gguf' }, - ], - } - - executeMock.mockResolvedValue({ - metadata: { 'tokenizer.ggml.eos_token_id': 0 }, - }) - ;(gguf as jest.Mock).mockResolvedValue({ - metadata: { 'tokenizer.ggml.eos_token_id': 0 }, - }) - // @ts-ignore - global.NODE = 'node' - // @ts-ignore - global.DEFAULT_MODEL = { - parameters: { stop: [] }, - } - downloadMock.mockImplementation(() => { - return Promise.resolve({}) - }) - - expect( - await sut.downloadModel( - { - ...modelWithMultipleSources, - file_path: 'file://models/model1/model.json', - }, - gpuSettings, - network - ) - ).toBeUndefined() - - expect(downloadMock).toHaveBeenCalledWith( - { - localPath: 'file://models/model1/model1.gguf', - modelId: 'model-id', - url: 'http://example.com/model1.gguf', - }, - { ignoreSSL: true, proxy: 'http://proxy.example.com' } - ) - - expect(downloadMock).toHaveBeenCalledWith( - { - localPath: 'file://models/model1/model2.gguf', - modelId: 'model-id', - url: 'http://example.com/model2.gguf', - }, - { ignoreSSL: true, proxy: 'http://proxy.example.com' } - ) - }) - - it('should handle model file with no file_path', async () => { - executeMock.mockResolvedValue({ - metadata: { 'tokenizer.ggml.eos_token_id': 0 }, - }) - ;(gguf as jest.Mock).mockResolvedValue({ - metadata: { 'tokenizer.ggml.eos_token_id': 0 }, - }) - // @ts-ignore - global.NODE = 'node' - // @ts-ignore - global.DEFAULT_MODEL = { - parameters: { stop: [] }, - } - const modelWithoutFilepath = { ...model, file_path: undefined } - - await sut.downloadModel(modelWithoutFilepath, gpuSettings, network) - - expect(downloadMock).toHaveBeenCalledWith( - expect.objectContaining({ - localPath: 'file://models/model-id/model.gguf', - }), - expect.anything() - ) - }) - - it('should handle model file with invalid file_path', async () => { - executeMock.mockResolvedValue({ - metadata: { 'tokenizer.ggml.eos_token_id': 0 }, - }) - ;(gguf as jest.Mock).mockResolvedValue({ - metadata: { 'tokenizer.ggml.eos_token_id': 0 }, - }) - // @ts-ignore - global.NODE = 'node' - // @ts-ignore - global.DEFAULT_MODEL = { - parameters: { stop: [] }, - } - const modelWithInvalidFilepath = { - ...model, - file_path: 'file://models/invalid-model.json', - } - - await sut.downloadModel(modelWithInvalidFilepath, gpuSettings, network) - - expect(downloadMock).toHaveBeenCalledWith( - expect.objectContaining({ 
- localPath: 'file://models/model1/model.gguf', - }), - expect.anything() - ) - }) - - it('should handle model with valid chat_template', async () => { - executeMock.mockResolvedValue('{prompt}') - ;(gguf as jest.Mock).mockResolvedValue({ - metadata: {}, - }) - // @ts-ignore - global.NODE = 'node' - // @ts-ignore - global.DEFAULT_MODEL = { - parameters: { stop: [] }, - settings: { - prompt_template: '<|im-start|>{prompt}<|im-end|>', - }, - } - - const result = await sut.retrieveGGUFMetadata({}) - - expect(result).toEqual({ - parameters: { - stop: [], - }, - settings: { - ctx_len: 4096, - ngl: 33, - prompt_template: '{prompt}', - }, - }) - }) - - it('should handle model without chat_template', async () => { - executeMock.mockRejectedValue({}) - ;(gguf as jest.Mock).mockResolvedValue({ - metadata: {}, - }) - // @ts-ignore - global.NODE = 'node' - // @ts-ignore - global.DEFAULT_MODEL = { - parameters: { stop: [] }, - settings: { - prompt_template: '<|im-start|>{prompt}<|im-end|>', - }, - } - - const result = await sut.retrieveGGUFMetadata({}) - - expect(result).toEqual({ - parameters: { - stop: [], - }, - settings: { - ctx_len: 4096, - ngl: 33, - prompt_template: '<|im-start|>{prompt}<|im-end|>', - }, - }) - }) + it('should import a model', async () => { + const model: any = { path: 'test-path' } + const optionType: any = 'test-option' + await extension.importModel(model, optionType) + expect(mockCortexAPI.importModel).toHaveBeenCalledWith( + model.path, + optionType + ) }) }) diff --git a/extensions/model-extension/src/index.ts b/extensions/model-extension/src/index.ts index 7e7c12469..b879e0bb9 100644 --- a/extensions/model-extension/src/index.ts +++ b/extensions/model-extension/src/index.ts @@ -1,66 +1,47 @@ import { - fs, - downloadFile, - abortDownload, - InferenceEngine, - joinPath, ModelExtension, Model, - getJanDataFolderPath, - events, - DownloadEvent, - DownloadRoute, - DownloadState, - OptionType, - ImportingModel, - LocalImportModelEvent, - baseName, - GpuSetting, - DownloadRequest, - executeOnMain, - HuggingFaceRepoData, - getFileSize, - AllQuantizations, - ModelEvent, - ModelFile, + InferenceEngine, + fs, + joinPath, dirName, } from '@janhq/core' - -import { extractFileName } from './helpers/path' -import { GGUFMetadata, gguf } from '@huggingface/gguf' -import { NotSupportedModelError } from './@types/NotSupportModelError' -import { InvalidHostError } from './@types/InvalidHostError' +import { CortexAPI } from './cortex' declare const SETTINGS: Array + +/** + * TODO: Set env for HF access token? or via API request? + */ enum Settings { huggingFaceAccessToken = 'hugging-face-access-token', } +/** + * Extension enum + */ +enum ExtensionEnum { + downloadedModels = 'downloadedModels', +} + /** * A extension for models */ export default class JanModelExtension extends ModelExtension { private static readonly _homeDir = 'file://models' - private static readonly _modelMetadataFileName = 'model.json' - private static readonly _supportedModelFormat = '.gguf' - private static readonly _incompletedModelFileName = '.download' - private static readonly _offlineInferenceEngine = [ - InferenceEngine.nitro, - InferenceEngine.nitro_tensorrt_llm, - ] - private static readonly _tensorRtEngineFormat = '.engine' - private static readonly _supportedGpuArch = ['ampere', 'ada'] - - interrupted = false + cortexAPI: CortexAPI = new CortexAPI() /** * Called when the extension is loaded. 
   * @override
   */
  async onLoad() {
-    // Handle Desktop Events
    this.registerSettings(SETTINGS)
-    this.handleDesktopEvents()
+
+    // Try to get models from cortex.cpp
+    this.getModels().then((models) => {
+      this.registerModels(models)
+    })
  }

  /**
@@ -72,384 +53,145 @@ export default class JanModelExtension extends ModelExtension {
  /**
   * Downloads a machine learning model.
   * @param model - The model to download.
-   * @param network - Optional object to specify the proxy and whether to ignore SSL certificates.
   * @returns A Promise that resolves when the model is downloaded.
   */
-  async downloadModel(
-    model: ModelFile,
-    gpuSettings?: GpuSetting,
-    network?: { ignoreSSL?: boolean; proxy?: string }
-  ): Promise<void> {
-    // Create the corresponding directory
-    const modelDirPath = await joinPath([JanModelExtension._homeDir, model.id])
-    if (!(await fs.existsSync(modelDirPath))) await fs.mkdir(modelDirPath)
-    const modelJsonPath =
-      model.file_path ?? (await joinPath([modelDirPath, 'model.json']))
-
-    // Download HF model - model.json does not exist
-    if (!(await fs.existsSync(modelJsonPath))) {
-      // Only one source is supported for HF download
-      const metadata = await this.fetchModelMetadata(model.sources[0].url)
-      const updatedModel = await this.retrieveGGUFMetadata(metadata)
-      if (updatedModel) {
-        // Update model settings
-        model.settings = {
-          ...model.settings,
-          ...updatedModel.settings,
-        }
-        model.parameters = {
-          ...model.parameters,
-          ...updatedModel.parameters,
-        }
-      }
-      await fs.writeFileSync(modelJsonPath, JSON.stringify(model, null, 2))
-      events.emit(ModelEvent.OnModelsUpdate, {})
-    }
-    if (model.engine === InferenceEngine.nitro_tensorrt_llm) {
-      if (!gpuSettings || gpuSettings.gpus.length === 0) {
-        console.error('No GPU found. Please check your GPU setting.')
-        return
-      }
-      const firstGpu = gpuSettings.gpus[0]
-      if (!firstGpu.name.toLowerCase().includes('nvidia')) {
-        console.error('No Nvidia GPU found. Please check your GPU setting.')
-        return
-      }
-      const gpuArch = firstGpu.arch
-      if (gpuArch === undefined) {
-        console.error(
-          'No GPU architecture found. Please check your GPU setting.'
-        )
-        return
-      }
-
-      if (!JanModelExtension._supportedGpuArch.includes(gpuArch)) {
-        console.debug(
-          `Your GPU: ${JSON.stringify(firstGpu)} is not supported. Only 30xx, 40xx series are supported.`
-        )
-        return
-      }
-
-      const os = 'windows' // TODO: remove this hard-coded value
-
-      const newSources = model.sources.map((source) => {
-        const newSource = { ...source }
-        newSource.url = newSource.url
-          .replace(/<os>/g, os)
-          .replace(/<gpuarch>/g, gpuArch)
-        return newSource
-      })
-      model.sources = newSources
-    }
-
-    console.debug(`Download sources: ${JSON.stringify(model.sources)}`)
-
-    if (model.sources.length > 1) {
-      // path to model binaries
-      for (const source of model.sources) {
-        let path = extractFileName(
-          source.url,
-          JanModelExtension._supportedModelFormat
-        )
-        if (source.filename) {
-          path = model.file_path
-            ? await joinPath([await dirName(model.file_path), source.filename])
-            : await joinPath([modelDirPath, source.filename])
-        }
-
-        const downloadRequest: DownloadRequest = {
-          url: source.url,
-          localPath: path,
-          modelId: model.id,
-        }
-        downloadFile(downloadRequest, network)
-      }
-      // TODO: handle multiple binaries for web later
-    } else {
-      const fileName = extractFileName(
-        model.sources[0]?.url,
-        JanModelExtension._supportedModelFormat
-      )
-      const path = model.file_path
-        ?
await joinPath([await dirName(model.file_path), fileName]) - : await joinPath([modelDirPath, fileName]) - const downloadRequest: DownloadRequest = { - url: model.sources[0]?.url, - localPath: path, - modelId: model.id, - } - downloadFile(downloadRequest, network) - - if (window && window.core?.api && window.core.api.baseApiUrl) { - this.startPollingDownloadProgress(model.id) - } - } - } - - private toHuggingFaceUrl(repoId: string): string { - try { - const url = new URL(repoId) - if (url.host !== 'huggingface.co') { - throw new InvalidHostError(`Invalid Hugging Face repo URL: ${repoId}`) - } - - const paths = url.pathname.split('/').filter((e) => e.trim().length > 0) - if (paths.length < 2) { - throw new InvalidHostError(`Invalid Hugging Face repo URL: ${repoId}`) - } - - return `${url.origin}/api/models/${paths[0]}/${paths[1]}` - } catch (err) { - if (err instanceof InvalidHostError) { - throw err - } - - if (repoId.startsWith('https')) { - throw new Error(`Cannot parse url: ${repoId}`) - } - - return `https://huggingface.co/api/models/${repoId}` - } - } - - async fetchHuggingFaceRepoData(repoId: string): Promise { - const sanitizedUrl = this.toHuggingFaceUrl(repoId) - console.debug('sanitizedUrl', sanitizedUrl) - - const huggingFaceAccessToken = ( - await this.getSetting(Settings.huggingFaceAccessToken, '') - ).trim() - - const headers = { - Accept: 'application/json', - } - - if (huggingFaceAccessToken.length > 0) { - headers['Authorization'] = `Bearer ${huggingFaceAccessToken}` - } - - const res = await fetch(sanitizedUrl, { - headers: headers, - }) - const response = await res.json() - if (response['error'] != null) { - throw new Error(response['error']) - } - - const data = response as HuggingFaceRepoData - - if (data.tags.indexOf('gguf') === -1) { - throw new NotSupportedModelError( - `${repoId} is not supported. Only GGUF models are supported.` - ) - } - - const promises: Promise[] = [] - - // fetching file sizes - const url = new URL(sanitizedUrl) - const paths = url.pathname.split('/').filter((e) => e.trim().length > 0) - - for (const sibling of data.siblings) { - const downloadUrl = `https://huggingface.co/${paths[2]}/${paths[3]}/resolve/main/${sibling.rfilename}` - sibling.downloadUrl = downloadUrl - promises.push(getFileSize(downloadUrl)) - } - - const result = await Promise.all(promises) - for (let i = 0; i < data.siblings.length; i++) { - data.siblings[i].fileSize = result[i] - } - - AllQuantizations.forEach((quantization) => { - data.siblings.forEach((sibling) => { - if (!sibling.quantization && sibling.rfilename.includes(quantization)) { - sibling.quantization = quantization - } - }) - }) - - data.modelUrl = `https://huggingface.co/${paths[2]}/${paths[3]}` - return data - } - - async fetchModelMetadata(url: string): Promise { - const { metadata } = await gguf(url) - return metadata - } - - /** - * Specifically for Jan server. 
-   */
-  private async startPollingDownloadProgress(modelId: string): Promise<void> {
-    // wait a few seconds before polling
-    await new Promise((resolve) => setTimeout(resolve, 3000))
-
-    return new Promise<void>((resolve) => {
-      const interval = setInterval(async () => {
-        fetch(
-          `${window.core.api.baseApiUrl}/v1/download/${DownloadRoute.getDownloadProgress}/${modelId}`,
-          {
-            method: 'GET',
-            headers: { contentType: 'application/json' },
-          }
-        ).then(async (res) => {
-          const state: DownloadState = await res.json()
-          if (state.downloadState === 'end') {
-            events.emit(DownloadEvent.onFileDownloadSuccess, state)
-            clearInterval(interval)
-            resolve()
-            return
-          }
-
-          if (state.downloadState === 'error') {
-            events.emit(DownloadEvent.onFileDownloadError, state)
-            clearInterval(interval)
-            resolve()
-            return
-          }
-
-          events.emit(DownloadEvent.onFileDownloadUpdate, state)
-        })
-      }, 1000)
-    })
+  async pullModel(model: string): Promise<void> {
+    /**
+     * Send a POST to the /models/pull/{id} endpoint to pull the model
+     */
+    return this.cortexAPI?.pullModel(model)
  }

  /**
   * Cancels the download of a specific machine learning model.
   *
-   * @param {string} modelId - The ID of the model whose download is to be cancelled.
+   * @param {string} model - The ID of the model whose download is to be cancelled.
   * @returns {Promise<void>} A promise that resolves when the download has been cancelled.
   */
-  async cancelModelDownload(modelId: string): Promise<void> {
-    const path = await joinPath([JanModelExtension._homeDir, modelId, modelId])
-    try {
-      await abortDownload(path)
-      await fs.unlinkSync(path)
-    } catch (e) {
-      console.error(e)
-    }
+  async cancelModelPull(model: string): Promise<void> {
+    /**
+     * Send a DELETE to the /models/pull/{id} endpoint to cancel a model pull
+     */
+    this.cortexAPI?.cancelModelPull(model)
  }

  /**
-   * Deletes a machine learning model.
-   * @param filePath - The path to the model file to delete.
+   * Deletes a pulled model
+   * @param model - The model to delete
   * @returns A Promise that resolves when the model is deleted.
   */
-  async deleteModel(model: ModelFile): Promise<void> {
-    try {
-      const dirPath = await dirName(model.file_path)
-      const jsonFilePath = await joinPath([
-        dirPath,
-        JanModelExtension._modelMetadataFileName,
-      ])
-      const modelInfo = JSON.parse(
-        await this.readModelMetadata(jsonFilePath)
-      ) as Model
-
-      // TODO: This is tricky -
-      // should it depend on sources?
-      const isUserImportModel =
-        modelInfo.metadata?.author?.toLowerCase() === 'user'
-      if (isUserImportModel) {
-        // just delete the folder
-        return fs.rm(dirPath)
-      }
-
-      // remove all files under dirPath except model.json
-      const files = await fs.readdirSync(dirPath)
-      const deletePromises = files.map(async (fileName: string) => {
-        if (fileName !== JanModelExtension._modelMetadataFileName) {
-          return fs.unlinkSync(await joinPath([dirPath, fileName]))
-        }
-      })
-      await Promise.allSettled(deletePromises)
-    } catch (err) {
-      console.error(err)
-    }
+  async deleteModel(model: string): Promise<void> {
+    return this.cortexAPI?.deleteModel(model)
  }
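// [Editor's note] Hedged usage sketch, not part of the diff: with the node-side
// downloader gone, pull/cancel/delete become thin wrappers over cortex.cpp's
// HTTP API, and progress arrives over the websocket instead of the old polling
// loop. The extension instance and model id below are hypothetical:
const ext = new JanModelExtension()
await ext.pullModel('some-model') // POST /v1/models/pull
// ...later, if the user aborts the download:
await ext.cancelModelPull('some-model') // DELETE /models/pull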
  /**
-   * Gets all downloaded models.
+   * Gets all pulled models
   * @returns A Promise that resolves with an array of all models.
   */
-  async getDownloadedModels(): Promise<ModelFile[]> {
-    return await this.getModelsMetadata(
-      async (modelDir: string, model: Model) => {
-        if (!JanModelExtension._offlineInferenceEngine.includes(model.engine))
-          return true
+  async getModels(): Promise<Model[]> {
+    /**
+     * Returning an empty array right away here would reset the app cache
+     * and the app would not function properly,
+     * so compare and try to import instead
+     */
-        // model binaries (sources) are absolute paths & exist
-        const existFiles = await Promise.all(
-          model.sources.map(
-            (source) =>
-              // Supposed to be a local file url
-              !source.url.startsWith(`http://`) &&
-              !source.url.startsWith(`https://`)
+    if (!localStorage.getItem(ExtensionEnum.downloadedModels)) {
+      // Updated from a version older than 0.5.5
+      // Scan through the models folder and import the models (legacy flow)
+      // Return the models immediately
+      return this.scanModelsFolder().then((models) => {
+        return models ?? []
+      })
+    }
+
+    let currentModels: Model[] = []
+
+    try {
+      currentModels = JSON.parse(
+        localStorage.getItem(ExtensionEnum.downloadedModels)
+      ) as Model[]
+    } catch (e) {
+      currentModels = []
+      console.error(e)
+    }
+
+    /**
+     * Keep only the models that use the llama.cpp (nitro) engine;
+     * these are the candidates that may still need importing
+     */
+    var toImportModels = currentModels.filter(
+      (e) => e.engine === InferenceEngine.nitro
+    )
+
+    await this.cortexAPI?.getModels().then((models) => {
+      const existingIds = models.map((e) => e.id)
+      toImportModels = toImportModels.filter(
+        (e: Model) => !existingIds.includes(e.id)
+      )
+    })
+
+    console.log('To import models:', toImportModels.length)
+    /**
+     * There are models to import -
+     * do not return models from cortex.cpp yet,
+     * otherwise it will reset the app cache
+     */
+    if (toImportModels.length > 0) {
+      // Import models
+      await Promise.all(
+        toImportModels.map(async (model: Model & { file_path: string }) =>
+          this.importModel(
+            model.id,
+            await joinPath([
+              await dirName(model.file_path),
+              model.sources[0]?.filename ??
+                model.settings?.llama_model_path ??
+                model.sources[0]?.url.split('/').pop() ??
+                model.id,
+            ])
          )
        )
-        if (existFiles.every((exist) => exist)) return true
+      )

-        const result = await fs
-          .readdirSync(await joinPath([JanModelExtension._homeDir, modelDir]))
-          .then((files: string[]) => {
-            // Model binary exists in the directory
-            // Model binary name can match the model ID or be a .gguf file, and not be an incomplete model file
-            return (
-              files.includes(modelDir) ||
-              files.filter((file) => {
-                if (
-                  file.endsWith(JanModelExtension._incompletedModelFileName)
-                ) {
-                  return false
-                }
-                return (
-                  file
-                    .toLowerCase()
-                    .includes(JanModelExtension._supportedModelFormat) ||
-                  file
-                    .toLowerCase()
-                    .includes(JanModelExtension._tensorRtEngineFormat)
-                )
-                // Check if the number of matched files equals the number of sources
-              })?.length >= model.sources.length
-            )
-          })
+      return currentModels
+    }

-        return result
-      }
+    /**
+     * All models were imported successfully before,
+     * so just return the models from cortex.cpp
+     */
+    return (
+      this.cortexAPI?.getModels().then((models) => {
+        return models
+      }) ?? Promise.resolve([])
    )
  }
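// [Editor's note] Condensed sketch, not part of the diff: getModels() above
// boils down to three cases. The cache key semantics and 'nitro' engine check
// mirror the code; the helper callbacks are hypothetical stand-ins, and the
// real code also kicks off the imports in case 2:
async function resolveModels<M extends { id: string; engine: string }>(
  cached: string | null,
  scanLegacyFolder: () => Promise<M[]>,
  fetchFromCortex: () => Promise<M[]>
): Promise<M[]> {
  // 1. Pre-0.5.5 install, no cache yet: fall back to scanning the folder.
  if (!cached) return scanLegacyFolder()
  const current: M[] = JSON.parse(cached)
  const imported = await fetchFromCortex()
  const missing = current.filter(
    (m) => m.engine === 'nitro' && !imported.some((i) => i.id === m.id)
  )
  // 2. Some llama.cpp models still need importing: keep the cached list.
  if (missing.length > 0) return current
  // 3. Everything is imported: cortex.cpp is now the source of truth.
  return imported
}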
-  private async getModelJsonPath(
-    folderFullPath: string
-  ): Promise<string | undefined> {
-    // try to find model.json recursively inside each folder
-    if (!(await fs.existsSync(folderFullPath))) return undefined
-
-    const files: string[] = await fs.readdirSync(folderFullPath)
-    if (files.length === 0) return undefined
-
-    if (files.includes(JanModelExtension._modelMetadataFileName)) {
-      return joinPath([
-        folderFullPath,
-        JanModelExtension._modelMetadataFileName,
-      ])
-    }
-    // continue recursively
-    for (const file of files) {
-      const path = await joinPath([folderFullPath, file])
-      const fileStats = await fs.fileStat(path)
-      if (fileStats.isDirectory) {
-        const result = await this.getModelJsonPath(path)
-        if (result) return result
-      }
-    }
+  /**
+   * Update a pulled model's metadata
+   * @param model - The metadata of the model
+   */
+  async updateModel(model: Partial<Model>): Promise<Model> {
+    return this.cortexAPI
+      ?.updateModel(model)
+      .then(() => this.cortexAPI!.getModel(model.id))
  }

-  private async getModelsMetadata(
-    selector?: (path: string, model: Model) => Promise<boolean>
-  ): Promise<ModelFile[]> {
+  /**
+   * Import an existing model file
+   * @param model
+   * @param optionType
+   */
+  async importModel(model: string, modelPath: string): Promise<void> {
+    return this.cortexAPI?.importModel(model, modelPath)
+  }
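// [Editor's note] Hedged usage sketch, not part of the diff: importModel() now
// just points cortex.cpp at an existing weights file; the extension no longer
// symlinks or writes model.json itself. An extension instance `ext` is assumed
// and both arguments are hypothetical:
await ext.importModel(
  'my-local-model', // model id to register
  '/path/to/my-local-model.gguf' // absolute path to the weights file
)
// => POST /v1/models/import with { model, modelPath }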
+
+  //// LEGACY MODEL FOLDER ////
+  /**
+   * Scan through the models folder and return the downloaded models
+   * @returns
+   */
+  private async scanModelsFolder(): Promise<Model[]> {
     try {
       if (!(await fs.existsSync(JanModelExtension._homeDir))) {
         console.debug('Model folder not found')
@@ -459,10 +201,14 @@ export default class JanModelExtension extends ModelExtension {
       const files: string[] = await fs.readdirSync(JanModelExtension._homeDir)

       const allDirectories: string[] = []
-      for (const file of files) {
-        if (file === '.DS_Store') continue
-        if (file === 'config') continue
-        allDirectories.push(file)
+
+      for (const modelFolder of files) {
+        const fullModelFolderPath = await joinPath([
+          JanModelExtension._homeDir,
+          modelFolder,
+        ])
+        if (!(await fs.fileStat(fullModelFolderPath)).isDirectory) continue
+        allDirectories.push(modelFolder)
       }

       const readJsonPromises = allDirectories.map(async (dirName) => {
@@ -477,7 +223,7 @@ export default class JanModelExtension extends ModelExtension {

         if (await fs.existsSync(jsonPath)) {
           // if we have the model.json file, read it
-          let model = await this.readModelMetadata(jsonPath)
+          let model = await fs.readFileSync(jsonPath, 'utf-8')

           model = typeof model === 'object' ? model : JSON.parse(model)
@@ -491,420 +237,89 @@ export default class JanModelExtension extends ModelExtension {
             ]
           }
           model.file_path = jsonPath
-          model.file_name = JanModelExtension._modelMetadataFileName
+          model.file_name = 'model.json'

-          if (selector && !(await selector?.(dirName, model))) {
-            return
-          }
-          return model
-        } else {
-          // otherwise, we generate our own model file
-          // TODO: we might have more than one binary file here. This will be addressed with a new version of the Model file,
-          // which is the PR from Hiro on branch Jan can see
-          return this.generateModelMetadata(dirName)
+          // Check that the model file exists
+          // model binaries (sources) are absolute paths & exist (symlinked)
+          const existFiles = await Promise.all(
+            model.sources.map(
+              (source) =>
+                // Supposed to be a local file url
+                !source.url.startsWith(`http://`) &&
+                !source.url.startsWith(`https://`)
+            )
+          )
+          if (existFiles.every((exist) => exist)) return true
+
+          const result = await fs
+            .readdirSync(await joinPath([JanModelExtension._homeDir, dirName]))
+            .then((files: string[]) => {
+              // Model binary exists in the directory
+              // Model binary name can match the model ID or be a .gguf file, and not be an incomplete model file
+              return (
+                files.includes(dirName) || // Legacy model GGUF without extension
+                files.filter((file) => {
+                  return (
+                    file.toLowerCase().endsWith('.gguf') || // GGUF
+                    file.toLowerCase().endsWith('.engine') // TensorRT-LLM
+                  )
+                })?.length > 0 // TODO: find a better way (can use basename to check the file name against the source url)
+              )
+            })
+
+          if (result) return model
+          else return undefined
         }
       })

       const results = await Promise.allSettled(readJsonPromises)
-      const modelData = results.map((result) => {
-        if (result.status === 'fulfilled' && result.value) {
-          try {
-            const model =
-              typeof result.value === 'object'
-                ? result.value
-                : JSON.parse(result.value)
-            return model as ModelFile
-          } catch {
-            console.debug(`Unable to parse model metadata: ${result.value}`)
+      const modelData = results
+        .map((result) => {
+          if (result.status === 'fulfilled' && result.value) {
+            try {
+              const model =
+                typeof result.value === 'object'
+                  ? result.value
+                  : JSON.parse(result.value)
+              return model as Model
+            } catch {
+              console.debug(`Unable to parse model metadata: ${result.value}`)
+            }
          }
-        }
-        return undefined
-      })
+          return undefined
+        })
+        .filter((e) => !!e)

-      return modelData.filter((e) => !!e)
+      return modelData
    } catch (err) {
      console.error(err)
      return []
    }
  }

-  private readModelMetadata(path: string) {
-    return fs.readFileSync(path, 'utf-8')
-  }
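// [Editor's note] Standalone predicate, not part of the diff: the legacy scan
// above treats a folder as "downloaded" when it contains a file named after the
// model id (an old extension-less GGUF) or any .gguf / .engine file. As a
// self-contained sketch of that check (illustrative only):
function hasModelBinary(files: string[], modelId: string): boolean {
  return (
    files.includes(modelId) || // legacy GGUF stored without an extension
    files.some(
      (f) =>
        f.toLowerCase().endsWith('.gguf') || // llama.cpp weights
        f.toLowerCase().endsWith('.engine') // TensorRT-LLM engine
    )
  )
}
// e.g. hasModelBinary(['model.json', 'q4.gguf'], 'my-model') === true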
- */ - private async generateModelMetadata(dirName: string): Promise { - const files: string[] = await fs.readdirSync( - await joinPath([JanModelExtension._homeDir, dirName]) - ) - - // sort files by name - files.sort() - - // find the first file which is not a directory - let binaryFileName: string | undefined = undefined - let binaryFileSize: number | undefined = undefined - - for (const file of files) { - if (file.endsWith(JanModelExtension._supportedModelFormat)) { - const path = await joinPath([JanModelExtension._homeDir, dirName, file]) - const fileStats = await fs.fileStat(path) - if (fileStats.isDirectory) continue - binaryFileSize = fileStats.size - binaryFileName = file - break - } - } - - if (!binaryFileName) { - console.warn(`Unable to find binary file for model ${dirName}`) - return - } - - const defaultModel = (await this.getDefaultModel()) as Model - const metadata = await executeOnMain( - NODE, - 'retrieveGGUFMetadata', - await joinPath([ - await getJanDataFolderPath(), - 'models', - dirName, - binaryFileName, - ]) - ).catch(() => undefined) - - const updatedModel = await this.retrieveGGUFMetadata(metadata) - - if (!defaultModel) { - console.error('Unable to find default model') - return - } - - const model: Model = { - ...defaultModel, - // Overwrite default N/A fields - id: dirName, - name: dirName, - sources: [ - { - url: binaryFileName, - filename: binaryFileName, - }, - ], - parameters: { - ...defaultModel.parameters, - ...updatedModel.parameters, - }, - settings: { - ...defaultModel.settings, - ...updatedModel.settings, - llama_model_path: binaryFileName, - }, - created: Date.now(), - description: '', - metadata: { - size: binaryFileSize, - author: 'User', - tags: [], - }, - } - - const modelFilePath = await joinPath([ - JanModelExtension._homeDir, - dirName, - JanModelExtension._modelMetadataFileName, - ]) - - await fs.writeFileSync(modelFilePath, JSON.stringify(model, null, 2)) - - return model - } - - override async getDefaultModel(): Promise { - const defaultModel = DEFAULT_MODEL as Model - return defaultModel - } - - /** - * Gets all available models. - * @returns A Promise that resolves with an array of all models. 
- */ - async getConfiguredModels(): Promise { - return this.getModelsMetadata() - } - - handleDesktopEvents() { - if (window && window.electronAPI) { - window.electronAPI.onFileDownloadUpdate( - async (_event: string, state: DownloadState | undefined) => { - if (!state) return - state.downloadState = 'downloading' - events.emit(DownloadEvent.onFileDownloadUpdate, state) - } - ) - window.electronAPI.onFileDownloadError( - async (_event: string, state: DownloadState) => { - state.downloadState = 'error' - events.emit(DownloadEvent.onFileDownloadError, state) - } - ) - window.electronAPI.onFileDownloadSuccess( - async (_event: string, state: DownloadState) => { - state.downloadState = 'end' - events.emit(DownloadEvent.onFileDownloadSuccess, state) - } - ) - } - } - - private async importModelSymlink( - modelBinaryPath: string, - modelFolderName: string, - modelFolderPath: string - ): Promise { - const fileStats = await fs.fileStat(modelBinaryPath, true) - const binaryFileSize = fileStats.size - - // Just need to generate model.json there - const defaultModel = (await this.getDefaultModel()) as Model - if (!defaultModel) { - console.error('Unable to find default model') - return - } - - const metadata = await executeOnMain( - NODE, - 'retrieveGGUFMetadata', - modelBinaryPath - ) - - const binaryFileName = await baseName(modelBinaryPath) - const updatedModel = await this.retrieveGGUFMetadata(metadata) - - const model: Model = { - ...defaultModel, - id: modelFolderName, - name: modelFolderName, - sources: [ - { - url: modelBinaryPath, - filename: binaryFileName, - }, - ], - parameters: { - ...defaultModel.parameters, - ...updatedModel.parameters, - }, - - settings: { - ...defaultModel.settings, - ...updatedModel.settings, - llama_model_path: binaryFileName, - }, - created: Date.now(), - description: '', - metadata: { - size: binaryFileSize, - author: 'User', - tags: [], - }, - } - - const modelFilePath = await joinPath([ - modelFolderPath, - JanModelExtension._modelMetadataFileName, - ]) - - await fs.writeFileSync(modelFilePath, JSON.stringify(model, null, 2)) - - return { - ...model, - file_path: modelFilePath, - file_name: JanModelExtension._modelMetadataFileName, - } - } - - async updateModelInfo(modelInfo: Partial): Promise { - if (modelInfo.id == null) throw new Error('Model ID is required') - - const model = JSON.parse( - await this.readModelMetadata(modelInfo.file_path) - ) as ModelFile - - const updatedModel: ModelFile = { - ...model, - ...modelInfo, - parameters: { - ...model.parameters, - ...modelInfo.parameters, - }, - settings: { - ...model.settings, - ...modelInfo.settings, - }, - metadata: { - ...model.metadata, - ...modelInfo.metadata, - }, - // Should not persist file_path & file_name - file_path: undefined, - file_name: undefined, - } - - await fs.writeFileSync( - modelInfo.file_path, - JSON.stringify(updatedModel, null, 2) - ) - return updatedModel - } - - private async importModel( - model: ImportingModel, - optionType: OptionType - ): Promise { - const binaryName = (await baseName(model.path)).replace(/\s/g, '') - - let modelFolderName = binaryName - if (binaryName.endsWith(JanModelExtension._supportedModelFormat)) { - modelFolderName = binaryName.replace( - JanModelExtension._supportedModelFormat, - '' - ) - } - - const modelFolderPath = await this.getModelFolderName(modelFolderName) - await fs.mkdir(modelFolderPath) - - const uniqueFolderName = await baseName(modelFolderPath) - const modelBinaryFile = binaryName.endsWith( - JanModelExtension._supportedModelFormat - ) - ? 
binaryName - : `${binaryName}${JanModelExtension._supportedModelFormat}` - - const binaryPath = await joinPath([modelFolderPath, modelBinaryFile]) - - if (optionType === 'SYMLINK') { - return this.importModelSymlink( - model.path, - uniqueFolderName, - modelFolderPath - ) - } - - const srcStat = await fs.fileStat(model.path, true) - - // interval getting the file size to calculate the percentage - const interval = setInterval(async () => { - const destStats = await fs.fileStat(binaryPath, true) - const percentage = destStats.size / srcStat.size - events.emit(LocalImportModelEvent.onLocalImportModelUpdate, { - ...model, - percentage, - }) - }, 1000) - - await fs.copyFile(model.path, binaryPath) - - clearInterval(interval) - - // generate model json - return this.generateModelMetadata(uniqueFolderName) - } - - private async getModelFolderName( - modelFolderName: string, - count?: number - ): Promise { - const newModelFolderName = count - ? `${modelFolderName}-${count}` - : modelFolderName - - const janDataFolderPath = await getJanDataFolderPath() - const modelFolderPath = await joinPath([ - janDataFolderPath, - 'models', - newModelFolderName, - ]) - - const isFolderExist = await fs.existsSync(modelFolderPath) - if (!isFolderExist) { - return modelFolderPath - } else { - const newCount = (count ?? 0) + 1 - return this.getModelFolderName(modelFolderName, newCount) - } - } - - async importModels( - models: ImportingModel[], - optionType: OptionType - ): Promise { - const importedModels: Model[] = [] - - for (const model of models) { - events.emit(LocalImportModelEvent.onLocalImportModelUpdate, model) - try { - const importedModel = await this.importModel(model, optionType) - events.emit(LocalImportModelEvent.onLocalImportModelSuccess, { - ...model, - modelId: importedModel.id, - }) - importedModels.push(importedModel) - } catch (err) { - events.emit(LocalImportModelEvent.onLocalImportModelFailed, { - ...model, - error: err, - }) - } - } - - events.emit( - LocalImportModelEvent.onLocalImportModelFinished, - importedModels - ) - } - - /** - * Retrieve Model Settings from GGUF Metadata - * @param metadata + * Retrieve the model.json path from a folder + * @param folderFullPath * @returns */ - async retrieveGGUFMetadata(metadata: any): Promise> { - const defaultModel = DEFAULT_MODEL as Model - var template = await executeOnMain( - NODE, - 'renderJinjaTemplate', - metadata - ).catch(() => undefined) - - const eos_id = metadata['tokenizer.ggml.eos_token_id'] - const architecture = metadata['general.architecture'] - - return { - settings: { - prompt_template: template ?? defaultModel.settings.prompt_template, - ctx_len: - metadata[`${architecture}.context_length`] ?? - metadata['llama.context_length'] ?? - 4096, - ngl: - (metadata[`${architecture}.block_count`] ?? - metadata['llama.block_count'] ?? - 32) + 1, - }, - parameters: { - stop: eos_id - ? [metadata?.['tokenizer.ggml.tokens'][eos_id] ?? 
''] - : defaultModel.parameters.stop, - }, + private async getModelJsonPath( + folderFullPath: string + ): Promise { + // try to find model.json recursively inside each folder + if (!(await fs.existsSync(folderFullPath))) return undefined + const files: string[] = await fs.readdirSync(folderFullPath) + if (files.length === 0) return undefined + if (files.includes('model.json')) { + return joinPath([folderFullPath, 'model.json']) + } + // continue recursive + for (const file of files) { + const path = await joinPath([folderFullPath, file]) + const fileStats = await fs.fileStat(path) + if (fileStats.isDirectory) { + const result = await this.getModelJsonPath(path) + if (result) return result + } } } + //// END LEGACY MODEL FOLDER //// } diff --git a/extensions/model-extension/src/node/index.ts b/extensions/model-extension/src/node/index.ts deleted file mode 100644 index 2acf6ec4a..000000000 --- a/extensions/model-extension/src/node/index.ts +++ /dev/null @@ -1,54 +0,0 @@ -import { closeSync, openSync, readSync } from 'fs' -import { Template } from '@huggingface/jinja' -/** - * This is to retrieve the metadata from a GGUF file - * It uses hyllama and jinja from @huggingface module - */ -export const retrieveGGUFMetadata = async (ggufPath: string) => { - try { - const { ggufMetadata } = await import('hyllama') - // Read first 10mb of gguf file - const fd = openSync(ggufPath, 'r') - const buffer = new Uint8Array(10_000_000) - readSync(fd, buffer, 0, 10_000_000, 0) - closeSync(fd) - - // Parse metadata and tensor info - const { metadata } = ggufMetadata(buffer.buffer) - - return metadata - } catch (e) { - console.log('[MODEL_EXT]', e) - } -} - -/** - * Convert metadata to jinja template - * @param metadata - */ -export const renderJinjaTemplate = (metadata: any): string => { - const template = new Template(metadata['tokenizer.chat_template']) - const eos_id = metadata['tokenizer.ggml.eos_token_id'] - const bos_id = metadata['tokenizer.ggml.bos_token_id'] - if (eos_id === undefined || bos_id === undefined) { - return '' - } - const eos_token = metadata['tokenizer.ggml.tokens'][eos_id] - const bos_token = metadata['tokenizer.ggml.tokens'][bos_id] - // Parse jinja template - return template.render({ - add_generation_prompt: true, - eos_token, - bos_token, - messages: [ - { - role: 'system', - content: '{system_message}', - }, - { - role: 'user', - content: '{prompt}', - }, - ], - }) -} diff --git a/extensions/model-extension/src/node/node.test.ts b/extensions/model-extension/src/node/node.test.ts deleted file mode 100644 index afd2b8470..000000000 --- a/extensions/model-extension/src/node/node.test.ts +++ /dev/null @@ -1,53 +0,0 @@ -import { renderJinjaTemplate } from './index' -import { Template } from '@huggingface/jinja' - -jest.mock('@huggingface/jinja', () => ({ - Template: jest.fn((template: string) => ({ - render: jest.fn(() => `${template}_rendered`), - })), -})) - -describe('renderJinjaTemplate', () => { - beforeEach(() => { - jest.clearAllMocks() // Clear mocks between tests - }) - - it('should render the template with correct parameters', () => { - const metadata = { - 'tokenizer.chat_template': 'Hello, {{ messages }}!', - 'tokenizer.ggml.eos_token_id': 0, - 'tokenizer.ggml.bos_token_id': 1, - 'tokenizer.ggml.tokens': ['EOS', 'BOS'], - } - - const renderedTemplate = renderJinjaTemplate(metadata) - - expect(Template).toHaveBeenCalledWith('Hello, {{ messages }}!') - - expect(renderedTemplate).toBe('Hello, {{ messages }}!_rendered') - }) - - it('should handle missing token IDs gracefully', 
() => { - const metadata = { - 'tokenizer.chat_template': 'Hello, {{ messages }}!', - 'tokenizer.ggml.eos_token_id': 0, - 'tokenizer.ggml.tokens': ['EOS'], - } - - const renderedTemplate = renderJinjaTemplate(metadata) - - expect(Template).toHaveBeenCalledWith('Hello, {{ messages }}!') - - expect(renderedTemplate).toBe('') - }) - - it('should handle empty template gracefully', () => { - const metadata = {} - - const renderedTemplate = renderJinjaTemplate(metadata) - - expect(Template).toHaveBeenCalledWith(undefined) - - expect(renderedTemplate).toBe("") - }) -}) diff --git a/extensions/tensorrt-llm-extension/src/index.ts b/extensions/tensorrt-llm-extension/src/index.ts index 11c86a9a7..d9c89242f 100644 --- a/extensions/tensorrt-llm-extension/src/index.ts +++ b/extensions/tensorrt-llm-extension/src/index.ts @@ -7,9 +7,7 @@ import { DownloadEvent, DownloadRequest, DownloadState, - GpuSetting, InstallationState, - Model, baseName, downloadFile, events, @@ -23,7 +21,7 @@ import { ModelEvent, getJanDataFolderPath, SystemInformation, - ModelFile, + Model, } from '@janhq/core' /** @@ -137,7 +135,7 @@ export default class TensorRTLLMExtension extends LocalOAIEngine { events.emit(ModelEvent.OnModelsUpdate, {}) } - override async loadModel(model: ModelFile): Promise { + override async loadModel(model: Model): Promise { if ((await this.installationState()) === 'Installed') return super.loadModel(model) @@ -177,7 +175,7 @@ export default class TensorRTLLMExtension extends LocalOAIEngine { override async inference(data: MessageRequest) { if (!this.loadedModel) return // TensorRT LLM Extension supports streaming only - if (data.model) data.model.parameters.stream = true + if (data.model && data.model.parameters) data.model.parameters.stream = true super.inference(data) } diff --git a/extensions/tensorrt-llm-extension/src/node/index.ts b/extensions/tensorrt-llm-extension/src/node/index.ts index 77003389f..d02427170 100644 --- a/extensions/tensorrt-llm-extension/src/node/index.ts +++ b/extensions/tensorrt-llm-extension/src/node/index.ts @@ -41,7 +41,7 @@ async function loadModel( // e.g. 
~/jan/models/llama-2 let modelFolder = params.modelFolder - if (params.model.settings.prompt_template) { + if (params.model.settings?.prompt_template) { const promptTemplate = params.model.settings.prompt_template const prompt = promptTemplateConverter(promptTemplate) if (prompt?.error) { diff --git a/web/containers/Layout/BottomPanel/DownloadingState/index.tsx b/web/containers/Layout/BottomPanel/DownloadingState/index.tsx index ddc2eab91..8eb16f549 100644 --- a/web/containers/Layout/BottomPanel/DownloadingState/index.tsx +++ b/web/containers/Layout/BottomPanel/DownloadingState/index.tsx @@ -9,11 +9,8 @@ import { modelDownloadStateAtom } from '@/hooks/useDownloadState' import { formatDownloadPercentage } from '@/utils/converter' -import { getDownloadingModelAtom } from '@/helpers/atoms/Model.atom' - export default function DownloadingState() { const downloadStates = useAtomValue(modelDownloadStateAtom) - const downloadingModels = useAtomValue(getDownloadingModelAtom) const { abortModelDownload } = useDownloadModel() const totalCurrentProgress = Object.values(downloadStates) @@ -76,10 +73,7 @@ export default function DownloadingState() { theme="destructive" onClick={() => { if (item?.modelId) { - const model = downloadingModels.find( - (model) => model.id === item.modelId - ) - if (model) abortModelDownload(model) + abortModelDownload(item?.modelId) } }} > diff --git a/web/containers/Layout/BottomPanel/SystemMonitor/TableActiveModel/index.tsx b/web/containers/Layout/BottomPanel/SystemMonitor/TableActiveModel/index.tsx index 6ff6c894a..00d528f99 100644 --- a/web/containers/Layout/BottomPanel/SystemMonitor/TableActiveModel/index.tsx +++ b/web/containers/Layout/BottomPanel/SystemMonitor/TableActiveModel/index.tsx @@ -30,8 +30,8 @@ const TableActiveModel = () => { - {activeModel.metadata.size - ? toGibibytes(activeModel.metadata.size) + {activeModel.metadata?.size + ? toGibibytes(activeModel.metadata?.size) : '-'} diff --git a/web/containers/ModalCancelDownload/index.tsx b/web/containers/ModalCancelDownload/index.tsx index e8d3842a8..fdc583911 100644 --- a/web/containers/ModalCancelDownload/index.tsx +++ b/web/containers/ModalCancelDownload/index.tsx @@ -30,7 +30,7 @@ const ModalCancelDownload = ({ model, isFromList }: Props) => { const onAbortDownloadClick = useCallback(() => { if (downloadState?.modelId) { const model = downloadingModels.find( - (model) => model.id === downloadState.modelId + (model) => model === downloadState.modelId ) if (model) abortModelDownload(model) } diff --git a/web/containers/ModelDropdown/index.tsx b/web/containers/ModelDropdown/index.tsx index 59f19586a..7415f1165 100644 --- a/web/containers/ModelDropdown/index.tsx +++ b/web/containers/ModelDropdown/index.tsx @@ -88,7 +88,7 @@ const ModelDropdown = ({ const searchInputRef = useRef(null) const configuredModels = useAtomValue(configuredModelsAtom) const featuredModel = configuredModels.filter((x) => - x.metadata.tags.includes('Featured') + x.metadata?.tags?.includes('Featured') ) const { updateThreadMetadata } = useCreateNewThread() @@ -200,7 +200,7 @@ const ModelDropdown = ({ if (model) updateModelParameter(activeThread, { params: modelParams, - modelPath: model.file_path, + // modelPath: model.file_path, modelId: model.id, engine: model.engine, }) @@ -444,7 +444,7 @@ const ModelDropdown = ({
    {featuredModel.map((model) => { const isDownloading = downloadingModels.some( - (md) => md.id === model.id + (md) => md === model.id ) return (
  • - {toGibibytes(model.metadata.size)} + {toGibibytes(model.metadata?.size)} {!isDownloading ? ( downloadModel(model)} + onClick={() => + downloadModel(model.sources[0].url) + } /> ) : ( Object.values(downloadStates) @@ -511,7 +513,7 @@ const ModelDropdown = ({ .map((model) => { if (!showModel) return null const isDownloading = downloadingModels.some( - (md) => md.id === model.id + (md) => md === model.id ) const isDownloaded = downloadedModels.some( (c) => c.id === model.id @@ -549,14 +551,16 @@ const ModelDropdown = ({
    {!isDownloaded && ( - {toGibibytes(model.metadata.size)} + {toGibibytes(model.metadata?.size)} )} {!isDownloading && !isDownloaded ? ( downloadModel(model)} + onClick={() => + downloadModel(model.sources[0].url) + } /> ) : ( Object.values(downloadStates) diff --git a/web/containers/ModelLabel/index.tsx index b0a3da96f..a6237ada6 100644 --- a/web/containers/ModelLabel/index.tsx +++ b/web/containers/ModelLabel/index.tsx @@ -42,7 +42,7 @@ const ModelLabel = ({ metadata, compact }: Props) => { const availableRam = settings?.run_mode === 'gpu' ? availableVram * 1000000 // MB to bytes - : totalRam - usedRam + (activeModel?.metadata.size ?? 0) + : totalRam - usedRam + (activeModel?.metadata?.size ?? 0) if (minimumRamModel > totalRam) { return ( { return null } - return metadata.tags.includes('Coming Soon') ? ( + return metadata?.tags?.includes('Coming Soon') ? ( ) : ( - getLabel(metadata.size ?? 0) + getLabel(metadata?.size ?? 0) ) } diff --git a/web/containers/Providers/EventListener.tsx index b35ab2e43..608160555 100644 --- a/web/containers/Providers/EventListener.tsx +++ b/web/containers/Providers/EventListener.tsx @@ -52,6 +52,21 @@ const EventListenerWrapper = ({ children }: PropsWithChildren) => { if (state.downloadType === 'extension') { removeInstallingExtension(state.extensionId!) } else { + state.downloadState = 'error' + setDownloadState(state) + } + }, + [setDownloadState, removeInstallingExtension] + ) + + const onFileDownloadStopped = useCallback( + (state: DownloadState) => { + console.debug('onFileDownloadStopped', state) + if (state.downloadType === 'extension') { + removeInstallingExtension(state.extensionId!) + } else { + state.downloadState = 'error' + state.error = 'aborted' + setDownloadState(state) } }, @@ -62,6 +77,7 @@ const EventListenerWrapper = ({ children }: PropsWithChildren) => { (state: DownloadState) => { console.debug('onFileDownloadSuccess', state) if (state.downloadType !== 'extension') { + state.downloadState = 'end' setDownloadState(state) } events.emit(ModelEvent.OnModelsUpdate, {}) @@ -87,6 +103,7 @@ const EventListenerWrapper = ({ children }: PropsWithChildren) => { events.on(DownloadEvent.onFileDownloadUpdate, onFileDownloadUpdate) events.on(DownloadEvent.onFileDownloadError, onFileDownloadError) events.on(DownloadEvent.onFileDownloadSuccess, onFileDownloadSuccess) + events.on(DownloadEvent.onFileDownloadStopped, onFileDownloadStopped) events.on(DownloadEvent.onFileUnzipSuccess, onFileUnzipSuccess) return () => { diff --git a/web/extension/ExtensionManager.ts index aa1a7674b..811126f85 100644 --- a/web/extension/ExtensionManager.ts +++ b/web/extension/ExtensionManager.ts @@ -8,6 +8,7 @@ import Extension from './Extension' * Manages the registration and retrieval of extensions. 
*/ export class ExtensionManager { + date = new Date().toISOString() // Registered extensions private extensions = new Map() diff --git a/web/helpers/atoms/Model.atom.ts b/web/helpers/atoms/Model.atom.ts index 6abc42c9e..0f5367f64 100644 --- a/web/helpers/atoms/Model.atom.ts +++ b/web/helpers/atoms/Model.atom.ts @@ -1,4 +1,4 @@ -import { ImportingModel, InferenceEngine, Model, ModelFile } from '@janhq/core' +import { ImportingModel, InferenceEngine, Model } from '@janhq/core' import { atom } from 'jotai' import { atomWithStorage } from 'jotai/utils' @@ -14,7 +14,7 @@ enum ModelStorageAtomKeys { * Downloaded Models Atom * This atom stores the list of models that have been downloaded. */ -export const downloadedModelsAtom = atomWithStorage( +export const downloadedModelsAtom = atomWithStorage( ModelStorageAtomKeys.DownloadedModels, [] ) @@ -23,7 +23,7 @@ export const downloadedModelsAtom = atomWithStorage( * Configured Models Atom * This atom stores the list of models that have been configured and available to download */ -export const configuredModelsAtom = atomWithStorage( +export const configuredModelsAtom = atomWithStorage( ModelStorageAtomKeys.AvailableModels, [] ) @@ -43,12 +43,18 @@ export const removeDownloadedModelAtom = atom( /** * Atom to store the selected model (from ModelDropdown) */ -export const selectedModelAtom = atom(undefined) +export const selectedModelAtom = atom(undefined) /** * Atom to store the expanded engine sections (from ModelDropdown) */ -export const showEngineListModelAtom = atom([InferenceEngine.nitro]) +export const showEngineListModelAtom = atom([ + InferenceEngine.nitro, + InferenceEngine.cortex, + InferenceEngine.cortex_llamacpp, + InferenceEngine.cortex_onnx, + InferenceEngine.cortex_tensorrtllm, +]) /// End Models Atom /// Model Download Atom @@ -58,13 +64,13 @@ export const stateModel = atom({ state: 'start', loading: false, model: '' }) /** * Stores the list of models which are being downloaded. 
*/ -const downloadingModelsAtom = atom([]) +const downloadingModelsAtom = atom([]) export const getDownloadingModelAtom = atom((get) => get(downloadingModelsAtom)) -export const addDownloadingModelAtom = atom(null, (get, set, model: Model) => { +export const addDownloadingModelAtom = atom(null, (get, set, model: string) => { const downloadingModels = get(downloadingModelsAtom) - if (!downloadingModels.find((e) => e.id === model.id)) { + if (!downloadingModels.find((e) => e === model)) { set(downloadingModelsAtom, [...downloadingModels, model]) } }) @@ -76,7 +82,7 @@ export const removeDownloadingModelAtom = atom( set( downloadingModelsAtom, - downloadingModels.filter((e) => e.id !== modelId) + downloadingModels.filter((e) => e !== modelId) ) } ) @@ -88,10 +94,6 @@ export const removeDownloadingModelAtom = atom( // store the paths of the models that are being imported export const importingModelsAtom = atom([]) -// DEPRECATED: Remove when moving to cortex.cpp -// Default model template when importing -export const defaultModelAtom = atom(undefined) - /** * Importing progress Atom */ diff --git a/web/hooks/useActiveModel.ts b/web/hooks/useActiveModel.ts index 2d53678c3..8dd71fcc5 100644 --- a/web/hooks/useActiveModel.ts +++ b/web/hooks/useActiveModel.ts @@ -1,6 +1,6 @@ import { useCallback, useEffect, useRef } from 'react' -import { EngineManager, Model, ModelFile } from '@janhq/core' +import { EngineManager, Model } from '@janhq/core' import { atom, useAtom, useAtomValue, useSetAtom } from 'jotai' import { toaster } from '@/containers/Toast' @@ -11,7 +11,7 @@ import { vulkanEnabledAtom } from '@/helpers/atoms/AppConfig.atom' import { downloadedModelsAtom } from '@/helpers/atoms/Model.atom' import { activeThreadAtom } from '@/helpers/atoms/Thread.atom' -export const activeModelAtom = atom(undefined) +export const activeModelAtom = atom(undefined) export const loadModelErrorAtom = atom(undefined) type ModelState = { @@ -37,7 +37,7 @@ export function useActiveModel() { const [pendingModelLoad, setPendingModelLoad] = useAtom(pendingModelLoadAtom) const isVulkanEnabled = useAtomValue(vulkanEnabledAtom) - const downloadedModelsRef = useRef([]) + const downloadedModelsRef = useRef([]) useEffect(() => { downloadedModelsRef.current = downloadedModels @@ -55,11 +55,6 @@ export function useActiveModel() { let model = downloadedModelsRef?.current.find((e) => e.id === modelId) - const error = await stopModel().catch((error: Error) => error) - if (error) { - return Promise.reject(error) - } - setLoadModelError(undefined) setActiveModel(undefined) @@ -144,7 +139,7 @@ export function useActiveModel() { const engine = EngineManager.instance().get(stoppingModel.engine) return engine ?.unloadModel(stoppingModel) - .catch() + .catch((e) => console.error(e)) .then(() => { setActiveModel(undefined) setStateModel({ state: 'start', loading: false, model: undefined }) diff --git a/web/hooks/useCreateNewThread.ts b/web/hooks/useCreateNewThread.ts index e65353753..75aa99c27 100644 --- a/web/hooks/useCreateNewThread.ts +++ b/web/hooks/useCreateNewThread.ts @@ -8,7 +8,7 @@ import { ThreadAssistantInfo, ThreadState, AssistantTool, - ModelFile, + Model, } from '@janhq/core' import { atom, useAtomValue, useSetAtom } from 'jotai' @@ -76,7 +76,7 @@ export const useCreateNewThread = () => { const requestCreateNewThread = async ( assistant: Assistant, - model?: ModelFile | undefined + model?: Model | undefined ) => { // Stop generating if any setIsGeneratingResponse(false) diff --git a/web/hooks/useDeleteModel.test.ts 
b/web/hooks/useDeleteModel.test.ts index 336a1cd0c..3a6587d7b 100644 --- a/web/hooks/useDeleteModel.test.ts +++ b/web/hooks/useDeleteModel.test.ts @@ -16,7 +16,7 @@ describe('useDeleteModel', () => { const mockModel: any = { id: 'test-model', name: 'Test Model', - // Add other required properties of ModelFile + // Add other required properties of Model } const mockDeleteModel = jest.fn() diff --git a/web/hooks/useDeleteModel.ts b/web/hooks/useDeleteModel.ts index 5a7a319b2..5621a78b8 100644 --- a/web/hooks/useDeleteModel.ts +++ b/web/hooks/useDeleteModel.ts @@ -1,6 +1,6 @@ import { useCallback } from 'react' -import { ExtensionTypeEnum, ModelExtension, ModelFile } from '@janhq/core' +import { ExtensionTypeEnum, ModelExtension, Model } from '@janhq/core' import { useSetAtom } from 'jotai' @@ -13,8 +13,8 @@ export default function useDeleteModel() { const removeDownloadedModel = useSetAtom(removeDownloadedModelAtom) const deleteModel = useCallback( - async (model: ModelFile) => { - await localDeleteModel(model) + async (model: Model) => { + await localDeleteModel(model.id) removeDownloadedModel(model.id) toaster({ title: 'Model Deletion Successful', @@ -28,7 +28,7 @@ export default function useDeleteModel() { return { deleteModel } } -const localDeleteModel = async (model: ModelFile) => +const localDeleteModel = async (model: string) => extensionManager .get(ExtensionTypeEnum.Model) ?.deleteModel(model) diff --git a/web/hooks/useDownloadModel.ts b/web/hooks/useDownloadModel.ts index 0cd21ea83..82ce593e2 100644 --- a/web/hooks/useDownloadModel.ts +++ b/web/hooks/useDownloadModel.ts @@ -1,106 +1,47 @@ import { useCallback } from 'react' import { - Model, + events, ExtensionTypeEnum, + ModelEvent, ModelExtension, - abortDownload, - joinPath, - ModelArtifact, - DownloadState, - GpuSetting, - ModelFile, - dirName, } from '@janhq/core' -import { useAtomValue, useSetAtom } from 'jotai' +import { useSetAtom } from 'jotai' -import { setDownloadStateAtom } from './useDownloadState' - -import useGpuSetting from './useGpuSetting' +import { toaster } from '@/containers/Toast' import { extensionManager } from '@/extension/ExtensionManager' + import { - ignoreSslAtom, - proxyAtom, - proxyEnabledAtom, -} from '@/helpers/atoms/AppConfig.atom' -import { addDownloadingModelAtom } from '@/helpers/atoms/Model.atom' + addDownloadingModelAtom, + removeDownloadingModelAtom, +} from '@/helpers/atoms/Model.atom' export default function useDownloadModel() { - const ignoreSSL = useAtomValue(ignoreSslAtom) - const proxy = useAtomValue(proxyAtom) - const proxyEnabled = useAtomValue(proxyEnabledAtom) - const setDownloadState = useSetAtom(setDownloadStateAtom) const addDownloadingModel = useSetAtom(addDownloadingModelAtom) - - const { getGpuSettings } = useGpuSetting() + const removeDownloadingModel = useSetAtom(removeDownloadingModelAtom) const downloadModel = useCallback( - async (model: Model) => { - const childProgresses: DownloadState[] = model.sources.map( - (source: ModelArtifact) => ({ - fileName: source.filename, - modelId: model.id, - time: { - elapsed: 0, - remaining: 0, - }, - speed: 0, - percent: 0, - size: { - total: 0, - transferred: 0, - }, - downloadState: 'downloading', - }) - ) - - // set an initial download state - setDownloadState({ - fileName: '', - modelId: model.id, - time: { - elapsed: 0, - remaining: 0, - }, - speed: 0, - percent: 0, - size: { - total: 0, - transferred: 0, - }, - children: childProgresses, - downloadState: 'downloading', - }) - + async (model: string) => { 
addDownloadingModel(model) - const gpuSettings = await getGpuSettings() - await localDownloadModel( - model, - ignoreSSL, - proxyEnabled ? proxy : '', - gpuSettings - ) + localDownloadModel(model).catch((error) => { + if (error.message) { + toaster({ + title: 'Download failed', + description: error.message, + type: 'error', + }) + } + + removeDownloadingModel(model) + }) }, - [ - ignoreSSL, - proxy, - proxyEnabled, - getGpuSettings, - addDownloadingModel, - setDownloadState, - ] + [addDownloadingModel] ) - const abortModelDownload = useCallback(async (model: Model | ModelFile) => { - for (const source of model.sources) { - const path = - 'file_path' in model - ? await joinPath([await dirName(model.file_path), source.filename]) - : await joinPath(['models', model.id, source.filename]) - await abortDownload(path) - } + const abortModelDownload = useCallback(async (model: string) => { + await cancelModelDownload(model) }, []) return { @@ -109,12 +50,12 @@ } } -const localDownloadModel = async ( - model: Model, - ignoreSSL: boolean, - proxy: string, - gpuSettings?: GpuSetting -) => +const localDownloadModel = async (model: string) => extensionManager .get(ExtensionTypeEnum.Model) - ?.downloadModel(model, gpuSettings, { ignoreSSL, proxy }) + ?.pullModel(model) + +const cancelModelDownload = async (model: string) => + extensionManager + .get(ExtensionTypeEnum.Model) + ?.cancelModelPull(model) diff --git a/web/hooks/useDownloadState.ts index 03a8883cb..59267749e 100644 --- a/web/hooks/useDownloadState.ts +++ b/web/hooks/useDownloadState.ts @@ -77,7 +77,7 @@ export const setDownloadStateAtom = atom( } } else { // download in progress - if (state.size.total === 0) { + if (state.size.total === 0 || !currentState[state.modelId]) { // this is initial state, just set the state currentState[state.modelId] = state set(modelDownloadStateAtom, currentState) diff --git a/web/hooks/useGetHFRepoData.ts index 3dab2c72e..4e3308116 100644 --- a/web/hooks/useGetHFRepoData.ts +++ b/web/hooks/useGetHFRepoData.ts @@ -1,12 +1,6 @@ import { useCallback, useState } from 'react' -import { - ExtensionTypeEnum, - HuggingFaceRepoData, - ModelExtension, -} from '@janhq/core' - -import { extensionManager } from '@/extension' +import { HuggingFaceRepoData } from '@janhq/core' export const useGetHFRepoData = () => { const [error, setError] = useState(undefined) @@ -35,7 +29,8 @@ export const useGetHFRepoData = () => { const extensionGetHfRepoData = async ( repoId: string ): Promise => { - return extensionManager - .get(ExtensionTypeEnum.Model) - ?.fetchHuggingFaceRepoData(repoId) + return Promise.resolve(undefined) + // return extensionManager + // .get(ExtensionTypeEnum.Model) + // ?.fetchHuggingFaceRepoData(repoId) } diff --git a/web/hooks/useImportModel.ts index effc64f86..df6b085ca 100644 --- a/web/hooks/useImportModel.ts +++ b/web/hooks/useImportModel.ts @@ -104,16 +104,22 @@ const useImportModel = () => { const localImportModels = async ( models: ImportingModel[], optionType: OptionType -): Promise => - extensionManager - .get(ExtensionTypeEnum.Model) - ?.importModels(models, optionType) +): Promise => { + await Promise.all( + models + .filter((e) => !!e.modelId) + .map((model) => { + if (model.modelId) + return extensionManager + .get(ExtensionTypeEnum.Model) + ?.importModel(model.modelId, model.path) + }) + ) +} const localUpdateModelInfo = async ( modelInfo: Partial ): Promise => extensionManager 
.get(ExtensionTypeEnum.Model) - ?.updateModelInfo(modelInfo) + ?.updateModel(modelInfo) export default useImportModel diff --git a/web/hooks/useModels.ts b/web/hooks/useModels.ts index 58def79c6..1cbd970d6 100644 --- a/web/hooks/useModels.ts +++ b/web/hooks/useModels.ts @@ -5,8 +5,8 @@ import { Model, ModelEvent, ModelExtension, - ModelFile, events, + ModelManager, } from '@janhq/core' import { useSetAtom } from 'jotai' @@ -14,7 +14,6 @@ import { useSetAtom } from 'jotai' import { extensionManager } from '@/extension' import { configuredModelsAtom, - defaultModelAtom, downloadedModelsAtom, } from '@/helpers/atoms/Model.atom' @@ -25,32 +24,22 @@ import { */ const useModels = () => { const setDownloadedModels = useSetAtom(downloadedModelsAtom) - const setConfiguredModels = useSetAtom(configuredModelsAtom) - const setDefaultModel = useSetAtom(defaultModelAtom) + const setExtensionModels = useSetAtom(configuredModelsAtom) const getData = useCallback(() => { const getDownloadedModels = async () => { - const models = await getLocalDownloadedModels() + const models = await getModels() setDownloadedModels(models) } - const getConfiguredModels = async () => { - const models = await getLocalConfiguredModels() - setConfiguredModels(models) - } - - const getDefaultModel = async () => { - const defaultModel = await getLocalDefaultModel() - setDefaultModel(defaultModel) + const getExtensionModels = async () => { + const models = ModelManager.instance().models.values().toArray() + setExtensionModels(models) } // Fetch all data - Promise.all([ - getDownloadedModels(), - getConfiguredModels(), - getDefaultModel(), - ]) - }, [setDownloadedModels, setConfiguredModels, setDefaultModel]) + Promise.all([getDownloadedModels(), getExtensionModels()]) + }, [setDownloadedModels, setExtensionModels]) useEffect(() => { // Try get data on mount @@ -65,22 +54,8 @@ const useModels = () => { }, [getData]) } -// TODO: Deprecated - Remove when moving to cortex.cpp -const getLocalDefaultModel = async (): Promise => - extensionManager - .get(ExtensionTypeEnum.Model) - ?.getDefaultModel() - -// TODO: Deprecated - Remove when moving to cortex.cpp -const getLocalConfiguredModels = async (): Promise => - extensionManager - .get(ExtensionTypeEnum.Model) - ?.getConfiguredModels() ?? [] - -// TODO: Deprecated - Remove when moving to cortex.cpp -const getLocalDownloadedModels = async (): Promise => - extensionManager - .get(ExtensionTypeEnum.Model) - ?.getDownloadedModels() ?? [] +const getModels = async (): Promise => + extensionManager.get(ExtensionTypeEnum.Model)?.getModels() ?? 
+ [] export default useModels diff --git a/web/hooks/useRecommendedModel.ts b/web/hooks/useRecommendedModel.ts index ed56efa55..21a9c69e7 100644 --- a/web/hooks/useRecommendedModel.ts +++ b/web/hooks/useRecommendedModel.ts @@ -1,6 +1,6 @@ import { useCallback, useEffect, useState } from 'react' -import { Model, InferenceEngine, ModelFile } from '@janhq/core' +import { Model, InferenceEngine } from '@janhq/core' import { atom, useAtomValue } from 'jotai' @@ -24,16 +24,12 @@ export const LAST_USED_MODEL_ID = 'last-used-model-id' */ export default function useRecommendedModel() { const activeModel = useAtomValue(activeModelAtom) - const [sortedModels, setSortedModels] = useState([]) - const [recommendedModel, setRecommendedModel] = useState< - ModelFile | undefined - >() + const [sortedModels, setSortedModels] = useState([]) + const [recommendedModel, setRecommendedModel] = useState() const activeThread = useAtomValue(activeThreadAtom) const downloadedModels = useAtomValue(downloadedModelsAtom) - const getAndSortDownloadedModels = useCallback(async (): Promise< - ModelFile[] - > => { + const getAndSortDownloadedModels = useCallback(async (): Promise => { const models = downloadedModels.sort((a, b) => a.engine !== InferenceEngine.nitro && b.engine === InferenceEngine.nitro ? 1 diff --git a/web/hooks/useSendChatMessage.ts b/web/hooks/useSendChatMessage.ts index 1dbd5b45e..bab515a30 100644 --- a/web/hooks/useSendChatMessage.ts +++ b/web/hooks/useSendChatMessage.ts @@ -123,65 +123,27 @@ export default function useSendChatMessage() { } const resendChatMessage = async (currentMessage: ThreadMessage) => { - if (!activeThreadRef.current) { - console.error('No active thread') - return - } - updateThreadWaiting(activeThreadRef.current.id, true) + // Delete last response before regenerating + const newConvoData = currentMessages + let toSendMessage = currentMessage - const requestBuilder = new MessageRequestBuilder( - MessageRequestType.Thread, - activeThreadRef.current.assistants[0].model ?? selectedModelRef.current, - activeThreadRef.current, - currentMessages - ) - .addSystemMessage(activeThreadRef.current.assistants[0]?.instructions) - .removeLastAssistantMessage() + do { + deleteMessage(currentMessage.id) + const msg = newConvoData.pop() + if (!msg) break + toSendMessage = msg + deleteMessage(toSendMessage.id ?? '') + } while (toSendMessage.role !== ChatCompletionRole.User) - const modelId = - selectedModelRef.current?.id ?? - activeThreadRef.current.assistants[0].model.id - - if (modelRef.current?.id !== modelId) { - const error = await startModel(modelId).catch((error: Error) => error) - if (error) { - updateThreadWaiting(activeThreadRef.current.id, false) - return - } + if (activeThreadRef.current) { + await extensionManager + .get(ExtensionTypeEnum.Conversational) + ?.writeMessages(activeThreadRef.current.id, newConvoData) } - setIsGeneratingResponse(true) - - if (currentMessage.role !== ChatCompletionRole.User) { - // Delete last response before regenerating - deleteMessage(currentMessage.id ?? '') - if (activeThreadRef.current) { - await extensionManager - .get(ExtensionTypeEnum.Conversational) - ?.writeMessages( - activeThreadRef.current.id, - currentMessages.filter((msg) => msg.id !== currentMessage.id) - ) - } - } - // Process message request with Assistants tools - const request = await ToolManager.instance().process( - requestBuilder.build(), - activeThreadRef.current.assistants?.flatMap( - (assistant) => assistant.tools ?? [] - ) ?? 
[] - ) - - request.messages = normalizeMessages(request.messages ?? []) - - const engine = - requestBuilder.model?.engine ?? selectedModelRef.current?.engine ?? '' - - EngineManager.instance().get(engine)?.inference(request) + sendChatMessage(toSendMessage.content[0]?.text.value) } - // Define interface extending Array prototype - const sendChatMessage = async (message: string) => { if (!message || message.trim().length === 0) return @@ -294,6 +256,7 @@ export default function useSendChatMessage() { ) request.messages = normalizeMessages(request.messages ?? []) + console.log(requestBuilder.model?.engine ?? modelRequest.engine, request) // Request for inference EngineManager.instance() .get(requestBuilder.model?.engine ?? modelRequest.engine ?? '') diff --git a/web/screens/Hub/ModelList/ModelHeader/index.tsx b/web/screens/Hub/ModelList/ModelHeader/index.tsx index 44a3fd278..ce5a12957 100644 --- a/web/screens/Hub/ModelList/ModelHeader/index.tsx +++ b/web/screens/Hub/ModelList/ModelHeader/index.tsx @@ -1,6 +1,6 @@ import { useCallback } from 'react' -import { ModelFile } from '@janhq/core' +import { Model } from '@janhq/core' import { Button, Badge, Tooltip } from '@janhq/joi' import { useAtomValue, useSetAtom } from 'jotai' @@ -38,7 +38,7 @@ import { } from '@/helpers/atoms/SystemBar.atom' type Props = { - model: ModelFile + model: Model onClick: () => void open: string } @@ -64,7 +64,7 @@ const ModelItemHeader = ({ model, onClick, open }: Props) => { const assistants = useAtomValue(assistantsAtom) const onDownloadClick = useCallback(() => { - downloadModel(model) + downloadModel(model.sources[0].url) }, [model, downloadModel]) const isDownloaded = downloadedModels.find((md) => md.id === model.id) != null @@ -81,7 +81,7 @@ const ModelItemHeader = ({ model, onClick, open }: Props) => { ) - const isDownloading = downloadingModels.some((md) => md.id === model.id) + const isDownloading = downloadingModels.some((md) => md === model.id) const onUseModelClick = useCallback(async () => { if (assistants.length === 0) { @@ -144,7 +144,7 @@ const ModelItemHeader = ({ model, onClick, open }: Props) => {
    - {toGibibytes(model.metadata.size)} + {toGibibytes(model.metadata?.size)}
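The Hub screens above now start a download with model.sources[0].url, a plain string, and the same string flows through cancellation. Condensed from the useDownloadModel diff earlier in this patch, the new wiring reduces to two thin wrappers; this is a minimal sketch, and the <ModelExtension> type parameters are an assumption, since this rendering of the diff drops generics:

import { ExtensionTypeEnum, ModelExtension } from '@janhq/core'
import { extensionManager } from '@/extension/ExtensionManager'

// Start a pull; `model` is a model id or a direct source URL
// (e.g. model.sources[0].url as in the Hub screens above).
const localDownloadModel = async (model: string): Promise<void> =>
  extensionManager
    .get<ModelExtension>(ExtensionTypeEnum.Model)
    ?.pullModel(model)

// Abort an in-flight pull with the same string; progress and stop
// notifications come back through the DownloadEvent listeners.
const cancelModelDownload = async (model: string): Promise<void> =>
  extensionManager
    .get<ModelExtension>(ExtensionTypeEnum.Model)
    ?.cancelModelPull(model)

Because only a string crosses the boundary, the downloading-state atoms can hold string identifiers as well, which is exactly the Model.atom change above.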
    diff --git a/web/screens/Hub/ModelList/ModelItem/index.tsx b/web/screens/Hub/ModelList/ModelItem/index.tsx index ec9d885a1..a077dbffc 100644 --- a/web/screens/Hub/ModelList/ModelItem/index.tsx +++ b/web/screens/Hub/ModelList/ModelItem/index.tsx @@ -1,6 +1,6 @@ import { useState } from 'react' -import { ModelFile } from '@janhq/core' +import { Model } from '@janhq/core' import { Badge } from '@janhq/joi' import { twMerge } from 'tailwind-merge' @@ -12,7 +12,7 @@ import ModelItemHeader from '@/screens/Hub/ModelList/ModelHeader' import { toGibibytes } from '@/utils/converter' type Props = { - model: ModelFile + model: Model } const ModelItem: React.FC = ({ model }) => { @@ -34,7 +34,7 @@ const ModelItem: React.FC = ({ model }) => {
    - {toGibibytes(model.metadata.size)} + {toGibibytes(model.metadata?.size)}
    @@ -49,9 +49,9 @@ const ModelItem: React.FC = ({ model }) => { Author

    - {model.metadata.author} + {model.metadata?.author}

    @@ -66,7 +66,7 @@ const ModelItem: React.FC = ({ model }) => {
    Tags
    - {model.metadata.tags.map((tag: string) => ( + {model.metadata?.tags?.map((tag: string) => ( {tag} diff --git a/web/screens/Hub/ModelList/index.tsx b/web/screens/Hub/ModelList/index.tsx index 8fc30d541..0d7865a81 100644 --- a/web/screens/Hub/ModelList/index.tsx +++ b/web/screens/Hub/ModelList/index.tsx @@ -1,6 +1,6 @@ import { useMemo } from 'react' -import { ModelFile } from '@janhq/core' +import { Model } from '@janhq/core' import { useAtomValue } from 'jotai' @@ -9,16 +9,16 @@ import ModelItem from '@/screens/Hub/ModelList/ModelItem' import { downloadedModelsAtom } from '@/helpers/atoms/Model.atom' type Props = { - models: ModelFile[] + models: Model[] } const ModelList = ({ models }: Props) => { const downloadedModels = useAtomValue(downloadedModelsAtom) - const sortedModels: ModelFile[] = useMemo(() => { - const featuredModels: ModelFile[] = [] - const remoteModels: ModelFile[] = [] - const localModels: ModelFile[] = [] - const remainingModels: ModelFile[] = [] + const sortedModels: Model[] = useMemo(() => { + const featuredModels: Model[] = [] + const remoteModels: Model[] = [] + const localModels: Model[] = [] + const remainingModels: Model[] = [] models.forEach((m) => { if (m.metadata?.tags?.includes('Featured')) { featuredModels.push(m) @@ -30,9 +30,9 @@ const ModelList = ({ models }: Props) => { remainingModels.push(m) } }) - featuredModels.sort((m1, m2) => m1.metadata.size - m2.metadata.size) - localModels.sort((m1, m2) => m1.metadata.size - m2.metadata.size) - remainingModels.sort((m1, m2) => m1.metadata.size - m2.metadata.size) + featuredModels.sort((m1, m2) => m1.metadata?.size - m2.metadata?.size) + localModels.sort((m1, m2) => m1.metadata?.size - m2.metadata?.size) + remainingModels.sort((m1, m2) => m1.metadata?.size - m2.metadata?.size) remoteModels.sort((m1, m2) => m1.name.localeCompare(m2.name)) return [ ...featuredModels, diff --git a/web/screens/Hub/index.tsx b/web/screens/Hub/index.tsx index 8148a6bb5..382cf5667 100644 --- a/web/screens/Hub/index.tsx +++ b/web/screens/Hub/index.tsx @@ -52,7 +52,7 @@ const HubScreen = () => { } else if (sortSelected === 'featured') { return ( x.name.toLowerCase().includes(searchValue.toLowerCase()) && - x.metadata.tags.includes('Featured') + x.metadata?.tags?.includes('Featured') ) } else { return x.name.toLowerCase().includes(searchValue.toLowerCase()) diff --git a/web/screens/Settings/HuggingFaceRepoDetailModal/ModelDownloadRow/index.tsx b/web/screens/Settings/HuggingFaceRepoDetailModal/ModelDownloadRow/index.tsx index 9c2ff14a5..454905332 100644 --- a/web/screens/Settings/HuggingFaceRepoDetailModal/ModelDownloadRow/index.tsx +++ b/web/screens/Settings/HuggingFaceRepoDetailModal/ModelDownloadRow/index.tsx @@ -1,11 +1,6 @@ -import { useCallback, useMemo } from 'react' +import { useCallback } from 'react' -import { - DownloadState, - HuggingFaceRepoData, - Model, - Quantization, -} from '@janhq/core' +import { DownloadState, HuggingFaceRepoData, Quantization } from '@janhq/core' import { Badge, Button, Progress } from '@janhq/joi' import { useAtomValue, useSetAtom } from 'jotai' @@ -24,10 +19,7 @@ import { mainViewStateAtom } from '@/helpers/atoms/App.atom' import { assistantsAtom } from '@/helpers/atoms/Assistant.atom' import { importHuggingFaceModelStageAtom } from '@/helpers/atoms/HuggingFace.atom' -import { - defaultModelAtom, - downloadedModelsAtom, -} from '@/helpers/atoms/Model.atom' +import { downloadedModelsAtom } from '@/helpers/atoms/Model.atom' type Props = { index: number @@ -39,7 +31,6 @@ type Props = { } const 
ModelDownloadRow: React.FC = ({ - repoData, downloadUrl, fileName, fileSize = 0, @@ -56,44 +47,18 @@ const ModelDownloadRow: React.FC = ({ const downloadedModel = downloadedModels.find((md) => md.id === fileName) const setHfImportingStage = useSetAtom(importHuggingFaceModelStageAtom) - const defaultModel = useAtomValue(defaultModelAtom) - - const model = useMemo(() => { - if (!defaultModel) { - return undefined - } - - const model: Model = { - ...defaultModel, - sources: [ - { - url: downloadUrl, - filename: fileName, - }, - ], - id: fileName, - name: fileName, - created: Date.now(), - metadata: { - author: 'User', - tags: repoData.tags, - size: fileSize, - }, - } - return model - }, [fileName, fileSize, repoData, downloadUrl, defaultModel]) const onAbortDownloadClick = useCallback(() => { - if (model) { - abortModelDownload(model) + if (downloadUrl) { + abortModelDownload(downloadUrl) } - }, [model, abortModelDownload]) + }, [downloadUrl, abortModelDownload]) const onDownloadClick = useCallback(async () => { - if (model) { - downloadModel(model) + if (downloadUrl) { + downloadModel(downloadUrl) } - }, [model, downloadModel]) + }, [downloadUrl, downloadModel]) const onUseModelClick = useCallback(async () => { if (assistants.length === 0) { @@ -111,7 +76,7 @@ const ModelDownloadRow: React.FC = ({ setHfImportingStage, ]) - if (!model) { + if (!downloadUrl) { return null } @@ -143,7 +108,7 @@ const ModelDownloadRow: React.FC = ({ variant="soft" className="min-w-[98px]" onClick={onUseModelClick} - data-testid={`use-model-btn-${model.id}`} + data-testid={`use-model-btn-${downloadUrl}`} > Use diff --git a/web/screens/Settings/MyModels/MyModelList/index.tsx b/web/screens/Settings/MyModels/MyModelList/index.tsx index 7557e9952..6661ed068 100644 --- a/web/screens/Settings/MyModels/MyModelList/index.tsx +++ b/web/screens/Settings/MyModels/MyModelList/index.tsx @@ -1,6 +1,6 @@ import { memo, useState } from 'react' -import { InferenceEngine, ModelFile } from '@janhq/core' +import { InferenceEngine, Model } from '@janhq/core' import { Badge, Button, Tooltip, useClickOutside } from '@janhq/joi' import { useAtom } from 'jotai' import { @@ -21,7 +21,7 @@ import { isLocalEngine } from '@/utils/modelEngine' import { serverEnabledAtom } from '@/helpers/atoms/LocalServer.atom' type Props = { - model: ModelFile + model: Model groupTitle?: string } @@ -78,7 +78,7 @@ const MyModelList = ({ model }: Props) => {
    - {toGibibytes(model.metadata.size)} + {toGibibytes(model.metadata?.size)}
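Nearly every screen in this patch swaps model.metadata.size for model.metadata?.size because metadata is optional on the Model type that replaces ModelFile. Where the guarded read recurs, a small helper keeps the fallback consistent; formatModelSize below is hypothetical (not part of this patch) and only illustrates the pattern:

import { Model } from '@janhq/core'

import { toGibibytes } from '@/utils/converter'

// Hypothetical helper: metadata, and metadata.size, may be undefined on
// Model, so read through optional chaining and fall back to the same '-'
// placeholder the components above render.
const formatModelSize = (model: Model): string =>
  model.metadata?.size ? toGibibytes(model.metadata.size) : '-'

A component would then render {formatModelSize(model)} instead of repeating the inline ternary shown in TableActiveModel.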
    diff --git a/web/screens/Thread/ThreadCenterPanel/AssistantSetting/index.tsx b/web/screens/Thread/ThreadCenterPanel/AssistantSetting/index.tsx index 4dab6bfa8..f73efb486 100644 --- a/web/screens/Thread/ThreadCenterPanel/AssistantSetting/index.tsx +++ b/web/screens/Thread/ThreadCenterPanel/AssistantSetting/index.tsx @@ -38,20 +38,20 @@ const AssistantSetting: React.FC = ({ componentData }) => { (key === 'chunk_overlap' || key === 'chunk_size') ) { if ( - activeThread.assistants[0].tools[0]?.settings.chunk_size < - activeThread.assistants[0].tools[0]?.settings.chunk_overlap + activeThread.assistants[0].tools[0]?.settings?.chunk_size < + activeThread.assistants[0].tools[0]?.settings?.chunk_overlap ) { activeThread.assistants[0].tools[0].settings.chunk_overlap = activeThread.assistants[0].tools[0].settings.chunk_size } if ( key === 'chunk_size' && - value < activeThread.assistants[0].tools[0].settings.chunk_overlap + value < activeThread.assistants[0].tools[0].settings?.chunk_overlap ) { activeThread.assistants[0].tools[0].settings.chunk_overlap = value } else if ( key === 'chunk_overlap' && - value > activeThread.assistants[0].tools[0].settings.chunk_size + value > activeThread.assistants[0].tools[0].settings?.chunk_size ) { activeThread.assistants[0].tools[0].settings.chunk_size = value } diff --git a/web/screens/Thread/ThreadCenterPanel/ChatBody/OnDeviceStarterScreen/index.tsx b/web/screens/Thread/ThreadCenterPanel/ChatBody/OnDeviceStarterScreen/index.tsx index 0ef9a9ba1..0adc7ddd4 100644 --- a/web/screens/Thread/ThreadCenterPanel/ChatBody/OnDeviceStarterScreen/index.tsx +++ b/web/screens/Thread/ThreadCenterPanel/ChatBody/OnDeviceStarterScreen/index.tsx @@ -69,7 +69,7 @@ const OnDeviceStarterScreen = ({ extensionHasSettings }: Props) => { return x.id === recommendModel[0] || x.id === recommendModel[1] } else { return ( - x.metadata.tags.includes('Featured') && x.metadata.size < 5000000000 + x.metadata?.tags?.includes('Featured') && x.metadata?.size < 5000000000 ) } }) @@ -143,7 +143,7 @@ const OnDeviceStarterScreen = ({ extensionHasSettings }: Props) => { ) : ( filteredModels.map((model) => { const isDownloading = downloadingModels.some( - (md) => md.id === model.id + (md) => md === model.id ) return (
    {
    - {toGibibytes(model.metadata.size)} + {toGibibytes(model.metadata?.size)} {!isDownloading ? ( downloadModel(model)} + onClick={() => + downloadModel(model.sources[0].url) + } /> ) : ( Object.values(downloadStates) @@ -210,7 +212,7 @@ const OnDeviceStarterScreen = ({ extensionHasSettings }: Props) => { {featuredModel.slice(0, 2).map((featModel) => { const isDownloading = downloadingModels.some( - (md) => md.id === featModel.id + (md) => md === featModel.id ) return (
    { - {toGibibytes(featModel.metadata.size)} + {toGibibytes(featModel.metadata?.size)}
    )} diff --git a/web/screens/Thread/ThreadCenterPanel/ChatInput/index.tsx b/web/screens/Thread/ThreadCenterPanel/ChatInput/index.tsx index afa84b5bf..066c93430 100644 --- a/web/screens/Thread/ThreadCenterPanel/ChatInput/index.tsx +++ b/web/screens/Thread/ThreadCenterPanel/ChatInput/index.tsx @@ -155,7 +155,7 @@ const ChatInput = () => { fileUpload.length > 0 || (activeThread?.assistants[0].tools && !activeThread?.assistants[0].tools[0]?.enabled && - !activeThread?.assistants[0].model.settings.vision_model) + !activeThread?.assistants[0].model.settings?.vision_model) ) { e.stopPropagation() } else { @@ -180,7 +180,7 @@ const ChatInput = () => { (activeThread?.assistants[0].tools && !activeThread?.assistants[0].tools[0]?.enabled && !activeThread?.assistants[0].model.settings - .vision_model && ( + ?.vision_model && ( <> {fileUpload.length !== 0 && ( @@ -221,13 +221,13 @@ const ChatInput = () => {
  • { if ( - activeThread?.assistants[0].model.settings.vision_model + activeThread?.assistants[0].model.settings?.vision_model ) { imageInputRef.current?.click() setShowAttacmentMenus(false) @@ -240,7 +240,7 @@ const ChatInput = () => { } content="This feature only supports multimodal models." disabled={ - activeThread?.assistants[0].model.settings.vision_model + activeThread?.assistants[0].model.settings?.vision_model } /> {
  • { onClick={() => { if ( activeThread?.assistants[0].model.settings - .text_model !== false + ?.text_model !== false ) { fileInputRef.current?.click() setShowAttacmentMenus(false) @@ -271,11 +271,11 @@ const ChatInput = () => { content={ (!activeThread?.assistants[0].tools || !activeThread?.assistants[0].tools[0]?.enabled || - activeThread?.assistants[0].model.settings.text_model === + activeThread?.assistants[0].model.settings?.text_model === false) && ( <> - {activeThread?.assistants[0].model.settings.text_model === - false ? ( + {activeThread?.assistants[0].model.settings + ?.text_model === false ? ( This model does not support text-based retrieval. diff --git a/web/screens/Thread/ThreadCenterPanel/MessageToolbar/index.tsx b/web/screens/Thread/ThreadCenterPanel/MessageToolbar/index.tsx index cdf865ceb..c4a97a6b9 100644 --- a/web/screens/Thread/ThreadCenterPanel/MessageToolbar/index.tsx +++ b/web/screens/Thread/ThreadCenterPanel/MessageToolbar/index.tsx @@ -74,7 +74,7 @@ const MessageToolbar = ({ message }: { message: ThreadMessage }) => { )[ messages.filter((msg) => msg.role === ChatCompletionRole.Assistant) .length - 1 - ]?.content[0].text.value, + ]?.content[0]?.text.value, }, } diff --git a/web/screens/Thread/ThreadCenterPanel/index.tsx b/web/screens/Thread/ThreadCenterPanel/index.tsx index b12f859bd..fe7993e9a 100644 --- a/web/screens/Thread/ThreadCenterPanel/index.tsx +++ b/web/screens/Thread/ThreadCenterPanel/index.tsx @@ -56,7 +56,7 @@ const ThreadCenterPanel = () => { const activeThread = useAtomValue(activeThreadAtom) const acceptedFormat: Accept = activeThread?.assistants[0].model.settings - .vision_model + ?.vision_model ? { 'application/pdf': ['.pdf'], 'image/jpeg': ['.jpeg'], @@ -79,7 +79,7 @@ const ThreadCenterPanel = () => { e.dataTransfer.items.length === 1 && ((activeThread?.assistants[0].tools && activeThread?.assistants[0].tools[0]?.enabled) || - activeThread?.assistants[0].model.settings.vision_model) + activeThread?.assistants[0].model.settings?.vision_model) ) { setDragOver(true) } else if ( @@ -101,7 +101,7 @@ const ThreadCenterPanel = () => { rejectFiles.length !== 0 || (activeThread?.assistants[0].tools && !activeThread?.assistants[0].tools[0]?.enabled && - !activeThread?.assistants[0].model.settings.vision_model) + !activeThread?.assistants[0].model.settings?.vision_model) ) return const imageType = files[0]?.type.includes('image') @@ -170,7 +170,7 @@ const ThreadCenterPanel = () => { {isDragReject ? `Currently, we only support 1 attachment at the same time with ${ activeThread?.assistants[0].model.settings - .vision_model + ?.vision_model ? 'PDF, JPEG, JPG, PNG' : 'PDF' } format` @@ -178,7 +178,7 @@ const ThreadCenterPanel = () => { {!isDragReject && (

    - {activeThread?.assistants[0].model.settings.vision_model + {activeThread?.assistants[0].model.settings?.vision_model ? 'PDF, JPEG, JPG, PNG' : 'PDF'}

    diff --git a/web/screens/Thread/ThreadRightPanel/index.tsx b/web/screens/Thread/ThreadRightPanel/index.tsx index 0bf917015..7ccc4957a 100644 --- a/web/screens/Thread/ThreadRightPanel/index.tsx +++ b/web/screens/Thread/ThreadRightPanel/index.tsx @@ -182,8 +182,8 @@ const ThreadRightPanel = () => { }) if ( - activeThread.assistants[0].model.parameters.max_tokens && - activeThread.assistants[0].model.settings.ctx_len + activeThread.assistants[0].model.parameters?.max_tokens && + activeThread.assistants[0].model.settings?.ctx_len ) { if ( key === 'max_tokens' && diff --git a/web/services/appService.test.ts b/web/services/appService.test.ts index 37053f930..5172ea6ed 100644 --- a/web/services/appService.test.ts +++ b/web/services/appService.test.ts @@ -1,30 +1,32 @@ - -import { ExtensionTypeEnum, extensionManager } from '@/extension'; -import { appService } from './appService'; +import { extensionManager } from '@/extension' +import { appService } from './appService' test('should return correct system information when monitoring extension is found', async () => { - const mockGpuSetting = { name: 'NVIDIA GeForce GTX 1080', memory: 8192 }; - const mockOsInfo = { platform: 'win32', release: '10.0.19041' }; + const mockGpuSetting = { name: 'NVIDIA GeForce GTX 1080', memory: 8192 } + const mockOsInfo = { platform: 'win32', release: '10.0.19041' } const mockMonitoringExtension = { getGpuSetting: jest.fn().mockResolvedValue(mockGpuSetting), getOsInfo: jest.fn().mockResolvedValue(mockOsInfo), - }; - extensionManager.get = jest.fn().mockReturnValue(mockMonitoringExtension); - - const result = await appService.systemInformation(); - - expect(mockMonitoringExtension.getGpuSetting).toHaveBeenCalled(); - expect(mockMonitoringExtension.getOsInfo).toHaveBeenCalled(); - expect(result).toEqual({ gpuSetting: mockGpuSetting, osInfo: mockOsInfo }); -}); + } + extensionManager.get = jest.fn().mockReturnValue(mockMonitoringExtension) + const result = await appService.systemInformation() + + expect(mockMonitoringExtension.getGpuSetting).toHaveBeenCalled() + expect(mockMonitoringExtension.getOsInfo).toHaveBeenCalled() + expect(result).toEqual({ gpuSetting: mockGpuSetting, osInfo: mockOsInfo }) +}) test('should log a warning when monitoring extension is not found', async () => { - const consoleWarnMock = jest.spyOn(console, 'warn').mockImplementation(() => {}); - extensionManager.get = jest.fn().mockReturnValue(undefined); - - await appService.systemInformation(); - - expect(consoleWarnMock).toHaveBeenCalledWith('System monitoring extension not found'); - consoleWarnMock.mockRestore(); -}); + const consoleWarnMock = jest + .spyOn(console, 'warn') + .mockImplementation(() => {}) + extensionManager.get = jest.fn().mockReturnValue(undefined) + + await appService.systemInformation() + + expect(consoleWarnMock).toHaveBeenCalledWith( + 'System monitoring extension not found' + ) + consoleWarnMock.mockRestore() +})
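One thread ties the web-side changes together: download lifecycle handling is now fully event-driven. The EventListener diff above registers DownloadEvent.onFileDownloadStopped next to the existing update/error/success handlers and folds a user-initiated stop into the 'error' state, tagged 'aborted', so the UI can distinguish it from a real failure. A condensed sketch of that handler, assuming the DownloadState shape used throughout this patch:

import { DownloadEvent, DownloadState, events } from '@janhq/core'

// A stopped download reuses the 'error' download state so the existing
// error UI applies, but tags it 'aborted' so callers can tell a manual
// cancel apart from a genuine failure.
events.on(DownloadEvent.onFileDownloadStopped, (state: DownloadState) => {
  if (state.downloadType === 'extension') return
  state.downloadState = 'error'
  state.error = 'aborted'
  // hand the normalized state to the download-state store
  // (setDownloadStateAtom in web/hooks/useDownloadState.ts)
})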