Merge pull request #3821 from janhq/feat/path-to-cortexcpp

feat: Jan Integrates Cortex.cpp as Provider
Louis 2024-11-06 15:45:48 +07:00 committed by GitHub
commit a82c701087
177 changed files with 2835 additions and 4247 deletions

View File

@ -319,6 +319,13 @@ jobs:
# TURBO_TEAM: 'linux'
# TURBO_TOKEN: '${{ secrets.TURBO_TOKEN }}'
- uses: actions/upload-artifact@v4
if: always()
with:
name: playwright-report
path: electron/playwright-report/
retention-days: 2
coverage-check:
runs-on: [self-hosted, Linux, ubuntu-desktop]
needs: base_branch_cov

View File

@ -1 +1 @@
npm run lint --fix
npx oxlint@latest --fix

View File

@ -1,6 +1,8 @@
import { SettingComponentProps } from '../types'
import { Model, ModelEvent, SettingComponentProps } from '../types'
import { getJanDataFolderPath, joinPath } from './core'
import { events } from './events'
import { fs } from './fs'
import { ModelManager } from './models'
export enum ExtensionTypeEnum {
Assistant = 'assistant',
@ -103,6 +105,22 @@ export abstract class BaseExtension implements ExtensionType {
return undefined
}
/**
* Registers models and persists them in the shared in-memory ModelManager instance's data map.
* @param models
*/
async registerModels(models: Model[]): Promise<void> {
for (const model of models) {
ModelManager.instance().register(model)
}
events.emit(ModelEvent.OnModelsUpdate, {})
}
/**
* Register settings for the extension.
* @param settings
* @returns
*/
async registerSettings(settings: SettingComponentProps[]): Promise<void> {
if (!this.name) {
console.error('Extension name is not defined')
@ -139,6 +157,12 @@ export abstract class BaseExtension implements ExtensionType {
}
}
/**
* Get the setting value for the key.
* @param key
* @param defaultValue
* @returns
*/
async getSetting<T>(key: string, defaultValue: T) {
const keySetting = (await this.getSettings()).find((setting) => setting.key === key)
@ -168,6 +192,10 @@ export abstract class BaseExtension implements ExtensionType {
return
}
/**
* Get the settings for the extension.
* @returns
*/
async getSettings(): Promise<SettingComponentProps[]> {
if (!this.name) return []
@ -189,6 +217,11 @@ export abstract class BaseExtension implements ExtensionType {
}
}
/**
* Update the settings for the extension.
* @param componentProps
* @returns
*/
async updateSettings(componentProps: Partial<SettingComponentProps>[]): Promise<void> {
if (!this.name) return
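
For context, a minimal sketch (hypothetical extension class and setting key, not part of this diff) of how a concrete extension would use the new registerModels and getSetting APIs added above:

import { BaseExtension, Model, SettingComponentProps } from '@janhq/core'

// Hypothetical subclass; other BaseExtension members keep their defaults.
class ExampleProviderExtension extends BaseExtension {
  async onLoad() {
    // Persist models into the shared in-memory ModelManager and notify listeners.
    await this.registerModels([{ id: 'example-model' } as Model])

    // Register a setting, then read it back with a typed default.
    await this.registerSettings([
      { key: 'api-key', title: 'API Key' } as SettingComponentProps,
    ])
    const apiKey = await this.getSetting<string>('api-key', '')
    console.log('configured key:', apiKey)
  }

  onUnload() {}
}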

View File

@ -1,8 +1,6 @@
import { AIEngine } from './AIEngine'
import { events } from '../../events'
import { ModelEvent, Model, ModelFile, InferenceEngine } from '../../../types'
import { EngineManager } from './EngineManager'
import { fs } from '../../fs'
import { ModelEvent, Model } from '../../../types'
jest.mock('../../events')
jest.mock('./EngineManager')
@ -26,7 +24,7 @@ describe('AIEngine', () => {
})
it('should load model if provider matches', async () => {
const model: ModelFile = { id: 'model1', engine: 'test-provider' } as any
const model: any = { id: 'model1', engine: 'test-provider' } as any
await engine.loadModel(model)
@ -34,7 +32,7 @@ describe('AIEngine', () => {
})
it('should not load model if provider does not match', async () => {
const model: ModelFile = { id: 'model1', engine: 'other-provider' } as any
const model: any = { id: 'model1', engine: 'other-provider' } as any
await engine.loadModel(model)

View File

@ -1,17 +1,14 @@
import { getJanDataFolderPath, joinPath } from '../../core'
import { events } from '../../events'
import { BaseExtension } from '../../extension'
import { fs } from '../../fs'
import { MessageRequest, Model, ModelEvent, ModelFile } from '../../../types'
import { MessageRequest, Model, ModelEvent } from '../../../types'
import { EngineManager } from './EngineManager'
import { ModelManager } from '../../models/manager'
/**
* Base AIEngine
* Applicable to all AI Engines
*/
export abstract class AIEngine extends BaseExtension {
private static modelsFolder = 'models'
// The inference engine
abstract provider: string
@ -21,7 +18,7 @@ export abstract class AIEngine extends BaseExtension {
override onLoad() {
this.registerEngine()
events.on(ModelEvent.OnModelInit, (model: ModelFile) => this.loadModel(model))
events.on(ModelEvent.OnModelInit, (model: Model) => this.loadModel(model))
events.on(ModelEvent.OnModelStop, (model: Model) => this.unloadModel(model))
}
@ -32,53 +29,10 @@ export abstract class AIEngine extends BaseExtension {
EngineManager.instance().register(this)
}
async registerModels(models: Model[]): Promise<void> {
const modelFolderPath = await joinPath([await getJanDataFolderPath(), AIEngine.modelsFolder])
let shouldNotifyModelUpdate = false
for (const model of models) {
const modelPath = await joinPath([modelFolderPath, model.id])
const isExist = await fs.existsSync(modelPath)
if (isExist) {
await this.migrateModelIfNeeded(model, modelPath)
continue
}
await fs.mkdir(modelPath)
await fs.writeFileSync(
await joinPath([modelPath, 'model.json']),
JSON.stringify(model, null, 2)
)
shouldNotifyModelUpdate = true
}
if (shouldNotifyModelUpdate) {
events.emit(ModelEvent.OnModelsUpdate, {})
}
}
async migrateModelIfNeeded(model: Model, modelPath: string): Promise<void> {
try {
const modelJson = await fs.readFileSync(await joinPath([modelPath, 'model.json']), 'utf-8')
const currentModel: Model = JSON.parse(modelJson)
if (currentModel.version !== model.version) {
await fs.writeFileSync(
await joinPath([modelPath, 'model.json']),
JSON.stringify(model, null, 2)
)
events.emit(ModelEvent.OnModelsUpdate, {})
}
} catch (error) {
console.warn('Error while try to migrating model', error)
}
}
/**
* Loads the model.
*/
async loadModel(model: ModelFile): Promise<any> {
async loadModel(model: Model): Promise<any> {
if (model.engine.toString() !== this.provider) return Promise.resolve()
events.emit(ModelEvent.OnModelReady, model)
return Promise.resolve()

View File

@ -1,3 +1,4 @@
import { InferenceEngine } from '../../../types'
import { AIEngine } from './AIEngine'
/**
@ -20,6 +21,22 @@ export class EngineManager {
* @returns The engine, if found.
*/
get<T extends AIEngine>(provider: string): T | undefined {
// Backward compatible provider
// nitro is migrated to cortex
if (
[
InferenceEngine.nitro,
InferenceEngine.cortex,
InferenceEngine.cortex_llamacpp,
InferenceEngine.cortex_onnx,
InferenceEngine.cortex_tensorrtllm,
]
.map((e) => e.toString())
.includes(provider)
)
provider = InferenceEngine.cortex
return this.engines.get(provider) as T | undefined
}
@ -27,6 +44,6 @@ export class EngineManager {
* The instance of the engine manager.
*/
static instance(): EngineManager {
return window.core?.engineManager as EngineManager ?? new EngineManager()
return (window.core?.engineManager as EngineManager) ?? new EngineManager()
}
}
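
The mapping above keeps threads saved with legacy engine names working; a quick illustration (assuming a cortex engine has already registered itself via registerEngine, and that both symbols are exported from @janhq/core as elsewhere in this PR):

import { EngineManager, InferenceEngine } from '@janhq/core'

// 'nitro' and all cortex.* aliases now resolve to the single cortex engine.
const byLegacyName = EngineManager.instance().get('nitro')
const byNewName = EngineManager.instance().get(InferenceEngine.cortex)
console.assert(byLegacyName === byNewName)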

View File

@ -3,7 +3,7 @@
*/
import { LocalOAIEngine } from './LocalOAIEngine'
import { events } from '../../events'
import { ModelEvent, ModelFile, Model } from '../../../types'
import { ModelEvent, Model } from '../../../types'
import { executeOnMain, systemInformation, dirName } from '../../core'
jest.mock('../../core', () => ({
@ -43,7 +43,7 @@ describe('LocalOAIEngine', () => {
})
it('should load model correctly', async () => {
const model: ModelFile = { engine: 'testProvider', file_path: 'path/to/model' } as any
const model: any = { engine: 'testProvider', file_path: 'path/to/model' } as any
const modelFolder = 'path/to'
const systemInfo = { os: 'testOS' }
const res = { error: null }
@ -66,7 +66,7 @@ describe('LocalOAIEngine', () => {
})
it('should handle load model error', async () => {
const model: ModelFile = { engine: 'testProvider', file_path: 'path/to/model' } as any
const model: any = { engine: 'testProvider', file_path: 'path/to/model' } as any
const modelFolder = 'path/to'
const systemInfo = { os: 'testOS' }
const res = { error: 'load error' }
@ -91,9 +91,7 @@ describe('LocalOAIEngine', () => {
it('should not unload model if engine does not match', async () => {
const model: Model = { engine: 'otherProvider' } as any
await engine.unloadModel(model)
expect(executeOnMain).not.toHaveBeenCalled()
expect(events.emit).not.toHaveBeenCalledWith(ModelEvent.OnModelStopped, {})
})

View File

@ -1,6 +1,6 @@
import { executeOnMain, systemInformation, dirName } from '../../core'
import { executeOnMain, systemInformation, dirName, joinPath, getJanDataFolderPath } from '../../core'
import { events } from '../../events'
import { Model, ModelEvent, ModelFile } from '../../../types'
import { Model, ModelEvent } from '../../../types'
import { OAIEngine } from './OAIEngine'
/**
@ -22,16 +22,16 @@ export abstract class LocalOAIEngine extends OAIEngine {
override onLoad() {
super.onLoad()
// These events are applicable to local inference providers
events.on(ModelEvent.OnModelInit, (model: ModelFile) => this.loadModel(model))
events.on(ModelEvent.OnModelInit, (model: Model) => this.loadModel(model))
events.on(ModelEvent.OnModelStop, (model: Model) => this.unloadModel(model))
}
/**
* Load the model.
*/
override async loadModel(model: ModelFile): Promise<void> {
override async loadModel(model: Model & { file_path?: string }): Promise<void> {
if (model.engine.toString() !== this.provider) return
const modelFolder = await dirName(model.file_path)
const modelFolder = 'file_path' in model && model.file_path ? await dirName(model.file_path) : await this.getModelFilePath(model.id)
const systemInfo = await systemInformation()
const res = await executeOnMain(
this.nodeModule,
@ -63,4 +63,12 @@ export abstract class LocalOAIEngine extends OAIEngine {
events.emit(ModelEvent.OnModelStopped, {})
})
}
/// Legacy
private getModelFilePath = async (
id: string,
): Promise<string> => {
return joinPath([await getJanDataFolderPath(), 'models', id])
}
///
}
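
The folder resolution added to loadModel falls back to the Jan data-folder convention when a model carries no legacy file_path; a standalone sketch of that logic (hypothetical helper name):

import { dirName, getJanDataFolderPath, joinPath, Model } from '@janhq/core'

async function resolveModelFolder(model: Model & { file_path?: string }): Promise<string> {
  return 'file_path' in model && model.file_path
    ? dirName(model.file_path) // legacy model.json-based models keep their own folder
    : joinPath([await getJanDataFolderPath(), 'models', model.id]) // <jan-data>/models/<id>
}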

View File

@ -55,7 +55,21 @@ export abstract class OAIEngine extends AIEngine {
* Inference request
*/
override async inference(data: MessageRequest) {
if (data.model?.engine?.toString() !== this.provider) return
if (!data.model?.id) {
events.emit(MessageEvent.OnMessageResponse, {
status: MessageStatus.Error,
content: [
{
type: ContentType.Text,
text: {
value: 'No model ID provided',
annotations: [],
},
},
],
})
return
}
const timestamp = Date.now()
const message: ThreadMessage = {
@ -89,7 +103,6 @@ export abstract class OAIEngine extends AIEngine {
model: model.id,
stream: true,
...model.parameters,
...(this.provider === 'nitro' ? { engine: 'cortex.llamacpp'} : {}),
}
if (this.transformPayload) {
requestBody = this.transformPayload(requestBody)
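
On the consumer side, the early error return added above surfaces as an ordinary message event; a hypothetical listener (event and field names as used in this file):

import { events, MessageEvent, MessageStatus, ThreadMessage } from '@janhq/core'

events.on(MessageEvent.OnMessageResponse, (message: ThreadMessage) => {
  if (message.status === MessageStatus.Error) {
    // e.g. 'No model ID provided' from the guard above
    console.warn('Inference failed:', message.content?.[0]?.text?.value)
  }
})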

View File

@ -10,7 +10,7 @@ export function requestInference(
requestBody: any,
model: {
id: string
parameters: ModelRuntimeParams
parameters?: ModelRuntimeParams
},
controller?: AbortController,
headers?: HeadersInit,
@ -22,7 +22,7 @@ export function requestInference(
headers: {
'Content-Type': 'application/json',
'Access-Control-Allow-Origin': '*',
'Accept': model.parameters.stream ? 'text/event-stream' : 'application/json',
'Accept': model.parameters?.stream ? 'text/event-stream' : 'application/json',
...headers,
},
body: JSON.stringify(requestBody),
@ -45,7 +45,7 @@ export function requestInference(
subscriber.complete()
return
}
if (model.parameters.stream === false) {
if (model.parameters?.stream === false) {
const data = await response.json()
if (transformResponse) {
subscriber.next(transformResponse(data))
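
With `parameters` now optional, a minimal call needs only the model id and defaults to the streaming Accept header. A sketch, with the argument order assumed from the fragment above (url, body, model) and the Cortex URL taken from this PR:

requestInference(
  'http://127.0.0.1:39291/v1/chat/completions',
  { model: 'example-model', messages: [], stream: true },
  { id: 'example-model' } // no `parameters` object required any more
).subscribe({
  next: (chunk) => process.stdout.write(String(chunk)),
  error: (err) => console.error(err),
})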

View File

@ -1,13 +1,5 @@
import { BaseExtension, ExtensionTypeEnum } from '../extension'
import {
GpuSetting,
HuggingFaceRepoData,
ImportingModel,
Model,
ModelFile,
ModelInterface,
OptionType,
} from '../../types'
import { Model, ModelInterface, OptionType } from '../../types'
/**
* Model extension for managing models.
@ -20,17 +12,10 @@ export abstract class ModelExtension extends BaseExtension implements ModelInter
return ExtensionTypeEnum.Model
}
abstract downloadModel(
model: Model,
gpuSettings?: GpuSetting,
network?: { proxy: string; ignoreSSL?: boolean }
): Promise<void>
abstract cancelModelDownload(modelId: string): Promise<void>
abstract deleteModel(model: ModelFile): Promise<void>
abstract getDownloadedModels(): Promise<ModelFile[]>
abstract getConfiguredModels(): Promise<ModelFile[]>
abstract importModels(models: ImportingModel[], optionType: OptionType): Promise<void>
abstract updateModelInfo(modelInfo: Partial<ModelFile>): Promise<ModelFile>
abstract fetchHuggingFaceRepoData(repoId: string): Promise<HuggingFaceRepoData>
abstract getDefaultModel(): Promise<Model>
abstract getModels(): Promise<Model[]>
abstract pullModel(model: string, id?: string, name?: string): Promise<void>
abstract cancelModelPull(modelId: string): Promise<void>
abstract importModel(model: string, modelPath: string, name?: string, optionType?: OptionType): Promise<void>
abstract updateModel(modelInfo: Partial<Model>): Promise<Model>
abstract deleteModel(model: string): Promise<void>
}
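
The slimmed-down contract replaces client-side downloads with Cortex-managed pulls; a hypothetical caller (extension instance, source URL, and ids are examples):

declare const modelExtension: ModelExtension

async function example() {
  // Pull by source string; the optional id/name override Cortex defaults.
  await modelExtension.pullModel('https://huggingface.co/org/repo/model.gguf', 'my-model')
  // An in-flight pull can be cancelled by id.
  await modelExtension.cancelModelPull('my-model')
  // Local files are imported (symlinked or copied) rather than downloaded.
  await modelExtension.importModel('my-model', '/path/to/model.gguf', 'My Model', 'symlink')
}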

View File

@ -1,32 +1,37 @@
import * as Core from './core';
import * as Events from './events';
import * as FileSystem from './fs';
import * as Extension from './extension';
import * as Extensions from './extensions';
import * as Tools from './tools';
import * as Core from './core'
import * as Events from './events'
import * as FileSystem from './fs'
import * as Extension from './extension'
import * as Extensions from './extensions'
import * as Tools from './tools'
import * as Models from './models'
describe('Module Tests', () => {
it('should export Core module', () => {
expect(Core).toBeDefined();
});
expect(Core).toBeDefined()
})
it('should export Event module', () => {
expect(Events).toBeDefined();
});
expect(Events).toBeDefined()
})
it('should export Filesystem module', () => {
expect(FileSystem).toBeDefined();
});
expect(FileSystem).toBeDefined()
})
it('should export Extension module', () => {
expect(Extension).toBeDefined();
});
expect(Extension).toBeDefined()
})
it('should export all base extensions', () => {
expect(Extensions).toBeDefined();
});
expect(Extensions).toBeDefined()
})
it('should export all base tools', () => {
expect(Tools).toBeDefined();
});
});
expect(Tools).toBeDefined()
})
it('should export all base models', () => {
expect(Models).toBeDefined()
})
})

View File

@ -33,3 +33,9 @@ export * from './extensions'
* @module
*/
export * from './tools'
/**
* Export all base models.
* @module
*/
export * from './models'

View File

@ -0,0 +1,10 @@
/**
* Export ModelManager
* @module
*/
export { ModelManager } from './manager'
/**
* Export all utils
*/
export * from './utils'

View File

@ -0,0 +1,47 @@
import { Model, ModelEvent } from '../../types'
import { events } from '../events'
/**
* Manages the registered models across extensions.
*/
export class ModelManager {
public models = new Map<string, Model>()
constructor() {
if (window) {
window.core.modelManager = this
}
}
/**
* Registers a model.
* @param model - The model to register.
*/
register<T extends Model>(model: T) {
if (this.models.has(model.id)) {
this.models.set(model.id, {
...model,
...this.models.get(model.id),
})
} else {
this.models.set(model.id, model)
}
events.emit(ModelEvent.OnModelsUpdate, {})
}
/**
* Retrieves a model by its id.
* @param id - The id of the model to retrieve.
* @returns The model, if found.
*/
get<T extends Model>(id: string): T | undefined {
return this.models.get(id) as T | undefined
}
/**
* The instance of the model manager.
*/
static instance(): ModelManager {
return (window.core?.modelManager as ModelManager) ?? new ModelManager()
}
}
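
Note that register merges with the existing entry taking precedence over incoming fields, so re-registering a model cannot clobber user changes. Minimal usage, with a hypothetical model id:

import { Model } from '../../types'
import { ModelManager } from './manager'

ModelManager.instance().register({ id: 'example-model' } as Model)
const found = ModelManager.instance().get<Model>('example-model')
console.assert(found?.id === 'example-model')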

View File

@ -1,7 +1,10 @@
// web/utils/modelParam.test.ts
import { normalizeValue, validationRules } from './modelParam'
import { extractModelLoadParams } from './modelParam';
import { extractInferenceParams } from './modelParam';
import {
normalizeValue,
validationRules,
extractModelLoadParams,
extractInferenceParams,
} from './utils'
describe('validationRules', () => {
it('should validate temperature correctly', () => {
@ -151,13 +154,12 @@ describe('validationRules', () => {
})
})
it('should normalize invalid values for keys not listed in validationRules', () => {
expect(normalizeValue('invalid_key', 'invalid')).toBe('invalid')
expect(normalizeValue('invalid_key', 123)).toBe(123)
expect(normalizeValue('invalid_key', true)).toBe(true)
expect(normalizeValue('invalid_key', false)).toBe(false)
})
})
describe('normalizeValue', () => {
it('should normalize ctx_len correctly', () => {
@ -192,19 +194,16 @@ describe('normalizeValue', () => {
})
})
it('should handle invalid values correctly by falling back to originParams', () => {
const modelParams = { temperature: 'invalid', token_limit: -1 }
const originParams = { temperature: 0.5, token_limit: 100 }
expect(extractInferenceParams(modelParams as any, originParams)).toEqual(originParams)
})
it('should handle invalid values correctly by falling back to originParams', () => {
const modelParams = { temperature: 'invalid', token_limit: -1 };
const originParams = { temperature: 0.5, token_limit: 100 };
expect(extractInferenceParams(modelParams, originParams)).toEqual(originParams);
});
it('should return an empty object when no modelParams are provided', () => {
expect(extractModelLoadParams()).toEqual({})
})
it('should return an empty object when no modelParams are provided', () => {
expect(extractModelLoadParams()).toEqual({});
});
it('should return an empty object when no modelParams are provided', () => {
expect(extractInferenceParams()).toEqual({});
});
it('should return an empty object when no modelParams are provided', () => {
expect(extractInferenceParams()).toEqual({})
})

View File

@ -1,26 +1,20 @@
/* eslint-disable @typescript-eslint/no-explicit-any */
/* eslint-disable @typescript-eslint/naming-convention */
import { ModelRuntimeParams, ModelSettingParams } from '@janhq/core'
import { ModelParams } from '@/types/model'
import { ModelParams, ModelRuntimeParams, ModelSettingParams } from '../../types'
/**
* Validation rules for model parameters
*/
export const validationRules: { [key: string]: (value: any) => boolean } = {
temperature: (value: any) =>
typeof value === 'number' && value >= 0 && value <= 2,
temperature: (value: any) => typeof value === 'number' && value >= 0 && value <= 2,
token_limit: (value: any) => Number.isInteger(value) && value >= 0,
top_k: (value: any) => typeof value === 'number' && value >= 0 && value <= 1,
top_p: (value: any) => typeof value === 'number' && value >= 0 && value <= 1,
stream: (value: any) => typeof value === 'boolean',
max_tokens: (value: any) => Number.isInteger(value) && value >= 0,
stop: (value: any) =>
Array.isArray(value) && value.every((v) => typeof v === 'string'),
frequency_penalty: (value: any) =>
typeof value === 'number' && value >= 0 && value <= 1,
presence_penalty: (value: any) =>
typeof value === 'number' && value >= 0 && value <= 1,
stop: (value: any) => Array.isArray(value) && value.every((v) => typeof v === 'string'),
frequency_penalty: (value: any) => typeof value === 'number' && value >= 0 && value <= 1,
presence_penalty: (value: any) => typeof value === 'number' && value >= 0 && value <= 1,
ctx_len: (value: any) => Number.isInteger(value) && value >= 0,
ngl: (value: any) => Number.isInteger(value) && value >= 0,
@ -76,6 +70,7 @@ export const extractInferenceParams = (
stop: undefined,
frequency_penalty: undefined,
presence_penalty: undefined,
engine: undefined,
}
const runtimeParams: ModelRuntimeParams = {}
@ -119,11 +114,18 @@ export const extractModelLoadParams = (
embedding: undefined,
n_parallel: undefined,
cpu_threads: undefined,
pre_prompt: undefined,
system_prompt: undefined,
ai_prompt: undefined,
user_prompt: undefined,
prompt_template: undefined,
model_path: undefined,
llama_model_path: undefined,
mmproj: undefined,
cont_batching: undefined,
vision_model: undefined,
text_model: undefined,
engine: undefined,
}
const settingParams: ModelSettingParams = {}

View File

@ -8,7 +8,8 @@ jest.mock('../../helper', () => ({
jest.mock('../../helper/path', () => ({
validatePath: jest.fn().mockReturnValue('path/to/folder'),
normalizeFilePath: () => process.platform === 'win32' ? 'C:\\Users\path\\to\\file.gguf' : '/Users/path/to/file.gguf',
normalizeFilePath: () =>
process.platform === 'win32' ? 'C:\\Users\\path\\to\\file.gguf' : '/Users/path/to/file.gguf',
}))
jest.mock(

View File

@ -50,11 +50,6 @@ export class Downloader implements Processor {
const initialDownloadState: DownloadState = {
modelId,
fileName,
time: {
elapsed: 0,
remaining: 0,
},
speed: 0,
percent: 0,
size: {
total: 0,

View File

@ -343,7 +343,7 @@ export const chatCompletions = async (request: any, reply: any) => {
// add engine for new cortex cpp engine
if (requestedModel.engine === 'nitro') {
request.body.engine = 'cortex.llamacpp'
request.body.engine = 'llama-cpp'
}
const fetch = require('node-fetch')

View File

@ -1,6 +1,5 @@
import { CORTEX_DEFAULT_PORT } from './consts'
import { NITRO_DEFAULT_PORT } from './consts';
it('should test NITRO_DEFAULT_PORT', () => {
expect(NITRO_DEFAULT_PORT).toBe(3928);
});
it('should test CORTEX_DEFAULT_PORT', () => {
expect(CORTEX_DEFAULT_PORT).toBe(39291)
})

View File

@ -1,19 +1,9 @@
// The PORT to use for the Nitro subprocess
export const NITRO_DEFAULT_PORT = 3928
export const CORTEX_DEFAULT_PORT = 39291
// The HOST address to use for the Nitro subprocess
export const LOCAL_HOST = '127.0.0.1'
export const SUPPORTED_MODEL_FORMAT = '.gguf'
// The URL for the Nitro subprocess
const NITRO_HTTP_SERVER_URL = `http://${LOCAL_HOST}:${NITRO_DEFAULT_PORT}`
// The URL for the Nitro subprocess to load a model
export const NITRO_HTTP_LOAD_MODEL_URL = `${NITRO_HTTP_SERVER_URL}/inferences/server/loadmodel`
// The URL for the Nitro subprocess to validate a model
export const NITRO_HTTP_VALIDATE_MODEL_URL = `${NITRO_HTTP_SERVER_URL}/inferences/server/modelstatus`
// The URL for the Nitro subprocess to kill itself
export const NITRO_HTTP_KILL_URL = `${NITRO_HTTP_SERVER_URL}/processmanager/destroy`
export const DEFAULT_CHAT_COMPLETION_URL = `http://${LOCAL_HOST}:${NITRO_DEFAULT_PORT}/inferences/server/chat_completion` // default nitro url
export const DEFAULT_CHAT_COMPLETION_URL = `http://${LOCAL_HOST}:${CORTEX_DEFAULT_PORT}/v1/chat/completions` // default cortex.cpp chat completions url

View File

@ -1,16 +1,10 @@
import { startModel } from './startStopModel'
describe('startModel', () => {
it('test_startModel_error', async () => {
const modelId = 'testModelId'
const settingParams = undefined
const result = await startModel(modelId, settingParams)
expect(result).toEqual({
error: expect.any(Error),
})
})
await expect(startModel(modelId, settingParams)).rejects.toThrow()
})
})

View File

@ -1,6 +1,5 @@
import { join } from 'path'
import { getJanDataFolderPath, getJanExtensionsPath, log } from '../../../helper'
import { ModelSettingParams } from '../../../../types'
import { CORTEX_DEFAULT_PORT, LOCAL_HOST } from './consts'
/**
* Start a model
@ -9,70 +8,18 @@ import { ModelSettingParams } from '../../../../types'
* @returns
*/
export const startModel = async (modelId: string, settingParams?: ModelSettingParams) => {
try {
await runModel(modelId, settingParams)
return {
message: `Model ${modelId} started`,
}
} catch (e) {
return {
error: e,
}
}
}
/**
* Run a model using installed cortex extension
* @param model
* @param settingParams
*/
const runModel = async (model: string, settingParams?: ModelSettingParams): Promise<void> => {
const janDataFolderPath = getJanDataFolderPath()
const modelFolder = join(janDataFolderPath, 'models', model)
let module = join(
getJanExtensionsPath(),
'@janhq',
'inference-cortex-extension',
'dist',
'node',
'index.cjs'
)
// Just reuse the cortex extension implementation, don't duplicate then lost of sync
return import(module).then((extension) =>
extension
.loadModel(
{
modelFolder,
model,
},
settingParams
)
.then(() => log(`[SERVER]::Debug: Model is loaded`))
.then({
message: 'Model started',
return fetch(`http://${LOCAL_HOST}:${CORTEX_DEFAULT_PORT}/v1/models/start`, {
method: 'POST',
body: JSON.stringify({ model: modelId, ...settingParams }),
})
)
}
/*
* Stop model and kill nitro process.
* Stop model.
*/
export const stopModel = async (_modelId: string) => {
let module = join(
getJanExtensionsPath(),
'@janhq',
'inference-cortex-extension',
'dist',
'node',
'index.cjs'
)
// Just reuse the cortex extension implementation, don't duplicate then lost of sync
return import(module).then((extension) =>
extension
.unloadModel()
.then(() => log(`[SERVER]::Debug: Model is unloaded`))
.then({
message: 'Model stopped',
export const stopModel = async (modelId: string) => {
return fetch(`http://${LOCAL_HOST}:${CORTEX_DEFAULT_PORT}/v1/models/stop`, {
method: 'POST',
body: JSON.stringify({ model: modelId }),
})
)
}
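
Both helpers are now thin wrappers over the local cortex.cpp server and return the raw fetch response, so callers handle failures themselves (as the updated test above expects). A usage sketch with a hypothetical model id:

async function restartExample() {
  const res = await startModel('example-model', { ctx_len: 2048 })
  if (!res.ok) console.error('start failed:', res.status)
  await stopModel('example-model')
}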

View File

@ -72,6 +72,8 @@ export enum DownloadEvent {
onFileDownloadUpdate = 'onFileDownloadUpdate',
onFileDownloadError = 'onFileDownloadError',
onFileDownloadSuccess = 'onFileDownloadSuccess',
onFileDownloadStopped = 'onFileDownloadStopped',
onFileDownloadStarted = 'onFileDownloadStarted',
onFileUnzipSuccess = 'onFileUnzipSuccess',
}

View File

@ -6,8 +6,8 @@ export type FileStat = {
export type DownloadState = {
modelId: string // TODO: change to download id
fileName: string
time: DownloadTime
speed: number
time?: DownloadTime
speed?: number
percent: number
size: DownloadSize

View File

@ -6,8 +6,8 @@ import { FileMetadata } from '../file'
*/
export type ModelInfo = {
id: string
settings: ModelSettingParams
parameters: ModelRuntimeParams
settings?: ModelSettingParams
parameters?: ModelRuntimeParams
engine?: InferenceEngine
}
@ -15,7 +15,6 @@ export type ModelInfo = {
* Represents the inference engine.
* @stored
*/
export enum InferenceEngine {
anthropic = 'anthropic',
mistral = 'mistral',
@ -28,11 +27,13 @@ export enum InferenceEngine {
nitro_tensorrt_llm = 'nitro-tensorrt-llm',
cohere = 'cohere',
nvidia = 'nvidia',
cortex_llamacpp = 'cortex.llamacpp',
cortex_onnx = 'cortex.onnx',
cortex_tensorrtllm = 'cortex.tensorrt-llm',
cortex = 'cortex',
cortex_llamacpp = 'llama-cpp',
cortex_onnx = 'onnxruntime',
cortex_tensorrtllm = 'tensorrt-llm',
}
// Represents an artifact of a model, including its filename and URL
export type ModelArtifact = {
filename: string
url: string
@ -104,6 +105,7 @@ export type Model = {
engine: InferenceEngine
}
// Represents metadata associated with a model
export type ModelMetadata = {
author: string
tags: string[]
@ -124,14 +126,20 @@ export type ModelSettingParams = {
n_parallel?: number
cpu_threads?: number
prompt_template?: string
pre_prompt?: string
system_prompt?: string
ai_prompt?: string
user_prompt?: string
// path param
model_path?: string
// legacy path param
llama_model_path?: string
// clip model path
mmproj?: string
cont_batching?: boolean
vision_model?: boolean
text_model?: boolean
engine?: string
}
/**
@ -150,11 +158,12 @@ export type ModelRuntimeParams = {
engine?: string
}
// Represents a model that failed to initialize, including the error
export type ModelInitFailed = Model & {
error: Error
}
/**
* ModelFile is the model.json entity and it's file metadata
* ModelParams types
*/
export type ModelFile = Model & FileMetadata
export type ModelParams = ModelRuntimeParams | ModelSettingParams
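
An example (hypothetical paths) showing the new model_path alongside the legacy llama_model_path and clip mmproj fields kept for backward compatibility:

const settings: ModelSettingParams = {
  ctx_len: 4096,
  prompt_template: '<|user|>{prompt}<|assistant|>',
  model_path: '/models/example/model.gguf', // new path param
  llama_model_path: '/models/example/model.gguf', // legacy path param
  mmproj: '/models/example/mmproj.gguf', // clip model path for vision models
}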

View File

@ -1,4 +1,4 @@
export type OptionType = 'SYMLINK' | 'MOVE_BINARY_FILE'
export type OptionType = 'symlink' | 'copy'
export type ModelImportOption = {
type: OptionType

View File

@ -1,5 +1,5 @@
import { GpuSetting } from '../miscellaneous'
import { Model, ModelFile } from './modelEntity'
import { Model } from './modelEntity'
import { OptionType } from './modelImport'
/**
* Model extension for managing models.
@ -8,38 +8,46 @@ export interface ModelInterface {
/**
* Downloads a model.
* @param model - The model to download.
* @param network - Optional object to specify proxy/whether to ignore SSL certificates.
* @returns A Promise that resolves when the model has been downloaded.
*/
downloadModel(
model: ModelFile,
gpuSettings?: GpuSetting,
network?: { ignoreSSL?: boolean; proxy?: string }
): Promise<void>
pullModel(model: string, id?: string, name?: string): Promise<void>
/**
* Cancels the download of a specific model.
* @param {string} modelId - The ID of the model to cancel the download for.
* @returns {Promise<void>} A promise that resolves when the download has been cancelled.
*/
cancelModelDownload(modelId: string): Promise<void>
cancelModelPull(model: string): Promise<void>
/**
* Deletes a model.
* @param modelId - The ID of the model to delete.
* @returns A Promise that resolves when the model has been deleted.
*/
deleteModel(model: ModelFile): Promise<void>
deleteModel(model: string): Promise<void>
/**
* Gets a list of downloaded models.
* Gets downloaded models.
* @returns A Promise that resolves with an array of downloaded models.
*/
getDownloadedModels(): Promise<ModelFile[]>
getModels(): Promise<Model[]>
/**
* Gets a list of configured models.
* @returns A Promise that resolves with an array of configured models.
* Update a pulled model's metadata
* @param model - The model to update.
* @returns A Promise that resolves when the model has been updated.
*/
getConfiguredModels(): Promise<ModelFile[]>
updateModel(model: Partial<Model>): Promise<Model>
/**
* Import an existing model file.
* @param model - id of the model to import
* @param modelPath - path of the model file
*/
importModel(
model: string,
modelPath: string,
name?: string,
optionType?: OptionType
): Promise<void>
}

View File

@ -1,16 +1,13 @@
import * as index from './index';
import * as monitoringInterface from './monitoringInterface';
import * as resourceInfo from './resourceInfo';
import * as index from './index'
import * as monitoringInterface from './monitoringInterface'
import * as resourceInfo from './resourceInfo'
it('should re-export all symbols from monitoringInterface and resourceInfo', () => {
for (const key in monitoringInterface) {
expect(index[key]).toBe(monitoringInterface[key]);
expect(index[key]).toBe(monitoringInterface[key])
}
for (const key in resourceInfo) {
expect(index[key]).toBe(resourceInfo[key]);
expect(index[key]).toBe(resourceInfo[key])
}
});
})

View File

@ -18,7 +18,8 @@
"docs/**/*",
"scripts/**/*",
"icons/**/*",
"themes"
"themes",
"shared"
],
"asarUnpack": [
"pre-install",
@ -26,7 +27,8 @@
"docs",
"scripts",
"icons",
"themes"
"themes",
"shared"
],
"publish": [
{

electron/shared/.gitkeep Normal file
View File

View File

@ -15,6 +15,8 @@ import {
import { Constants } from './constants'
import { HubPage } from '../pages/hubPage'
import { CommonActions } from '../pages/commonActions'
import { rmSync } from 'fs'
import * as path from 'path'
export let electronApp: ElectronApplication
export let page: Page
@ -103,10 +105,14 @@ export const test = base.extend<
},
{ auto: true },
],
})
test.beforeAll(async () => {
rmSync(path.join(__dirname, '../../test-data'), {
recursive: true,
force: true,
})
test.setTimeout(TIMEOUT)
await setupElectron()
await page.waitForSelector('img[alt="Jan - Logo"]', {

View File

@ -16,7 +16,8 @@ test.beforeAll(async () => {
test('explores hub', async ({ hubPage }) => {
await hubPage.navigateByMenu()
await hubPage.verifyContainerVisible()
const useModelBtn= page.getByTestId(/^use-model-btn-.*/).first()
await hubPage.scrollToBottom()
const useModelBtn = page.getByTestId(/^use-model-btn-.*/).first()
await expect(useModelBtn).toBeVisible({
timeout: TIMEOUT,

View File

@ -8,9 +8,8 @@ export class BasePage {
constructor(
protected readonly page: Page,
readonly action: CommonActions,
protected containerId: string,
) {
}
protected containerId: string
) {}
public getValue(key: string) {
return this.action.getValue(key)
@ -37,6 +36,12 @@ export class BasePage {
expect(container.isVisible()).toBeTruthy()
}
async scrollToBottom() {
await this.page.evaluate(() => {
window.scrollTo(0, document.body.scrollHeight)
})
}
async waitUpdateLoader() {
await this.isElementVisible('img[alt="Jan - Logo"]')
}

View File

@ -63,12 +63,15 @@ export default class JanAssistantExtension extends AssistantExtension {
}
async getAssistants(): Promise<Assistant[]> {
try {
// get all the assistant directories
// get all the assistant metadata json
const results: Assistant[] = []
const allFileName: string[] = await fs.readdirSync(
JanAssistantExtension._homeDir
)
for (const fileName of allFileName) {
const filePath = await joinPath([
JanAssistantExtension._homeDir,
@ -96,6 +99,10 @@ export default class JanAssistantExtension extends AssistantExtension {
}
return results
} catch (err) {
console.debug(err)
return [this.defaultAssistant]
}
}
async deleteAssistant(assistant: Assistant): Promise<void> {
@ -112,7 +119,10 @@ export default class JanAssistantExtension extends AssistantExtension {
}
private async createJanAssistant(): Promise<void> {
const janAssistant: Assistant = {
await this.createAssistant(this.defaultAssistant)
}
private defaultAssistant: Assistant = {
avatar: '',
thread_location: undefined,
id: 'jan',
@ -144,7 +154,4 @@ Helpful Answer:`,
file_ids: [],
metadata: undefined,
}
await this.createAssistant(janAssistant)
}
}

View File

@ -10,8 +10,6 @@ import { HNSWLib } from 'langchain/vectorstores/hnswlib'
import { OpenAIEmbeddings } from 'langchain/embeddings/openai'
import { readEmbeddingEngine } from './engine'
import path from 'path'
export class Retrieval {
public chunkSize: number = 100
public chunkOverlap?: number = 0

View File

@ -0,0 +1 @@
1.0.2-rc4

View File

@ -0,0 +1,41 @@
@echo off
set BIN_PATH=./bin
set SHARED_PATH=./../../electron/shared
set /p CORTEX_VERSION=<./bin/version.txt
@REM Download cortex.llamacpp binaries
set VERSION=v0.1.35
set DOWNLOAD_URL=https://github.com/janhq/cortex.llamacpp/releases/download/%VERSION%/cortex.llamacpp-0.1.35-windows-amd64
set CUDA_DOWNLOAD_URL=https://github.com/janhq/cortex.llamacpp/releases/download/%VERSION%
set SUBFOLDERS=win-cuda-12-0 win-cuda-11-7 win-noavx win-avx win-avx2 win-avx512 win-vulkan
call .\node_modules\.bin\download -e --strip 1 -o %BIN_PATH% https://github.com/janhq/cortex/releases/download/v%CORTEX_VERSION%/cortex-%CORTEX_VERSION%-windows-amd64.tar.gz
call .\node_modules\.bin\download %DOWNLOAD_URL%-avx2-cuda-12-0.tar.gz -e --strip 1 -o %BIN_PATH%/avx2-cuda-12-0/engines/cortex.llamacpp
call .\node_modules\.bin\download %DOWNLOAD_URL%-avx2-cuda-11-7.tar.gz -e --strip 1 -o %BIN_PATH%/avx2-cuda-11-7/engines/cortex.llamacpp
call .\node_modules\.bin\download %DOWNLOAD_URL%-noavx-cuda-12-0.tar.gz -e --strip 1 -o %BIN_PATH%/noavx-cuda-12-0/engines/cortex.llamacpp
call .\node_modules\.bin\download %DOWNLOAD_URL%-noavx-cuda-11-7.tar.gz -e --strip 1 -o %BIN_PATH%/noavx-cuda-11-7/engines/cortex.llamacpp
call .\node_modules\.bin\download %DOWNLOAD_URL%-noavx.tar.gz -e --strip 1 -o %BIN_PATH%/noavx/engines/cortex.llamacpp
call .\node_modules\.bin\download %DOWNLOAD_URL%-avx.tar.gz -e --strip 1 -o %BIN_PATH%/avx/engines/cortex.llamacpp
call .\node_modules\.bin\download %DOWNLOAD_URL%-avx2.tar.gz -e --strip 1 -o %BIN_PATH%/avx2/engines/cortex.llamacpp
call .\node_modules\.bin\download %DOWNLOAD_URL%-avx512.tar.gz -e --strip 1 -o %BIN_PATH%/avx512/engines/cortex.llamacpp
call .\node_modules\.bin\download %DOWNLOAD_URL%-vulkan.tar.gz -e --strip 1 -o %BIN_PATH%/vulkan/engines/cortex.llamacpp
call .\node_modules\.bin\download %CUDA_DOWNLOAD_URL%/cuda-12-0-windows-amd64.tar.gz -e --strip 1 -o %SHARED_PATH%
call .\node_modules\.bin\download %CUDA_DOWNLOAD_URL%/cuda-11-7-windows-amd64.tar.gz -e --strip 1 -o %SHARED_PATH%
move %BIN_PATH%\cortex-server-beta.exe %BIN_PATH%\cortex-server.exe
del %BIN_PATH%\cortex-beta.exe
del %BIN_PATH%\cortex.exe
@REM Loop through each folder and move DLLs (excluding engine.dll)
for %%F in (%SUBFOLDERS%) do (
echo Processing folder: %BIN_PATH%\%%F
@REM Move all .dll files except engine.dll
for %%D in (%BIN_PATH%\%%F\engines\cortex.llamacpp\*.dll) do (
if /I not "%%~nxD"=="engine.dll" (
move "%%D" "%BIN_PATH%"
)
)
)
echo DLL files moved successfully.

View File

@ -0,0 +1,47 @@
#!/bin/bash
# Read CORTEX_VERSION
CORTEX_VERSION=$(cat ./bin/version.txt)
CORTEX_RELEASE_URL="https://github.com/janhq/cortex/releases/download"
ENGINE_DOWNLOAD_URL="https://github.com/janhq/cortex.llamacpp/releases/download/v0.1.35/cortex.llamacpp-0.1.35"
CUDA_DOWNLOAD_URL="https://github.com/janhq/cortex.llamacpp/releases/download/v0.1.35"
# Detect platform
OS_TYPE=$(uname)
if [ "$OS_TYPE" == "Linux" ]; then
# Linux downloads
download "${CORTEX_RELEASE_URL}/v${CORTEX_VERSION}/cortex-${CORTEX_VERSION}-linux-amd64.tar.gz" -e --strip 1 -o "./bin"
mv ./bin/cortex-server-beta ./bin/cortex-server
rm -rf ./bin/cortex
rm -rf ./bin/cortex-beta
chmod +x "./bin/cortex-server"
# Download engines for Linux
download "${ENGINE_DOWNLOAD_URL}-linux-amd64-noavx.tar.gz" -e --strip 1 -o "./bin/noavx/engines/cortex.llamacpp" 1
download "${ENGINE_DOWNLOAD_URL}-linux-amd64-avx.tar.gz" -e --strip 1 -o "./bin/avx/engines/cortex.llamacpp" 1
download "${ENGINE_DOWNLOAD_URL}-linux-amd64-avx2.tar.gz" -e --strip 1 -o "./bin/avx2/engines/cortex.llamacpp" 1
download "${ENGINE_DOWNLOAD_URL}-linux-amd64-avx512.tar.gz" -e --strip 1 -o "./bin/avx512/engines/cortex.llamacpp" 1
download "${ENGINE_DOWNLOAD_URL}-linux-amd64-avx2-cuda-12-0.tar.gz" -e --strip 1 -o "./bin/avx2-cuda-12-0/engines/cortex.llamacpp" 1
download "${ENGINE_DOWNLOAD_URL}-linux-amd64-avx2-cuda-11-7.tar.gz" -e --strip 1 -o "./bin/avx2-cuda-11-7/engines/cortex.llamacpp" 1
download "${ENGINE_DOWNLOAD_URL}-linux-amd64-noavx-cuda-12-0.tar.gz" -e --strip 1 -o "./bin/noavx-cuda-12-0/engines/cortex.llamacpp" 1
download "${ENGINE_DOWNLOAD_URL}-linux-amd64-noavx-cuda-11-7.tar.gz" -e --strip 1 -o "./bin/noavx-cuda-11-7/engines/cortex.llamacpp" 1
download "${ENGINE_DOWNLOAD_URL}-linux-amd64-vulkan.tar.gz" -e --strip 1 -o "./bin/vulkan/engines/cortex.llamacpp" 1
download "${CUDA_DOWNLOAD_URL}/cuda-12-0-linux-amd64.tar.gz" -e --strip 1 -o "../../electron/shared" 1
download "${CUDA_DOWNLOAD_URL}/cuda-11-7-linux-amd64.tar.gz" -e --strip 1 -o "../../electron/shared" 1
elif [ "$OS_TYPE" == "Darwin" ]; then
# macOS downloads
download "${CORTEX_RELEASE_URL}/v${CORTEX_VERSION}/cortex-${CORTEX_VERSION}-mac-universal.tar.gz" -e --strip 1 -o "./bin" 1
mv ./bin/cortex-server-beta ./bin/cortex-server
rm -rf ./bin/cortex
rm -rf ./bin/cortex-beta
chmod +x "./bin/cortex-server"
# Download engines for macOS
download "${ENGINE_DOWNLOAD_URL}-mac-arm64.tar.gz" -e --strip 1 -o ./bin/arm64/engines/cortex.llamacpp
download "${ENGINE_DOWNLOAD_URL}-mac-amd64.tar.gz" -e --strip 1 -o ./bin/x64/engines/cortex.llamacpp
else
echo "Unsupported operating system: $OS_TYPE"
exit 1
fi

View File

@ -10,12 +10,12 @@
"scripts": {
"test": "jest",
"build": "tsc --module commonjs && rollup -c rollup.config.ts",
"downloadnitro:linux:darwin": "./download.sh",
"downloadnitro:win32": "download.bat",
"downloadnitro": "run-script-os",
"build:publish:darwin": "rimraf *.tgz --glob && yarn build && npm run downloadnitro && ../../.github/scripts/auto-sign.sh && cpx \"bin/**\" \"dist/bin\" && npm pack && cpx *.tgz ../../pre-install",
"build:publish:win32:linux": "rimraf *.tgz --glob && yarn build && npm run downloadnitro && cpx \"bin/**\" \"dist/bin\" && npm pack && cpx *.tgz ../../pre-install",
"build:publish": "yarn test && run-script-os"
"downloadcortex:linux:darwin": "./download.sh",
"downloadcortex:win32": "download.bat",
"downloadcortex": "run-script-os",
"build:publish:darwin": "rimraf *.tgz --glob && yarn build && npm run downloadcortex && ../../.github/scripts/auto-sign.sh && cpx \"bin/**\" \"dist/bin\" && npm pack && cpx *.tgz ../../pre-install",
"build:publish:win32:linux": "rimraf *.tgz --glob && yarn build && npm run downloadcortex && cpx \"bin/**\" \"dist/bin\" && npm pack && cpx *.tgz ../../pre-install",
"build:publish": "run-script-os"
},
"exports": {
".": "./dist/index.js",
@ -50,6 +50,8 @@
"cpu-instructions": "^0.0.13",
"decompress": "^4.2.1",
"fetch-retry": "^5.0.6",
"ky": "^1.7.2",
"p-queue": "^8.0.1",
"rxjs": "^7.8.1",
"tcp-port-used": "^1.0.2",
"terminate": "2.6.1",

View File

@ -31,5 +31,5 @@
"tags": ["34B", "Finetuned"],
"size": 21556982144
},
"engine": "nitro"
"engine": "llama-cpp"
}

View File

@ -31,5 +31,5 @@
"tags": ["7B", "Finetuned"],
"size": 5056982144
},
"engine": "nitro"
"engine": "llama-cpp"
}

View File

@ -31,5 +31,5 @@
"tags": ["Vision"],
"size": 5750000000
},
"engine": "nitro"
"engine": "llama-cpp"
}

View File

@ -30,5 +30,5 @@
"tags": ["7B", "Finetuned"],
"size": 4370000000
},
"engine": "nitro"
"engine": "llama-cpp"
}

View File

@ -31,6 +31,6 @@
"tags": ["22B", "Finetuned", "Featured"],
"size": 13341237440
},
"engine": "nitro"
"engine": "llama-cpp"
}

View File

@ -31,6 +31,6 @@
"tags": ["34B", "Finetuned"],
"size": 21500000000
},
"engine": "nitro"
"engine": "llama-cpp"
}

View File

@ -31,5 +31,5 @@
"tags": ["2B", "Finetuned", "Tiny"],
"size": 1630000000
},
"engine": "nitro"
"engine": "llama-cpp"
}

View File

@ -31,5 +31,5 @@
"tags": ["7B", "Finetuned"],
"size": 5330000000
},
"engine": "nitro"
"engine": "llama-cpp"
}

View File

@ -31,5 +31,5 @@
"tags": ["70B", "Foundational Model"],
"size": 43920000000
},
"engine": "nitro"
"engine": "llama-cpp"
}

View File

@ -31,5 +31,5 @@
"tags": ["7B", "Foundational Model"],
"size": 4080000000
},
"engine": "nitro"
"engine": "llama-cpp"
}

View File

@ -31,5 +31,5 @@
"tags": ["1B", "Featured"],
"size": 1320000000
},
"engine": "nitro"
"engine": "llama-cpp"
}

View File

@ -31,5 +31,5 @@
"tags": ["3B", "Featured"],
"size": 3420000000
},
"engine": "nitro"
"engine": "llama-cpp"
}

View File

@ -32,5 +32,5 @@
"tags": ["Vision"],
"size": 7870000000
},
"engine": "nitro"
"engine": "llama-cpp"
}

View File

@ -32,5 +32,5 @@
"tags": ["Vision"],
"size": 4370000000
},
"engine": "nitro"
"engine": "llama-cpp"
}

View File

@ -32,5 +32,5 @@
"size": 4370000000,
"cover": "https://raw.githubusercontent.com/janhq/jan/dev/models/mistral-ins-7b-q4/cover.png"
},
"engine": "nitro"
"engine": "llama-cpp"
}

View File

@ -30,5 +30,5 @@
"tags": ["70B", "Foundational Model"],
"size": 26440000000
},
"engine": "nitro"
"engine": "llama-cpp"
}

View File

@ -31,5 +31,5 @@
"tags": ["7B", "Finetuned"],
"size": 4370000000
},
"engine": "nitro"
"engine": "llama-cpp"
}

View File

@ -31,5 +31,5 @@
"tags": ["Recommended", "7B", "Finetuned"],
"size": 4370000000
},
"engine": "nitro"
"engine": "llama-cpp"
}

View File

@ -31,5 +31,5 @@
"tags": ["34B", "Finetuned"],
"size": 20220000000
},
"engine": "nitro"
"engine": "llama-cpp"
}

View File

@ -31,5 +31,5 @@
"tags": ["7B", "Finetuned"],
"size": 4770000000
},
"engine": "nitro"
"engine": "llama-cpp"
}

View File

@ -31,6 +31,6 @@
"tags": ["7B", "Finetuned"],
"size": 4680000000
},
"engine": "nitro"
"engine": "llama-cpp"
}

View File

@ -31,6 +31,6 @@
"tags": ["14B", "Featured"],
"size": 8990000000
},
"engine": "nitro"
"engine": "llama-cpp"
}

View File

@ -31,6 +31,6 @@
"tags": ["7B", "Featured"],
"size": 4680000000
},
"engine": "nitro"
"engine": "llama-cpp"
}

View File

@ -31,6 +31,6 @@
"tags": ["7B", "Featured"],
"size": 4680000000
},
"engine": "nitro"
"engine": "llama-cpp"
}

View File

@ -31,5 +31,5 @@
"tags": ["3B", "Finetuned", "Tiny"],
"size": 2970000000
},
"engine": "nitro"
"engine": "llama-cpp"
}

View File

@ -30,5 +30,5 @@
"tags": ["7B", "Finetuned"],
"size": 4370000000
},
"engine": "nitro"
"engine": "llama-cpp"
}

View File

@ -31,5 +31,5 @@
"tags": ["Tiny", "Foundation Model"],
"size": 669000000
},
"engine": "nitro"
"engine": "llama-cpp"
}

View File

@ -31,5 +31,5 @@
"size": 4370000000,
"cover": "https://raw.githubusercontent.com/janhq/jan/dev/models/trinity-v1.2-7b/cover.png"
},
"engine": "nitro"
"engine": "llama-cpp"
}

View File

@ -31,6 +31,6 @@
"tags": ["7B", "Finetuned"],
"size": 4410000000
},
"engine": "nitro"
"engine": "llama-cpp"
}

View File

@ -31,5 +31,5 @@
"tags": ["Recommended", "13B", "Finetuned"],
"size": 7870000000
},
"engine": "nitro"
"engine": "llama-cpp"
}

View File

@ -31,5 +31,5 @@
"tags": ["34B", "Foundational Model"],
"size": 20660000000
},
"engine": "nitro"
"engine": "llama-cpp"
}

View File

@ -114,19 +114,7 @@ export default [
]),
NODE: JSON.stringify(`${packageJson.name}/${packageJson.node}`),
DEFAULT_SETTINGS: JSON.stringify(defaultSettingJson),
INFERENCE_URL: JSON.stringify(
process.env.INFERENCE_URL ||
'http://127.0.0.1:3928/inferences/server/chat_completion'
),
TROUBLESHOOTING_URL: JSON.stringify(
'https://jan.ai/guides/troubleshooting'
),
JAN_SERVER_INFERENCE_URL: JSON.stringify(
'http://localhost:1337/v1/chat/completions'
),
CUDA_DOWNLOAD_URL: JSON.stringify(
'https://catalog.jan.ai/dist/cuda-dependencies/<version>/<platform>/cuda.tar.gz'
),
CORTEX_API_URL: JSON.stringify('http://127.0.0.1:39291'),
}),
// Allow json resolution
json(),

View File

@ -1,7 +1,5 @@
declare const NODE: string
declare const INFERENCE_URL: string
declare const TROUBLESHOOTING_URL: string
declare const JAN_SERVER_INFERENCE_URL: string
declare const CORTEX_API_URL: string
declare const DEFAULT_SETTINGS: Array<any>
declare const MODELS: Array<any>

View File

@ -0,0 +1,168 @@
/**
* @file This file exports a class that implements the InferenceExtension interface from the @janhq/core package.
* The class provides methods for initializing and stopping a model, and for making inference requests.
* It also subscribes to events emitted by the @janhq/core package and handles new message requests.
* @version 1.0.0
* @module inference-extension/src/index
*/
import {
Model,
executeOnMain,
systemInformation,
joinPath,
dirName,
LocalOAIEngine,
InferenceEngine,
getJanDataFolderPath,
extractModelLoadParams,
} from '@janhq/core'
import PQueue from 'p-queue'
import ky from 'ky'
/**
* A class that implements the InferenceExtension interface from the @janhq/core package.
* The class provides methods for initializing and stopping a model, and for making inference requests.
* It also subscribes to events emitted by the @janhq/core package and handles new message requests.
*/
export default class JanInferenceCortexExtension extends LocalOAIEngine {
// DEPRECATED
nodeModule: string = 'node'
queue = new PQueue({ concurrency: 1 })
provider: string = InferenceEngine.cortex
/**
* The URL for making inference requests.
*/
inferenceUrl = `${CORTEX_API_URL}/v1/chat/completions`
/**
* Subscribes to events emitted by the @janhq/core package.
*/
async onLoad() {
const models = MODELS as Model[]
this.registerModels(models)
super.onLoad()
// Run the process watchdog
const systemInfo = await systemInformation()
await this.clean()
await executeOnMain(NODE, 'run', systemInfo)
this.queue.add(() => this.healthz())
window.addEventListener('beforeunload', () => {
this.clean()
})
}
onUnload(): void {
this.clean()
executeOnMain(NODE, 'dispose')
super.onUnload()
}
override async loadModel(
model: Model & { file_path?: string }
): Promise<void> {
if (
model.engine === InferenceEngine.nitro &&
model.settings.llama_model_path
) {
// Legacy chat model support
model.settings = {
...model.settings,
llama_model_path: await getModelFilePath(
model,
model.settings.llama_model_path
),
}
} else {
const { llama_model_path, ...settings } = model.settings
model.settings = settings
}
if (model.engine === InferenceEngine.nitro && model.settings.mmproj) {
// Legacy clip vision model support
model.settings = {
...model.settings,
mmproj: await getModelFilePath(model, model.settings.mmproj),
}
} else {
const { mmproj, ...settings } = model.settings
model.settings = settings
}
return await ky
.post(`${CORTEX_API_URL}/v1/models/start`, {
json: {
...extractModelLoadParams(model.settings),
model: model.id,
engine:
model.engine === InferenceEngine.nitro // Legacy model cache
? InferenceEngine.cortex_llamacpp
: model.engine,
},
})
.json()
.catch(async (e) => {
throw (await e.response?.json()) ?? e
})
.then()
}
override async unloadModel(model: Model): Promise<void> {
return ky
.post(`${CORTEX_API_URL}/v1/models/stop`, {
json: { model: model.id },
})
.json()
.then()
}
/**
* Do health check on cortex.cpp
* @returns
*/
healthz(): Promise<void> {
return ky
.get(`${CORTEX_API_URL}/healthz`, {
retry: {
limit: 10,
methods: ['get'],
},
})
.then(() => {})
}
/**
* Clean cortex processes
* @returns
*/
clean(): Promise<any> {
return ky
.delete(`${CORTEX_API_URL}/processmanager/destroy`, {
timeout: 2000, // maximum 2 seconds
})
.catch(() => {
// Do nothing
})
}
}
/// Legacy
export const getModelFilePath = async (
model: Model,
file: string
): Promise<string> => {
// Symlink to the model file
if (!model.sources[0]?.url.startsWith('http')) {
return model.sources[0]?.url
}
return joinPath([await getJanDataFolderPath(), 'models', model.id, file])
}
///

View File

@ -1,5 +1,5 @@
import { describe, expect, it } from '@jest/globals'
import { executableNitroFile } from './execute'
import { executableCortexFile } from './execute'
import { GpuSetting } from '@janhq/core'
import { cpuInfo } from 'cpu-instructions'
@ -27,10 +27,10 @@ jest.mock('cpu-instructions', () => ({
cpuInfo: jest.fn(),
},
}))
let mock = cpuInfo.cpuInfo as jest.Mock
mock.mockReturnValue([])
let mockCpuInfo = cpuInfo.cpuInfo as jest.Mock
mockCpuInfo.mockReturnValue([])
describe('test executable nitro file', () => {
describe('test executable cortex file', () => {
afterAll(function () {
Object.defineProperty(process, 'platform', {
value: originalPlatform,
@ -44,10 +44,14 @@ describe('test executable nitro file', () => {
Object.defineProperty(process, 'arch', {
value: 'arm64',
})
expect(executableNitroFile(testSettings)).toEqual(
expect(executableCortexFile(testSettings)).toEqual(
expect.objectContaining({
enginePath: expect.stringContaining(`mac-arm64`),
executablePath: originalPlatform === 'darwin' ? expect.stringContaining(`mac-arm64/cortex-cpp`) : expect.anything(),
enginePath: expect.stringContaining(`arm64`),
binPath: expect.stringContaining(`bin`),
executablePath:
originalPlatform === 'darwin'
? expect.stringContaining(`cortex-server`)
: expect.anything(),
cudaVisibleDevices: '',
vkVisibleDevices: '',
})
@ -55,10 +59,14 @@ describe('test executable nitro file', () => {
Object.defineProperty(process, 'arch', {
value: 'x64',
})
expect(executableNitroFile(testSettings)).toEqual(
expect(executableCortexFile(testSettings)).toEqual(
expect.objectContaining({
enginePath: expect.stringContaining(`mac-x64`),
executablePath: originalPlatform === 'darwin' ? expect.stringContaining(`mac-x64/cortex-cpp`) : expect.anything(),
enginePath: expect.stringContaining(`x64`),
binPath: expect.stringContaining(`bin`),
executablePath:
originalPlatform === 'darwin'
? expect.stringContaining(`cortex-server`)
: expect.anything(),
cudaVisibleDevices: '',
vkVisibleDevices: '',
})
@ -73,10 +81,12 @@ describe('test executable nitro file', () => {
...testSettings,
run_mode: 'cpu',
}
expect(executableNitroFile(settings)).toEqual(
mockCpuInfo.mockReturnValue(['avx'])
expect(executableCortexFile(settings)).toEqual(
expect.objectContaining({
enginePath: expect.stringContaining(`win`),
executablePath: expect.stringContaining(`cortex-cpp.exe`),
enginePath: expect.stringContaining(`avx`),
binPath: expect.stringContaining(`bin`),
executablePath: expect.stringContaining(`cortex-server.exe`),
cudaVisibleDevices: '',
vkVisibleDevices: '',
})
@ -107,10 +117,12 @@ describe('test executable nitro file', () => {
},
],
}
expect(executableNitroFile(settings)).toEqual(
mockCpuInfo.mockReturnValue(['avx2'])
expect(executableCortexFile(settings)).toEqual(
expect.objectContaining({
enginePath: expect.stringContaining(`win-cuda-11-7`),
executablePath: expect.stringContaining(`cortex-cpp.exe`),
enginePath: expect.stringContaining(`avx2-cuda-11-7`),
binPath: expect.stringContaining(`bin`),
executablePath: expect.stringContaining(`cortex-server.exe`),
cudaVisibleDevices: '0',
vkVisibleDevices: '0',
})
@ -141,10 +153,12 @@ describe('test executable nitro file', () => {
},
],
}
expect(executableNitroFile(settings)).toEqual(
mockCpuInfo.mockReturnValue(['noavx'])
expect(executableCortexFile(settings)).toEqual(
expect.objectContaining({
enginePath: expect.stringContaining(`win-cuda-12-0`),
executablePath: expect.stringContaining(`cortex-cpp.exe`),
enginePath: expect.stringContaining(`noavx-cuda-12-0`),
binPath: expect.stringContaining(`bin`),
executablePath: expect.stringContaining(`cortex-server.exe`),
cudaVisibleDevices: '0',
vkVisibleDevices: '0',
})
@ -159,10 +173,11 @@ describe('test executable nitro file', () => {
...testSettings,
run_mode: 'cpu',
}
expect(executableNitroFile(settings)).toEqual(
mockCpuInfo.mockReturnValue(['noavx'])
expect(executableCortexFile(settings)).toEqual(
expect.objectContaining({
enginePath: expect.stringContaining(`linux`),
executablePath: expect.stringContaining(`cortex-cpp`),
enginePath: expect.stringContaining(`noavx`),
executablePath: expect.stringContaining(`cortex-server`),
cudaVisibleDevices: '',
vkVisibleDevices: '',
})
@ -193,10 +208,11 @@ describe('test executable nitro file', () => {
},
],
}
expect(executableNitroFile(settings)).toEqual(
expect(executableCortexFile(settings)).toEqual(
expect.objectContaining({
enginePath: expect.stringContaining(`linux-cuda-11-7`),
executablePath: expect.stringContaining(`cortex-cpp`),
enginePath: expect.stringContaining(`cuda-11-7`),
binPath: expect.stringContaining(`bin`),
executablePath: expect.stringContaining(`cortex-server`),
cudaVisibleDevices: '0',
vkVisibleDevices: '0',
})
@ -227,10 +243,11 @@ describe('test executable nitro file', () => {
},
],
}
expect(executableNitroFile(settings)).toEqual(
expect(executableCortexFile(settings)).toEqual(
expect.objectContaining({
enginePath: expect.stringContaining(`linux-cuda-12-0`),
executablePath: expect.stringContaining(`cortex-cpp`),
enginePath: expect.stringContaining(`cuda-12-0`),
binPath: expect.stringContaining(`bin`),
executablePath: expect.stringContaining(`cortex-server`),
cudaVisibleDevices: '0',
vkVisibleDevices: '0',
})
@ -249,12 +266,13 @@ describe('test executable nitro file', () => {
const cpuInstructions = ['avx512', 'avx2', 'avx', 'noavx']
cpuInstructions.forEach((instruction) => {
mock.mockReturnValue([instruction])
mockCpuInfo.mockReturnValue([instruction])
expect(executableNitroFile(settings)).toEqual(
expect(executableCortexFile(settings)).toEqual(
expect.objectContaining({
enginePath: expect.stringContaining(`linux-${instruction}`),
executablePath: expect.stringContaining(`cortex-cpp`),
enginePath: expect.stringContaining(instruction),
binPath: expect.stringContaining(`bin`),
executablePath: expect.stringContaining(`cortex-server`),
cudaVisibleDevices: '',
vkVisibleDevices: '',
@ -273,11 +291,12 @@ describe('test executable nitro file', () => {
}
const cpuInstructions = ['avx512', 'avx2', 'avx', 'noavx']
cpuInstructions.forEach((instruction) => {
mock.mockReturnValue([instruction])
expect(executableNitroFile(settings)).toEqual(
mockCpuInfo.mockReturnValue([instruction])
expect(executableCortexFile(settings)).toEqual(
expect.objectContaining({
enginePath: expect.stringContaining(`win-${instruction}`),
executablePath: expect.stringContaining(`cortex-cpp.exe`),
enginePath: expect.stringContaining(instruction),
binPath: expect.stringContaining(`bin`),
executablePath: expect.stringContaining(`cortex-server.exe`),
cudaVisibleDevices: '',
vkVisibleDevices: '',
})
@ -312,11 +331,12 @@ describe('test executable nitro file', () => {
}
const cpuInstructions = ['avx512', 'avx2', 'avx', 'noavx']
cpuInstructions.forEach((instruction) => {
mock.mockReturnValue([instruction])
expect(executableNitroFile(settings)).toEqual(
mockCpuInfo.mockReturnValue([instruction])
expect(executableCortexFile(settings)).toEqual(
expect.objectContaining({
enginePath: expect.stringContaining(`win-cuda-12-0`),
executablePath: expect.stringContaining(`cortex-cpp.exe`),
enginePath: expect.stringContaining(`cuda-12-0`),
binPath: expect.stringContaining(`bin`),
executablePath: expect.stringContaining(`cortex-server.exe`),
cudaVisibleDevices: '0',
vkVisibleDevices: '0',
})
@ -351,11 +371,12 @@ describe('test executable nitro file', () => {
],
}
cpuInstructions.forEach((instruction) => {
mock.mockReturnValue([instruction])
expect(executableNitroFile(settings)).toEqual(
mockCpuInfo.mockReturnValue([instruction])
expect(executableCortexFile(settings)).toEqual(
expect.objectContaining({
enginePath: expect.stringContaining(`linux-cuda-12-0`),
executablePath: expect.stringContaining(`cortex-cpp`),
enginePath: expect.stringContaining(`cuda-12-0`),
binPath: expect.stringContaining(`bin`),
executablePath: expect.stringContaining(`cortex-server`),
cudaVisibleDevices: '0',
vkVisibleDevices: '0',
})
@ -391,11 +412,12 @@ describe('test executable nitro file', () => {
],
}
cpuInstructions.forEach((instruction) => {
mock.mockReturnValue([instruction])
expect(executableNitroFile(settings)).toEqual(
mockCpuInfo.mockReturnValue([instruction])
expect(executableCortexFile(settings)).toEqual(
expect.objectContaining({
enginePath: expect.stringContaining(`linux-vulkan`),
executablePath: expect.stringContaining(`cortex-cpp`),
enginePath: expect.stringContaining(`vulkan`),
binPath: expect.stringContaining(`bin`),
executablePath: expect.stringContaining(`cortex-server`),
cudaVisibleDevices: '0',
vkVisibleDevices: '0',
})
@ -417,11 +439,15 @@ describe('test executable nitro file', () => {
...testSettings,
run_mode: 'cpu',
}
mock.mockReturnValue([])
expect(executableNitroFile(settings)).toEqual(
mockCpuInfo.mockReturnValue([])
expect(executableCortexFile(settings)).toEqual(
expect.objectContaining({
enginePath: expect.stringContaining(`mac-x64`),
executablePath: originalPlatform === 'darwin' ? expect.stringContaining(`mac-x64/cortex-cpp`) : expect.anything(),
enginePath: expect.stringContaining(`x64`),
binPath: expect.stringContaining(`bin`),
executablePath:
originalPlatform === 'darwin'
? expect.stringContaining(`cortex-server`)
: expect.anything(),
cudaVisibleDevices: '',
vkVisibleDevices: '',
})

View File

@ -2,8 +2,9 @@ import { GpuSetting } from '@janhq/core'
import * as path from 'path'
import { cpuInfo } from 'cpu-instructions'
export interface NitroExecutableOptions {
export interface CortexExecutableOptions {
enginePath: string
binPath: string
executablePath: string
cudaVisibleDevices: string
vkVisibleDevices: string
@ -36,8 +37,8 @@ const os = (): string => {
? 'win'
: process.platform === 'darwin'
? process.arch === 'arm64'
? 'mac-arm64'
: 'mac-x64'
? 'arm64'
: 'x64'
: 'linux'
}
@ -66,7 +67,7 @@ const cudaVersion = (settings?: GpuSetting): '11-7' | '12-0' | undefined => {
* The CPU instructions that will be set - either 'avx512', 'avx2', 'avx', or 'noavx'.
* @returns
*/
const cpuInstructions = () => {
const cpuInstructions = (): string => {
if (process.platform === 'darwin') return ''
return cpuInfo.cpuInfo().some((e) => e.toUpperCase() === 'AVX512')
? 'avx512'
@ -81,29 +82,32 @@ const cpuInstructions = () => {
* Find which executable file to run based on the current platform.
* @returns The name of the executable file to run.
*/
export const executableNitroFile = (
export const executableCortexFile = (
gpuSetting?: GpuSetting
): NitroExecutableOptions => {
let engineFolder = [
os(),
...(gpuSetting?.vulkan
? []
): CortexExecutableOptions => {
const cpuInstruction = cpuInstructions()
let engineFolder = gpuSetting?.vulkan
? 'vulkan'
: process.platform === 'darwin'
? os()
: [
gpuRunMode(gpuSetting) !== 'cuda' ? cpuInstructions() : '',
gpuRunMode(gpuSetting) !== 'cuda' ||
cpuInstruction === 'avx2' || cpuInstruction === 'avx512'
? cpuInstruction
: 'noavx',
gpuRunMode(gpuSetting),
cudaVersion(gpuSetting),
]),
gpuSetting?.vulkan ? 'vulkan' : undefined,
]
.filter((e) => !!e)
.join('-')
let cudaVisibleDevices = gpuSetting?.gpus_in_use.join(',') ?? ''
let vkVisibleDevices = gpuSetting?.gpus_in_use.join(',') ?? ''
let binaryName = `${process.platform === 'darwin' ? `${os()}/` : ''}cortex-cpp${extension()}`
let binaryName = `cortex-server${extension()}`
const binPath = path.join(__dirname, '..', 'bin')
return {
enginePath: path.join(__dirname, '..', 'bin', engineFolder),
executablePath: path.join(__dirname, '..', 'bin', binaryName),
enginePath: path.join(binPath, engineFolder),
executablePath: path.join(binPath, binaryName),
binPath: binPath,
cudaVisibleDevices,
vkVisibleDevices,
}
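
To make the new selection logic concrete, here is a minimal sketch (not part of the diff) of how the engine folder presumably resolves; the SketchSettings type and the inlined results of gpuRunMode, cudaVersion, cpuInstructions, and os are assumptions standing in for the real helpers:

// Sketch only: mirrors the branching in executableCortexFile above.
type SketchSettings = {
  vulkan?: boolean
  runMode?: 'cuda' | '' // assumed result of gpuRunMode()
  cuda?: '11-7' | '12-0' // assumed result of cudaVersion()
  cpu?: 'avx512' | 'avx2' | 'avx' | 'noavx' // assumed result of cpuInstructions()
  darwinArch?: 'arm64' | 'x64' // assumed result of os() on macOS
}

const engineFolder = (s: SketchSettings, platform: string): string => {
  if (s.vulkan) return 'vulkan'
  if (platform === 'darwin') return s.darwinArch ?? 'x64'
  return [
    // CUDA engine variants only ship for avx2/avx512, so other CPUs
    // fall back to the noavx build when running on GPU.
    s.runMode !== 'cuda' || s.cpu === 'avx2' || s.cpu === 'avx512'
      ? s.cpu
      : 'noavx',
    s.runMode,
    s.cuda,
  ]
    .filter((e): e is string => !!e)
    .join('-')
}

// These line up with the expectations in the updated tests:
engineFolder({ runMode: 'cuda', cuda: '12-0', cpu: 'avx2' }, 'linux') // 'avx2-cuda-12-0'
engineFolder({ cpu: 'avx512' }, 'win32') // 'avx512'
engineFolder({ vulkan: true }, 'linux') // 'vulkan'

Note the dropped platform prefix: the old code produced folders like linux-cuda-12-0 and mac-x64, while the new layout keys engines by capability only, which is why the tests now match on cuda-12-0 or x64 alone.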

View File

@ -0,0 +1,94 @@
jest.mock('@janhq/core/node', () => ({
...jest.requireActual('@janhq/core/node'),
getJanDataFolderPath: () => '',
getSystemResourceInfo: () => {
return {
cpu: {
cores: 1,
logicalCores: 1,
threads: 1,
model: 'model',
speed: 1,
},
memory: {
total: 1,
free: 1,
},
gpu: {
model: 'model',
memory: 1,
cuda: {
version: 'version',
devices: 'devices',
},
vulkan: {
version: 'version',
devices: 'devices',
},
},
}
},
}))
jest.mock('fs', () => ({
default: {
readdirSync: () => [],
},
}))
jest.mock('child_process', () => ({
exec: () => {
return {
stdout: { on: jest.fn() },
stderr: { on: jest.fn() },
on: jest.fn(),
}
},
spawn: () => {
return {
stdout: { on: jest.fn() },
stderr: { on: jest.fn() },
on: jest.fn(),
pid: '111',
}
},
}))
jest.mock('./execute', () => ({
executableCortexFile: () => {
return {
enginePath: 'enginePath',
// index.ts also reads binPath off these options, so the stub
// mirrors the full CortexExecutableOptions shape.
binPath: 'binPath',
executablePath: 'executablePath',
cudaVisibleDevices: 'cudaVisibleDevices',
vkVisibleDevices: 'vkVisibleDevices',
}
},
}))
import index from './index'
describe('dispose', () => {
it('should dispose a model successfully on Mac', async () => {
Object.defineProperty(process, 'platform', {
value: 'darwin',
})
// Call the dispose function
const result = await index.dispose()
// Assert that the result is as expected
expect(result).toBeUndefined()
})
it('should kill the subprocess successfully on Windows', async () => {
Object.defineProperty(process, 'platform', {
value: 'win32',
})
// Call the killSubprocess function
const result = await index.dispose()
// Assert that the result is as expected
expect(result).toBeUndefined()
})
})
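
Because dispose() only optional-chains into the watchdog, it is also safe before run() has ever created one; a further test along the same lines (a sketch, not part of the diff) could pin that down inside the describe block above:

it('should be a no-op when no watchdog has been started', async () => {
  // watchdog is still undefined here, so terminate() is never reached
  const result = await index.dispose()
  expect(result).toBeUndefined()
})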

View File

@ -0,0 +1,103 @@
import path from 'path'
import { getJanDataFolderPath, log, SystemInformation } from '@janhq/core/node'
import { executableCortexFile } from './execute'
import { ProcessWatchdog } from './watchdog'
import { appResourcePath } from '@janhq/core/node'
// The local port the Cortex subprocess's API server listens on
const LOCAL_PORT = '39291'
let watchdog: ProcessWatchdog | undefined = undefined
/**
* Spawns a Cortex subprocess.
* @returns A promise that resolves when the Cortex subprocess is started.
*/
function run(systemInfo?: SystemInformation): Promise<void> {
log(`[CORTEX]:: Spawning cortex subprocess...`)
return new Promise<void>(async (resolve, reject) => {
let executableOptions = executableCortexFile(
// Forward the GPU settings so the matching engine variant (CPU instruction set, CUDA, Vulkan) is selected
systemInfo?.gpuSetting
? {
...systemInfo.gpuSetting,
run_mode: systemInfo.gpuSetting.run_mode,
}
: undefined
)
// Execute the binary
log(`[CORTEX]:: Spawn cortex at path: ${executableOptions.executablePath}`)
log(`[CORTEX]:: Cortex engine path: ${executableOptions.enginePath}`)
addEnvPaths(path.join(appResourcePath(), 'shared'))
addEnvPaths(executableOptions.binPath)
addEnvPaths(executableOptions.enginePath)
const dataFolderPath = getJanDataFolderPath()
watchdog = new ProcessWatchdog(
executableOptions.executablePath,
[
'--start-server',
'--port',
LOCAL_PORT,
'--config_file_path',
`${path.join(dataFolderPath, '.janrc')}`,
'--data_folder_path',
dataFolderPath,
],
{
cwd: executableOptions.enginePath,
env: {
...process.env,
ENGINE_PATH: executableOptions.enginePath,
CUDA_VISIBLE_DEVICES: executableOptions.cudaVisibleDevices,
// Vulkan - Support 1 device at a time for now
...(executableOptions.vkVisibleDevices?.length > 0 && {
GGML_VULKAN_DEVICE: executableOptions.vkVisibleDevices[0],
}),
},
}
)
watchdog.start()
resolve()
})
}
/**
* Every module should have a dispose function
* This will be called when the extension is unloaded and should clean up any resources
* Also called when app is closed
*/
function dispose() {
watchdog?.terminate()
}
function addEnvPaths(dest: string) {
// Append the given directory to PATH (Windows) or LD_LIBRARY_PATH (other platforms)
if (process.platform === 'win32') {
process.env.PATH = (process.env.PATH || '').concat(
path.delimiter,
dest,
)
log(`[CORTEX] PATH: ${process.env.PATH}`)
} else {
process.env.LD_LIBRARY_PATH = (process.env.LD_LIBRARY_PATH || '').concat(
path.delimiter,
dest,
)
log(`[CORTEX] LD_LIBRARY_PATH: ${process.env.LD_LIBRARY_PATH}`)
}
}
/**
* Cortex process info
*/
export interface CortexProcessInfo {
isRunning: boolean
}
export default {
run,
dispose,
}
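
As a usage sketch (the onLoad/onUnload hooks and their wiring are hypothetical, not taken from the diff), an extension would drive this module roughly like so:

import { SystemInformation } from '@janhq/core/node'
import cortexProcess from './index'

// Hypothetical lifecycle hooks: start the server with the host's GPU
// settings when the extension loads, stop the watchdog on unload.
async function onLoad(systemInfo?: SystemInformation) {
  await cortexProcess.run(systemInfo)
}

function onUnload() {
  cortexProcess.dispose()
}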

View File

@ -0,0 +1,84 @@
import { log } from '@janhq/core/node'
import { spawn, ChildProcess } from 'child_process'
import { EventEmitter } from 'events'
interface WatchdogOptions {
cwd?: string
restartDelay?: number
maxRestarts?: number
env?: NodeJS.ProcessEnv
}
export class ProcessWatchdog extends EventEmitter {
private command: string
private args: string[]
private options: WatchdogOptions
private process: ChildProcess | null
private restartDelay: number
private maxRestarts: number
private restartCount: number
private isTerminating: boolean
constructor(command: string, args: string[], options: WatchdogOptions = {}) {
super()
this.command = command
this.args = args
this.options = options
this.process = null
this.restartDelay = options.restartDelay || 5000
this.maxRestarts = options.maxRestarts || 5
this.restartCount = 0
this.isTerminating = false
}
start(): void {
this.spawnProcess()
}
private spawnProcess(): void {
if (this.isTerminating) return
log(`Starting process: ${this.command} ${this.args.join(' ')}`)
this.process = spawn(this.command, this.args, this.options)
this.process.stdout?.on('data', (data: Buffer) => {
log(`Process output: ${data}`)
this.emit('output', data.toString())
})
this.process.stderr?.on('data', (data: Buffer) => {
log(`Process error: ${data}`)
// EventEmitter treats 'error' specially and throws when nothing is
// listening, so surface stderr output under its own event name.
this.emit('stderr', data.toString())
})
this.process.on('close', (code: number | null) => {
log(`Process exited with code ${code}`)
this.emit('close', code)
if (!this.isTerminating) {
this.restartProcess()
}
})
}
private restartProcess(): void {
if (this.restartCount < this.maxRestarts) {
this.restartCount++
log(
`Restarting process in ${this.restartDelay}ms (Attempt ${this.restartCount}/${this.maxRestarts})`
)
setTimeout(() => this.spawnProcess(), this.restartDelay)
} else {
log('Max restart attempts reached. Exiting watchdog.')
this.emit('maxRestartsReached')
}
}
terminate(): void {
this.isTerminating = true
if (this.process) {
log('Terminating watched process...')
this.process.kill()
}
this.emit('terminated')
}
}
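
A minimal standalone usage sketch (the binary, arguments, delays, and handlers below are illustrative assumptions, not taken from the diff):

import { ProcessWatchdog } from './watchdog'

// Keep a hypothetical long-running binary alive, retrying at most
// three times with a one-second pause between attempts.
const dog = new ProcessWatchdog('/usr/local/bin/some-server', ['--port', '39291'], {
  restartDelay: 1000,
  maxRestarts: 3,
})

dog.on('close', (code) => console.log(`process exited with code ${code}`))
dog.on('maxRestartsReached', () => console.log('giving up on restarts'))
dog.start()

// Later, e.g. during shutdown:
// dog.terminate()

Note that restartCount is never reset after a stretch of healthy uptime, so the maxRestarts budget applies across the whole lifetime of the watchdog instance.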

View File

@ -1,9 +1,8 @@
{
"compilerOptions": {
"moduleResolution": "node",
"target": "ES2015",
"module": "ES2020",
"lib": ["es2015", "es2016", "es2017", "dom"],
"target": "es2016",
"module": "esnext",
"strict": true,
"sourceMap": true,
"declaration": true,
