Merge pull request #3821 from janhq/feat/path-to-cortexcpp
feat: Jan Integrates Cortex.cpp as Provider
Commit a82c701087
@@ -319,6 +319,13 @@ jobs:
       # TURBO_TEAM: 'linux'
       # TURBO_TOKEN: '${{ secrets.TURBO_TOKEN }}'

+      - uses: actions/upload-artifact@v4
+        if: always()
+        with:
+          name: playwright-report
+          path: electron/playwright-report/
+          retention-days: 2
+
   coverage-check:
     runs-on: [self-hosted, Linux, ubuntu-desktop]
     needs: base_branch_cov
@@ -1 +1 @@
-npm run lint --fix
+npx oxlint@latest --fix
@@ -1,6 +1,8 @@
-import { SettingComponentProps } from '../types'
+import { Model, ModelEvent, SettingComponentProps } from '../types'
 import { getJanDataFolderPath, joinPath } from './core'
+import { events } from './events'
 import { fs } from './fs'
+import { ModelManager } from './models'

 export enum ExtensionTypeEnum {
   Assistant = 'assistant',
@@ -103,6 +105,22 @@ export abstract class BaseExtension implements ExtensionType {
     return undefined
   }

+  /**
+   * Registers models - it persists in-memory shared ModelManager instance's data map.
+   * @param models
+   */
+  async registerModels(models: Model[]): Promise<void> {
+    for (const model of models) {
+      ModelManager.instance().register(model)
+    }
+    events.emit(ModelEvent.OnModelsUpdate, {})
+  }
+
+  /**
+   * Register settings for the extension.
+   * @param settings
+   * @returns
+   */
   async registerSettings(settings: SettingComponentProps[]): Promise<void> {
     if (!this.name) {
       console.error('Extension name is not defined')
@@ -139,6 +157,12 @@ export abstract class BaseExtension implements ExtensionType {
     }
   }

+  /**
+   * Get the setting value for the key.
+   * @param key
+   * @param defaultValue
+   * @returns
+   */
   async getSetting<T>(key: string, defaultValue: T) {
     const keySetting = (await this.getSettings()).find((setting) => setting.key === key)

@@ -168,6 +192,10 @@ export abstract class BaseExtension implements ExtensionType {
     return
   }

+  /**
+   * Get the settings for the extension.
+   * @returns
+   */
   async getSettings(): Promise<SettingComponentProps[]> {
     if (!this.name) return []

@@ -189,6 +217,11 @@ export abstract class BaseExtension implements ExtensionType {
     }
   }

+  /**
+   * Update the settings for the extension.
+   * @param componentProps
+   * @returns
+   */
   async updateSettings(componentProps: Partial<SettingComponentProps>[]): Promise<void> {
     if (!this.name) return
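Note: the new registerModels hook replaces the per-extension model.json persistence (removed from AIEngine below) with the shared in-memory ModelManager. A minimal sketch of how a provider extension might call it; the subclass name and the fetchMyCatalog() helper are illustrative only, and the other abstract members of BaseExtension are omitted:

// Illustrative subclass; type(), onUnload(), real catalog data omitted
class MyProviderExtension extends BaseExtension {
  async onLoad() {
    const catalog: Model[] = await fetchMyCatalog() // hypothetical helper
    // Persists the entries into the shared ModelManager and emits one OnModelsUpdate event
    await this.registerModels(catalog)
  }
}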
@@ -1,8 +1,6 @@
 import { AIEngine } from './AIEngine'
 import { events } from '../../events'
-import { ModelEvent, Model, ModelFile, InferenceEngine } from '../../../types'
-import { EngineManager } from './EngineManager'
-import { fs } from '../../fs'
+import { ModelEvent, Model } from '../../../types'

 jest.mock('../../events')
 jest.mock('./EngineManager')
@@ -26,7 +24,7 @@ describe('AIEngine', () => {
   })

   it('should load model if provider matches', async () => {
-    const model: ModelFile = { id: 'model1', engine: 'test-provider' } as any
+    const model: any = { id: 'model1', engine: 'test-provider' } as any

     await engine.loadModel(model)

@@ -34,7 +32,7 @@ describe('AIEngine', () => {
   })

   it('should not load model if provider does not match', async () => {
-    const model: ModelFile = { id: 'model1', engine: 'other-provider' } as any
+    const model: any = { id: 'model1', engine: 'other-provider' } as any

     await engine.loadModel(model)
@@ -1,17 +1,14 @@
-import { getJanDataFolderPath, joinPath } from '../../core'
 import { events } from '../../events'
 import { BaseExtension } from '../../extension'
-import { fs } from '../../fs'
-import { MessageRequest, Model, ModelEvent, ModelFile } from '../../../types'
+import { MessageRequest, Model, ModelEvent } from '../../../types'
 import { EngineManager } from './EngineManager'
+import { ModelManager } from '../../models/manager'

 /**
  * Base AIEngine
  * Applicable to all AI Engines
  */
 export abstract class AIEngine extends BaseExtension {
-  private static modelsFolder = 'models'
-
   // The inference engine
   abstract provider: string

@@ -21,7 +18,7 @@ export abstract class AIEngine extends BaseExtension {
   override onLoad() {
     this.registerEngine()

-    events.on(ModelEvent.OnModelInit, (model: ModelFile) => this.loadModel(model))
+    events.on(ModelEvent.OnModelInit, (model: Model) => this.loadModel(model))
     events.on(ModelEvent.OnModelStop, (model: Model) => this.unloadModel(model))
   }

@@ -32,53 +29,10 @@ export abstract class AIEngine extends BaseExtension {
     EngineManager.instance().register(this)
   }

-  async registerModels(models: Model[]): Promise<void> {
-    const modelFolderPath = await joinPath([await getJanDataFolderPath(), AIEngine.modelsFolder])
-
-    let shouldNotifyModelUpdate = false
-    for (const model of models) {
-      const modelPath = await joinPath([modelFolderPath, model.id])
-      const isExist = await fs.existsSync(modelPath)
-
-      if (isExist) {
-        await this.migrateModelIfNeeded(model, modelPath)
-        continue
-      }
-
-      await fs.mkdir(modelPath)
-      await fs.writeFileSync(
-        await joinPath([modelPath, 'model.json']),
-        JSON.stringify(model, null, 2)
-      )
-      shouldNotifyModelUpdate = true
-    }
-
-    if (shouldNotifyModelUpdate) {
-      events.emit(ModelEvent.OnModelsUpdate, {})
-    }
-  }
-
-  async migrateModelIfNeeded(model: Model, modelPath: string): Promise<void> {
-    try {
-      const modelJson = await fs.readFileSync(await joinPath([modelPath, 'model.json']), 'utf-8')
-      const currentModel: Model = JSON.parse(modelJson)
-      if (currentModel.version !== model.version) {
-        await fs.writeFileSync(
-          await joinPath([modelPath, 'model.json']),
-          JSON.stringify(model, null, 2)
-        )
-
-        events.emit(ModelEvent.OnModelsUpdate, {})
-      }
-    } catch (error) {
-      console.warn('Error while try to migrating model', error)
-    }
-  }
-
   /**
    * Loads the model.
    */
-  async loadModel(model: ModelFile): Promise<any> {
+  async loadModel(model: Model): Promise<any> {
     if (model.engine.toString() !== this.provider) return Promise.resolve()
     events.emit(ModelEvent.OnModelReady, model)
     return Promise.resolve()
@@ -1,3 +1,4 @@
+import { InferenceEngine } from '../../../types'
 import { AIEngine } from './AIEngine'

 /**
@@ -20,6 +21,22 @@ export class EngineManager {
    * @returns The engine, if found.
    */
   get<T extends AIEngine>(provider: string): T | undefined {
+    // Backward compatible provider
+    // nitro is migrated to cortex
+    if (
+      [
+        InferenceEngine.nitro,
+        InferenceEngine.cortex,
+        InferenceEngine.cortex_llamacpp,
+        InferenceEngine.cortex_onnx,
+        InferenceEngine.cortex_tensorrtllm,
+        InferenceEngine.cortex_onnx,
+      ]
+        .map((e) => e.toString())
+        .includes(provider)
+    )
+      provider = InferenceEngine.cortex
+
     return this.engines.get(provider) as T | undefined
   }

@@ -27,6 +44,6 @@ export class EngineManager {
    * The instance of the engine manager.
    */
   static instance(): EngineManager {
-    return window.core?.engineManager as EngineManager ?? new EngineManager()
+    return (window.core?.engineManager as EngineManager) ?? new EngineManager()
   }
 }
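Note: a consequence of the remapping above is that any provider string that used to identify a nitro or cortex sub-engine now resolves to the single registered cortex engine, so threads and models created before this change keep working. A hedged sketch of the intended behaviour, assuming a cortex engine has been registered:

const legacy = EngineManager.instance().get(InferenceEngine.nitro)   // 'nitro'
const current = EngineManager.instance().get(InferenceEngine.cortex) // 'cortex'
// legacy === current: both lookups are redirected to InferenceEngine.cortex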
@@ -3,7 +3,7 @@
  */
 import { LocalOAIEngine } from './LocalOAIEngine'
 import { events } from '../../events'
-import { ModelEvent, ModelFile, Model } from '../../../types'
+import { ModelEvent, Model } from '../../../types'
 import { executeOnMain, systemInformation, dirName } from '../../core'

 jest.mock('../../core', () => ({
@@ -43,7 +43,7 @@ describe('LocalOAIEngine', () => {
   })

   it('should load model correctly', async () => {
-    const model: ModelFile = { engine: 'testProvider', file_path: 'path/to/model' } as any
+    const model: any = { engine: 'testProvider', file_path: 'path/to/model' } as any
     const modelFolder = 'path/to'
     const systemInfo = { os: 'testOS' }
     const res = { error: null }
@@ -66,7 +66,7 @@ describe('LocalOAIEngine', () => {
   })

   it('should handle load model error', async () => {
-    const model: ModelFile = { engine: 'testProvider', file_path: 'path/to/model' } as any
+    const model: any = { engine: 'testProvider', file_path: 'path/to/model' } as any
     const modelFolder = 'path/to'
     const systemInfo = { os: 'testOS' }
     const res = { error: 'load error' }
@@ -91,9 +91,7 @@ describe('LocalOAIEngine', () => {

   it('should not unload model if engine does not match', async () => {
     const model: Model = { engine: 'otherProvider' } as any

     await engine.unloadModel(model)

     expect(executeOnMain).not.toHaveBeenCalled()
     expect(events.emit).not.toHaveBeenCalledWith(ModelEvent.OnModelStopped, {})
   })
@@ -1,6 +1,6 @@
-import { executeOnMain, systemInformation, dirName } from '../../core'
+import { executeOnMain, systemInformation, dirName, joinPath, getJanDataFolderPath } from '../../core'
 import { events } from '../../events'
-import { Model, ModelEvent, ModelFile } from '../../../types'
+import { Model, ModelEvent } from '../../../types'
 import { OAIEngine } from './OAIEngine'

 /**
@@ -22,16 +22,16 @@ export abstract class LocalOAIEngine extends OAIEngine {
   override onLoad() {
     super.onLoad()
     // These events are applicable to local inference providers
-    events.on(ModelEvent.OnModelInit, (model: ModelFile) => this.loadModel(model))
+    events.on(ModelEvent.OnModelInit, (model: Model) => this.loadModel(model))
     events.on(ModelEvent.OnModelStop, (model: Model) => this.unloadModel(model))
   }

   /**
    * Load the model.
    */
-  override async loadModel(model: ModelFile): Promise<void> {
+  override async loadModel(model: Model & { file_path?: string }): Promise<void> {
     if (model.engine.toString() !== this.provider) return
-    const modelFolder = await dirName(model.file_path)
+    const modelFolder = 'file_path' in model && model.file_path ? await dirName(model.file_path) : await this.getModelFilePath(model.id)
     const systemInfo = await systemInformation()
     const res = await executeOnMain(
       this.nodeModule,
@@ -63,4 +63,12 @@ export abstract class LocalOAIEngine extends OAIEngine {
       events.emit(ModelEvent.OnModelStopped, {})
     })
   }
+
+  /// Legacy
+  private getModelFilePath = async (
+    id: string,
+  ): Promise<string> => {
+    return joinPath([await getJanDataFolderPath(), 'models', id])
+  }
+  ///
 }
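Note: with the loadModel change above the model folder is resolved in two ways. A rough sketch only, using names from the diff:

// 1. Model imported with an explicit file_path: use its parent directory
//    { id: 'my-model', file_path: '<janData>/models/my-model/model.gguf' } -> dirName(file_path)
// 2. Model without file_path (cortex-managed catalog entry): legacy layout fallback
//    { id: 'my-model' } -> joinPath([janDataFolder, 'models', 'my-model'])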
@@ -55,7 +55,21 @@ export abstract class OAIEngine extends AIEngine {
    * Inference request
    */
   override async inference(data: MessageRequest) {
-    if (data.model?.engine?.toString() !== this.provider) return
+    if (!data.model?.id) {
+      events.emit(MessageEvent.OnMessageResponse, {
+        status: MessageStatus.Error,
+        content: [
+          {
+            type: ContentType.Text,
+            text: {
+              value: 'No model ID provided',
+              annotations: [],
+            },
+          },
+        ],
+      })
+      return
+    }

     const timestamp = Date.now()
     const message: ThreadMessage = {
@@ -89,7 +103,6 @@ export abstract class OAIEngine extends AIEngine {
       model: model.id,
       stream: true,
       ...model.parameters,
-      ...(this.provider === 'nitro' ? { engine: 'cortex.llamacpp'} : {}),
     }
     if (this.transformPayload) {
       requestBody = this.transformPayload(requestBody)
@@ -10,7 +10,7 @@ export function requestInference(
   requestBody: any,
   model: {
     id: string
-    parameters: ModelRuntimeParams
+    parameters?: ModelRuntimeParams
   },
   controller?: AbortController,
   headers?: HeadersInit,
@@ -22,7 +22,7 @@ export function requestInference(
       headers: {
         'Content-Type': 'application/json',
         'Access-Control-Allow-Origin': '*',
-        'Accept': model.parameters.stream ? 'text/event-stream' : 'application/json',
+        'Accept': model.parameters?.stream ? 'text/event-stream' : 'application/json',
         ...headers,
       },
       body: JSON.stringify(requestBody),
@@ -45,7 +45,7 @@ export function requestInference(
         subscriber.complete()
         return
       }
-      if (model.parameters.stream === false) {
+      if (model.parameters?.stream === false) {
         const data = await response.json()
         if (transformResponse) {
           subscriber.next(transformResponse(data))
@@ -1,13 +1,5 @@
 import { BaseExtension, ExtensionTypeEnum } from '../extension'
-import {
-  GpuSetting,
-  HuggingFaceRepoData,
-  ImportingModel,
-  Model,
-  ModelFile,
-  ModelInterface,
-  OptionType,
-} from '../../types'
+import { Model, ModelInterface, OptionType } from '../../types'

 /**
  * Model extension for managing models.
@@ -20,17 +12,10 @@ export abstract class ModelExtension extends BaseExtension implements ModelInter
     return ExtensionTypeEnum.Model
   }

-  abstract downloadModel(
-    model: Model,
-    gpuSettings?: GpuSetting,
-    network?: { proxy: string; ignoreSSL?: boolean }
-  ): Promise<void>
-  abstract cancelModelDownload(modelId: string): Promise<void>
-  abstract deleteModel(model: ModelFile): Promise<void>
-  abstract getDownloadedModels(): Promise<ModelFile[]>
-  abstract getConfiguredModels(): Promise<ModelFile[]>
-  abstract importModels(models: ImportingModel[], optionType: OptionType): Promise<void>
-  abstract updateModelInfo(modelInfo: Partial<ModelFile>): Promise<ModelFile>
-  abstract fetchHuggingFaceRepoData(repoId: string): Promise<HuggingFaceRepoData>
-  abstract getDefaultModel(): Promise<Model>
+  abstract getModels(): Promise<Model[]>
+  abstract pullModel(model: string, id?: string, name?: string): Promise<void>
+  abstract cancelModelPull(modelId: string): Promise<void>
+  abstract importModel(model: string, modePath: string, name?: string, optionType?: OptionType): Promise<void>
+  abstract updateModel(modelInfo: Partial<Model>): Promise<Model>
+  abstract deleteModel(model: string): Promise<void>
 }
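Note: the reshaped ModelExtension surface is cortex-style, pulling and importing by string id instead of downloading ModelFile objects. A hedged usage sketch against the abstract methods above; the model id is illustrative:

// modelExtension: a concrete ModelExtension implementation
await modelExtension.pullModel('llama3.1-8b-gguf')   // download through cortex.cpp
const downloaded = await modelExtension.getModels()  // list locally available models
await modelExtension.updateModel({ id: 'llama3.1-8b-gguf' }) // patch metadata
await modelExtension.deleteModel('llama3.1-8b-gguf')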
@@ -1,32 +1,37 @@
-import * as Core from './core';
-import * as Events from './events';
-import * as FileSystem from './fs';
-import * as Extension from './extension';
-import * as Extensions from './extensions';
-import * as Tools from './tools';
+import * as Core from './core'
+import * as Events from './events'
+import * as FileSystem from './fs'
+import * as Extension from './extension'
+import * as Extensions from './extensions'
+import * as Tools from './tools'
+import * as Models from './models'

 describe('Module Tests', () => {
   it('should export Core module', () => {
     expect(Core).toBeDefined()
   })

   it('should export Event module', () => {
     expect(Events).toBeDefined()
   })

   it('should export Filesystem module', () => {
     expect(FileSystem).toBeDefined()
   })

   it('should export Extension module', () => {
     expect(Extension).toBeDefined()
   })

   it('should export all base extensions', () => {
     expect(Extensions).toBeDefined()
   })

   it('should export all base tools', () => {
     expect(Tools).toBeDefined()
   })
+
+  it('should export all base tools', () => {
+    expect(Models).toBeDefined()
+  })
 })
@@ -33,3 +33,9 @@ export * from './extensions'
  * @module
  */
 export * from './tools'
+
+/**
+ * Export all base models.
+ * @module
+ */
+export * from './models'
core/src/browser/models/index.ts (new file, 10 lines)
@@ -0,0 +1,10 @@
+/**
+ * Export ModelManager
+ * @module
+ */
+export { ModelManager } from './manager'
+
+/**
+ * Export all utils
+ */
+export * from './utils'
core/src/browser/models/manager.ts (new file, 47 lines)
@@ -0,0 +1,47 @@
+import { Model, ModelEvent } from '../../types'
+import { events } from '../events'
+
+/**
+ * Manages the registered models across extensions.
+ */
+export class ModelManager {
+  public models = new Map<string, Model>()
+
+  constructor() {
+    if (window) {
+      window.core.modelManager = this
+    }
+  }
+
+  /**
+   * Registers a model.
+   * @param model - The model to register.
+   */
+  register<T extends Model>(model: T) {
+    if (this.models.has(model.id)) {
+      this.models.set(model.id, {
+        ...model,
+        ...this.models.get(model.id),
+      })
+    } else {
+      this.models.set(model.id, model)
+    }
+    events.emit(ModelEvent.OnModelsUpdate, {})
+  }
+
+  /**
+   * Retrieves a model by it's id.
+   * @param id - The id of the model to retrieve.
+   * @returns The model, if found.
+   */
+  get<T extends Model>(id: string): T | undefined {
+    return this.models.get(id) as T | undefined
+  }
+
+  /**
+   * The instance of the tool manager.
+   */
+  static instance(): ModelManager {
+    return (window.core?.modelManager as ModelManager) ?? new ModelManager()
+  }
+}
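Note: ModelManager is a window-scoped singleton, so every extension and the web app share one registry; register() merges with any existing entry for the same id, keeping the fields that were stored first. A usage sketch, with illustrative model literals cast for brevity (the import path depends on the consumer; the core package re-exports it via './models'):

const manager = ModelManager.instance()
manager.register({ id: 'tinyllama-1.1b', engine: InferenceEngine.cortex_llamacpp } as Model)
manager.register({ id: 'tinyllama-1.1b', name: 'TinyLlama 1.1B' } as unknown as Model)
const merged = manager.get('tinyllama-1.1b') // previously stored fields win over later registrations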
@@ -1,7 +1,10 @@
 // web/utils/modelParam.test.ts
-import { normalizeValue, validationRules } from './modelParam'
-import { extractModelLoadParams } from './modelParam';
-import { extractInferenceParams } from './modelParam';
+import {
+  normalizeValue,
+  validationRules,
+  extractModelLoadParams,
+  extractInferenceParams,
+} from './utils'

 describe('validationRules', () => {
   it('should validate temperature correctly', () => {
@@ -151,13 +154,12 @@ describe('validationRules', () => {
   })
   })

-it('should normalize invalid values for keys not listed in validationRules', () => {
-  expect(normalizeValue('invalid_key', 'invalid')).toBe('invalid')
-  expect(normalizeValue('invalid_key', 123)).toBe(123)
-  expect(normalizeValue('invalid_key', true)).toBe(true)
-  expect(normalizeValue('invalid_key', false)).toBe(false)
-})
-})
+  it('should normalize invalid values for keys not listed in validationRules', () => {
+    expect(normalizeValue('invalid_key', 'invalid')).toBe('invalid')
+    expect(normalizeValue('invalid_key', 123)).toBe(123)
+    expect(normalizeValue('invalid_key', true)).toBe(true)
+    expect(normalizeValue('invalid_key', false)).toBe(false)
+  })
+})

 describe('normalizeValue', () => {
   it('should normalize ctx_len correctly', () => {
@@ -192,19 +194,16 @@ describe('normalizeValue', () => {
   })
 })

-  it('should handle invalid values correctly by falling back to originParams', () => {
-    const modelParams = { temperature: 'invalid', token_limit: -1 };
-    const originParams = { temperature: 0.5, token_limit: 100 };
-    expect(extractInferenceParams(modelParams, originParams)).toEqual(originParams);
-  });
-
-  it('should return an empty object when no modelParams are provided', () => {
-    expect(extractModelLoadParams()).toEqual({});
-  });
-
-  it('should return an empty object when no modelParams are provided', () => {
-    expect(extractInferenceParams()).toEqual({});
-  });
+  it('should handle invalid values correctly by falling back to originParams', () => {
+    const modelParams = { temperature: 'invalid', token_limit: -1 }
+    const originParams = { temperature: 0.5, token_limit: 100 }
+    expect(extractInferenceParams(modelParams as any, originParams)).toEqual(originParams)
+  })
+
+  it('should return an empty object when no modelParams are provided', () => {
+    expect(extractModelLoadParams()).toEqual({})
+  })
+
+  it('should return an empty object when no modelParams are provided', () => {
+    expect(extractInferenceParams()).toEqual({})
+  })
@@ -1,26 +1,20 @@
 /* eslint-disable @typescript-eslint/no-explicit-any */
 /* eslint-disable @typescript-eslint/naming-convention */
-import { ModelRuntimeParams, ModelSettingParams } from '@janhq/core'
-
-import { ModelParams } from '@/types/model'
+import { ModelParams, ModelRuntimeParams, ModelSettingParams } from '../../types'

 /**
  * Validation rules for model parameters
  */
 export const validationRules: { [key: string]: (value: any) => boolean } = {
-  temperature: (value: any) =>
-    typeof value === 'number' && value >= 0 && value <= 2,
+  temperature: (value: any) => typeof value === 'number' && value >= 0 && value <= 2,
   token_limit: (value: any) => Number.isInteger(value) && value >= 0,
   top_k: (value: any) => typeof value === 'number' && value >= 0 && value <= 1,
   top_p: (value: any) => typeof value === 'number' && value >= 0 && value <= 1,
   stream: (value: any) => typeof value === 'boolean',
   max_tokens: (value: any) => Number.isInteger(value) && value >= 0,
-  stop: (value: any) =>
-    Array.isArray(value) && value.every((v) => typeof v === 'string'),
-  frequency_penalty: (value: any) =>
-    typeof value === 'number' && value >= 0 && value <= 1,
-  presence_penalty: (value: any) =>
-    typeof value === 'number' && value >= 0 && value <= 1,
+  stop: (value: any) => Array.isArray(value) && value.every((v) => typeof v === 'string'),
+  frequency_penalty: (value: any) => typeof value === 'number' && value >= 0 && value <= 1,
+  presence_penalty: (value: any) => typeof value === 'number' && value >= 0 && value <= 1,

   ctx_len: (value: any) => Number.isInteger(value) && value >= 0,
   ngl: (value: any) => Number.isInteger(value) && value >= 0,
@@ -76,6 +70,7 @@ export const extractInferenceParams = (
     stop: undefined,
     frequency_penalty: undefined,
     presence_penalty: undefined,
+    engine: undefined,
   }

   const runtimeParams: ModelRuntimeParams = {}
@@ -119,11 +114,18 @@ export const extractModelLoadParams = (
     embedding: undefined,
     n_parallel: undefined,
     cpu_threads: undefined,
+    pre_prompt: undefined,
+    system_prompt: undefined,
+    ai_prompt: undefined,
+    user_prompt: undefined,
     prompt_template: undefined,
+    model_path: undefined,
     llama_model_path: undefined,
     mmproj: undefined,
+    cont_batching: undefined,
     vision_model: undefined,
     text_model: undefined,
+    engine: undefined,
   }
   const settingParams: ModelSettingParams = {}

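Note: extractInferenceParams and extractModelLoadParams split a mixed parameter object into runtime vs. load-time settings, and the new keys (engine, model_path, pre_prompt, cont_batching, …) are now recognised as well. Illustrative sketch only:

const combined: any = { temperature: 0.7, stream: true, ctx_len: 4096, ngl: 33 }
const inference = extractInferenceParams(combined) // runtime keys, e.g. temperature, stream
const load = extractModelLoadParams(combined)      // load-time keys, e.g. ctx_len, ngl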
@@ -8,7 +8,8 @@ jest.mock('../../helper', () => ({

 jest.mock('../../helper/path', () => ({
   validatePath: jest.fn().mockReturnValue('path/to/folder'),
-  normalizeFilePath: () => process.platform === 'win32' ? 'C:\\Users\path\\to\\file.gguf' : '/Users/path/to/file.gguf',
+  normalizeFilePath: () =>
+    process.platform === 'win32' ? 'C:\\Users\\path\\to\\file.gguf' : '/Users/path/to/file.gguf',
 }))

 jest.mock(
@@ -50,11 +50,6 @@ export class Downloader implements Processor {
     const initialDownloadState: DownloadState = {
       modelId,
       fileName,
-      time: {
-        elapsed: 0,
-        remaining: 0,
-      },
-      speed: 0,
       percent: 0,
       size: {
         total: 0,
@@ -343,7 +343,7 @@ export const chatCompletions = async (request: any, reply: any) => {

   // add engine for new cortex cpp engine
   if (requestedModel.engine === 'nitro') {
-    request.body.engine = 'cortex.llamacpp'
+    request.body.engine = 'llama-cpp'
   }

   const fetch = require('node-fetch')
@@ -1,6 +1,5 @@
-import { NITRO_DEFAULT_PORT } from './consts';
-
-it('should test NITRO_DEFAULT_PORT', () => {
-  expect(NITRO_DEFAULT_PORT).toBe(3928);
-});
+import { CORTEX_DEFAULT_PORT } from './consts'
+
+it('should test CORTEX_DEFAULT_PORT', () => {
+  expect(CORTEX_DEFAULT_PORT).toBe(39291)
+})
@@ -1,19 +1,9 @@
 // The PORT to use for the Nitro subprocess
-export const NITRO_DEFAULT_PORT = 3928
+export const CORTEX_DEFAULT_PORT = 39291

 // The HOST address to use for the Nitro subprocess
 export const LOCAL_HOST = '127.0.0.1'

 export const SUPPORTED_MODEL_FORMAT = '.gguf'

-// The URL for the Nitro subprocess
-const NITRO_HTTP_SERVER_URL = `http://${LOCAL_HOST}:${NITRO_DEFAULT_PORT}`
-// The URL for the Nitro subprocess to load a model
-export const NITRO_HTTP_LOAD_MODEL_URL = `${NITRO_HTTP_SERVER_URL}/inferences/server/loadmodel`
-// The URL for the Nitro subprocess to validate a model
-export const NITRO_HTTP_VALIDATE_MODEL_URL = `${NITRO_HTTP_SERVER_URL}/inferences/server/modelstatus`
-
-// The URL for the Nitro subprocess to kill itself
-export const NITRO_HTTP_KILL_URL = `${NITRO_HTTP_SERVER_URL}/processmanager/destroy`
-
-export const DEFAULT_CHAT_COMPLETION_URL = `http://${LOCAL_HOST}:${NITRO_DEFAULT_PORT}/inferences/server/chat_completion` // default nitro url
+export const DEFAULT_CHAT_COMPLETION_URL = `http://${LOCAL_HOST}:${CORTEX_DEFAULT_PORT}/v1/chat/completions` // default nitro url
@@ -1,16 +1,10 @@
 import { startModel } from './startStopModel'

 describe('startModel', () => {
   it('test_startModel_error', async () => {
     const modelId = 'testModelId'
     const settingParams = undefined

-    const result = await startModel(modelId, settingParams)
-
-    expect(result).toEqual({
-      error: expect.any(Error),
-    })
+    expect(startModel(modelId, settingParams)).resolves.toThrow()
   })
 })
@@ -1,6 +1,5 @@
-import { join } from 'path'
-import { getJanDataFolderPath, getJanExtensionsPath, log } from '../../../helper'
 import { ModelSettingParams } from '../../../../types'
+import { CORTEX_DEFAULT_PORT, LOCAL_HOST } from './consts'

 /**
  * Start a model
@@ -9,70 +8,18 @@ import { ModelSettingParams } from '../../../../types'
  * @returns
  */
 export const startModel = async (modelId: string, settingParams?: ModelSettingParams) => {
-  try {
-    await runModel(modelId, settingParams)
-
-    return {
-      message: `Model ${modelId} started`,
-    }
-  } catch (e) {
-    return {
-      error: e,
-    }
-  }
+  return fetch(`http://${LOCAL_HOST}:${CORTEX_DEFAULT_PORT}/v1/models/start`, {
+    method: 'POST',
+    body: JSON.stringify({ model: modelId, ...settingParams }),
+  })
 }

-/**
- * Run a model using installed cortex extension
- * @param model
- * @param settingParams
- */
-const runModel = async (model: string, settingParams?: ModelSettingParams): Promise<void> => {
-  const janDataFolderPath = getJanDataFolderPath()
-  const modelFolder = join(janDataFolderPath, 'models', model)
-  let module = join(
-    getJanExtensionsPath(),
-    '@janhq',
-    'inference-cortex-extension',
-    'dist',
-    'node',
-    'index.cjs'
-  )
-  // Just reuse the cortex extension implementation, don't duplicate then lost of sync
-  return import(module).then((extension) =>
-    extension
-      .loadModel(
-        {
-          modelFolder,
-          model,
-        },
-        settingParams
-      )
-      .then(() => log(`[SERVER]::Debug: Model is loaded`))
-      .then({
-        message: 'Model started',
-      })
-  )
-}
-
 /*
- * Stop model and kill nitro process.
+ * Stop model.
  */
-export const stopModel = async (_modelId: string) => {
-  let module = join(
-    getJanExtensionsPath(),
-    '@janhq',
-    'inference-cortex-extension',
-    'dist',
-    'node',
-    'index.cjs'
-  )
-  // Just reuse the cortex extension implementation, don't duplicate then lost of sync
-  return import(module).then((extension) =>
-    extension
-      .unloadModel()
-      .then(() => log(`[SERVER]::Debug: Model is unloaded`))
-      .then({
-        message: 'Model stopped',
-      })
-  )
+export const stopModel = async (modelId: string) => {
+  return fetch(`http://${LOCAL_HOST}:${CORTEX_DEFAULT_PORT}/v1/models/stop`, {
+    method: 'POST',
+    body: JSON.stringify({ model: modelId }),
+  })
 }
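Note: the local API server no longer imports the cortex extension bundle in-process; it simply proxies model lifecycle calls to the cortex.cpp subprocess. For reference, the two requests it now makes (host and port from consts.ts above):

// POST http://127.0.0.1:39291/v1/models/start   body: { "model": "<modelId>", ...settingParams }
// POST http://127.0.0.1:39291/v1/models/stop    body: { "model": "<modelId>" }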
@@ -72,6 +72,8 @@ export enum DownloadEvent {
   onFileDownloadUpdate = 'onFileDownloadUpdate',
   onFileDownloadError = 'onFileDownloadError',
   onFileDownloadSuccess = 'onFileDownloadSuccess',
+  onFileDownloadStopped = 'onFileDownloadStopped',
+  onFileDownloadStarted = 'onFileDownloadStarted',
   onFileUnzipSuccess = 'onFileUnzipSuccess',
 }
@@ -6,8 +6,8 @@ export type FileStat = {
 export type DownloadState = {
   modelId: string // TODO: change to download id
   fileName: string
-  time: DownloadTime
-  speed: number
+  time?: DownloadTime
+  speed?: number

   percent: number
   size: DownloadSize
@@ -6,8 +6,8 @@ import { FileMetadata } from '../file'
  */
 export type ModelInfo = {
   id: string
-  settings: ModelSettingParams
-  parameters: ModelRuntimeParams
+  settings?: ModelSettingParams
+  parameters?: ModelRuntimeParams
   engine?: InferenceEngine
 }

@@ -15,7 +15,6 @@ export type ModelInfo = {
  * Represents the inference engine.
  * @stored
  */
-
 export enum InferenceEngine {
   anthropic = 'anthropic',
   mistral = 'mistral',
@@ -28,11 +27,13 @@ export enum InferenceEngine {
   nitro_tensorrt_llm = 'nitro-tensorrt-llm',
   cohere = 'cohere',
   nvidia = 'nvidia',
-  cortex_llamacpp = 'cortex.llamacpp',
-  cortex_onnx = 'cortex.onnx',
-  cortex_tensorrtllm = 'cortex.tensorrt-llm',
+  cortex = 'cortex',
+  cortex_llamacpp = 'llama-cpp',
+  cortex_onnx = 'onnxruntime',
+  cortex_tensorrtllm = 'tensorrt-llm',
 }

+// Represents an artifact of a model, including its filename and URL
 export type ModelArtifact = {
   filename: string
   url: string
@@ -104,6 +105,7 @@ export type Model = {
   engine: InferenceEngine
 }

+// Represents metadata associated with a model
 export type ModelMetadata = {
   author: string
   tags: string[]
@@ -124,14 +126,20 @@ export type ModelSettingParams = {
   n_parallel?: number
   cpu_threads?: number
   prompt_template?: string
+  pre_prompt?: string
   system_prompt?: string
   ai_prompt?: string
   user_prompt?: string
+  // path param
+  model_path?: string
+  // legacy path param
   llama_model_path?: string
+  // clip model path
   mmproj?: string
   cont_batching?: boolean
   vision_model?: boolean
   text_model?: boolean
+  engine?: boolean
 }

 /**
@@ -150,11 +158,12 @@ export type ModelRuntimeParams = {
   engine?: string
 }

+// Represents a model that failed to initialize, including the error
 export type ModelInitFailed = Model & {
   error: Error
 }

 /**
- * ModelFile is the model.json entity and it's file metadata
+ * ModelParams types
  */
-export type ModelFile = Model & FileMetadata
+export type ModelParams = ModelRuntimeParams | ModelSettingParams
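Note: the InferenceEngine renames change the strings stored in model.json, so values written by older Jan versions no longer equal the new enum values; the EngineManager remapping shown earlier is what keeps those models usable. Summary of the value changes:

// cortex_llamacpp:    'cortex.llamacpp'     -> 'llama-cpp'
// cortex_onnx:        'cortex.onnx'         -> 'onnxruntime'
// cortex_tensorrtllm: 'cortex.tensorrt-llm' -> 'tensorrt-llm'
// new umbrella value: cortex = 'cortex'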
@@ -1,4 +1,4 @@
-export type OptionType = 'SYMLINK' | 'MOVE_BINARY_FILE'
+export type OptionType = 'symlink' | 'copy'

 export type ModelImportOption = {
   type: OptionType
@@ -1,5 +1,5 @@
-import { GpuSetting } from '../miscellaneous'
-import { Model, ModelFile } from './modelEntity'
+import { Model } from './modelEntity'
+import { OptionType } from './modelImport'

 /**
  * Model extension for managing models.
@@ -8,38 +8,46 @@ export interface ModelInterface {
   /**
    * Downloads a model.
    * @param model - The model to download.
-   * @param network - Optional object to specify proxy/whether to ignore SSL certificates.
    * @returns A Promise that resolves when the model has been downloaded.
    */
-  downloadModel(
-    model: ModelFile,
-    gpuSettings?: GpuSetting,
-    network?: { ignoreSSL?: boolean; proxy?: string }
-  ): Promise<void>
+  pullModel(model: string, id?: string, name?: string): Promise<void>

   /**
    * Cancels the download of a specific model.
    * @param {string} modelId - The ID of the model to cancel the download for.
    * @returns {Promise<void>} A promise that resolves when the download has been cancelled.
    */
-  cancelModelDownload(modelId: string): Promise<void>
+  cancelModelPull(model: string): Promise<void>

   /**
    * Deletes a model.
    * @param modelId - The ID of the model to delete.
    * @returns A Promise that resolves when the model has been deleted.
    */
-  deleteModel(model: ModelFile): Promise<void>
+  deleteModel(model: string): Promise<void>

   /**
-   * Gets a list of downloaded models.
+   * Gets downloaded models.
    * @returns A Promise that resolves with an array of downloaded models.
    */
-  getDownloadedModels(): Promise<ModelFile[]>
+  getModels(): Promise<Model[]>

   /**
-   * Gets a list of configured models.
-   * @returns A Promise that resolves with an array of configured models.
+   * Update a pulled model's metadata
+   * @param model - The model to update.
+   * @returns A Promise that resolves when the model has been updated.
    */
-  getConfiguredModels(): Promise<ModelFile[]>
+  updateModel(model: Partial<Model>): Promise<Model>
+
+  /**
+   * Import an existing model file.
+   * @param model id of the model to import
+   * @param modelPath - path of the model file
+   */
+  importModel(
+    model: string,
+    modePath: string,
+    name?: string,
+    optionType?: OptionType
+  ): Promise<void>
 }
@@ -1,16 +1,13 @@
-import * as monitoringInterface from './monitoringInterface';
-import * as resourceInfo from './resourceInfo';
-
-import * as index from './index';
-import * as monitoringInterface from './monitoringInterface';
-import * as resourceInfo from './resourceInfo';
+import * as monitoringInterface from './monitoringInterface'
+import * as resourceInfo from './resourceInfo'
+
+import * as index from './index'

 it('should re-export all symbols from monitoringInterface and resourceInfo', () => {
   for (const key in monitoringInterface) {
-    expect(index[key]).toBe(monitoringInterface[key]);
+    expect(index[key]).toBe(monitoringInterface[key])
   }
   for (const key in resourceInfo) {
-    expect(index[key]).toBe(resourceInfo[key]);
+    expect(index[key]).toBe(resourceInfo[key])
   }
-});
+})
@@ -18,7 +18,8 @@
     "docs/**/*",
     "scripts/**/*",
     "icons/**/*",
-    "themes"
+    "themes",
+    "shared"
   ],
   "asarUnpack": [
     "pre-install",
@@ -26,7 +27,8 @@
     "docs",
     "scripts",
     "icons",
-    "themes"
+    "themes",
+    "shared"
   ],
   "publish": [
     {
electron/shared/.gitkeep (new empty file)
@@ -15,6 +15,8 @@ import {
 import { Constants } from './constants'
 import { HubPage } from '../pages/hubPage'
 import { CommonActions } from '../pages/commonActions'
+import { rmSync } from 'fs'
+import * as path from 'path'

 export let electronApp: ElectronApplication
 export let page: Page
@@ -103,10 +105,14 @@ export const test = base.extend<
     },
     { auto: true },
   ],
 })

 test.beforeAll(async () => {
+  await rmSync(path.join(__dirname, '../../test-data'), {
+    recursive: true,
+    force: true,
+  })
+
   test.setTimeout(TIMEOUT)
   await setupElectron()
   await page.waitForSelector('img[alt="Jan - Logo"]', {
@@ -16,7 +16,8 @@ test.beforeAll(async () => {
 test('explores hub', async ({ hubPage }) => {
   await hubPage.navigateByMenu()
   await hubPage.verifyContainerVisible()
-  const useModelBtn= page.getByTestId(/^use-model-btn-.*/).first()
+  await hubPage.scrollToBottom()
+  const useModelBtn = page.getByTestId(/^use-model-btn-.*/).first()

   await expect(useModelBtn).toBeVisible({
     timeout: TIMEOUT,
@@ -8,9 +8,8 @@ export class BasePage {
   constructor(
     protected readonly page: Page,
     readonly action: CommonActions,
-    protected containerId: string,
-  ) {
-  }
+    protected containerId: string
+  ) {}

   public getValue(key: string) {
     return this.action.getValue(key)
@@ -37,6 +36,12 @@ export class BasePage {
     expect(container.isVisible()).toBeTruthy()
   }

+  async scrollToBottom() {
+    await this.page.evaluate(() => {
+      window.scrollTo(0, document.body.scrollHeight)
+    })
+  }
+
   async waitUpdateLoader() {
     await this.isElementVisible('img[alt="Jan - Logo"]')
   }
@@ -63,39 +63,46 @@ export default class JanAssistantExtension extends AssistantExtension {
   }

   async getAssistants(): Promise<Assistant[]> {
+    try {
       // get all the assistant directories
       // get all the assistant metadata json
       const results: Assistant[] = []

       const allFileName: string[] = await fs.readdirSync(
         JanAssistantExtension._homeDir
       )
       for (const fileName of allFileName) {
         const filePath = await joinPath([
           JanAssistantExtension._homeDir,
           fileName,
         ])

         if (!(await fs.fileStat(filePath))?.isDirectory) continue
         const jsonFiles: string[] = (await fs.readdirSync(filePath)).filter(
           (file: string) => file === 'assistant.json'
         )

         if (jsonFiles.length !== 1) {
           // has more than one assistant file -> ignore
           continue
         }

         const content = await fs.readFileSync(
           await joinPath([filePath, jsonFiles[0]]),
           'utf-8'
         )
         const assistant: Assistant =
           typeof content === 'object' ? content : JSON.parse(content)

         results.push(assistant)
       }

       return results
+    } catch (err) {
+      console.debug(err)
+      return [this.defaultAssistant]
+    }
   }

   async deleteAssistant(assistant: Assistant): Promise<void> {
@@ -112,39 +119,39 @@ export default class JanAssistantExtension extends AssistantExtension {
   }

   private async createJanAssistant(): Promise<void> {
-    const janAssistant: Assistant = {
+    await this.createAssistant(this.defaultAssistant)
+  }
+
+  private defaultAssistant: Assistant = {
     avatar: '',
     thread_location: undefined,
     id: 'jan',
     object: 'assistant',
     created_at: Date.now(),
     name: 'Jan',
     description: 'A default assistant that can use all downloaded models',
     model: '*',
     instructions: '',
     tools: [
       {
         type: 'retrieval',
         enabled: false,
         useTimeWeightedRetriever: false,
         settings: {
           top_k: 2,
           chunk_size: 1024,
           chunk_overlap: 64,
           retrieval_template: `Use the following pieces of context to answer the question at the end. If you don't know the answer, just say that you don't know, don't try to make up an answer.
 ----------------
 CONTEXT: {CONTEXT}
 ----------------
 QUESTION: {QUESTION}
 ----------------
 Helpful Answer:`,
         },
       },
     ],
     file_ids: [],
     metadata: undefined,
-    }
-
-    await this.createAssistant(janAssistant)
   }
 }
@ -10,8 +10,6 @@ import { HNSWLib } from 'langchain/vectorstores/hnswlib'
|
|||||||
import { OpenAIEmbeddings } from 'langchain/embeddings/openai'
|
import { OpenAIEmbeddings } from 'langchain/embeddings/openai'
|
||||||
import { readEmbeddingEngine } from './engine'
|
import { readEmbeddingEngine } from './engine'
|
||||||
|
|
||||||
import path from 'path'
|
|
||||||
|
|
||||||
export class Retrieval {
|
export class Retrieval {
|
||||||
public chunkSize: number = 100
|
public chunkSize: number = 100
|
||||||
public chunkOverlap?: number = 0
|
public chunkOverlap?: number = 0
|
||||||
|
|||||||
1  extensions/inference-cortex-extension/bin/version.txt  Normal file
@ -0,0 +1 @@
1.0.2-rc4
41  extensions/inference-cortex-extension/download.bat  Normal file
@ -0,0 +1,41 @@
@echo off
set BIN_PATH=./bin
set SHARED_PATH=./../../electron/shared
set /p CORTEX_VERSION=<./bin/version.txt

@REM Download cortex.llamacpp binaries
set VERSION=v0.1.35
set DOWNLOAD_URL=https://github.com/janhq/cortex.llamacpp/releases/download/%VERSION%/cortex.llamacpp-0.1.35-windows-amd64
set CUDA_DOWNLOAD_URL=https://github.com/janhq/cortex.llamacpp/releases/download/%VERSION%
set SUBFOLDERS=win-cuda-12-0 win-cuda-11-7 win-noavx win-avx win-avx2 win-avx512 win-vulkan

call .\node_modules\.bin\download -e --strip 1 -o %BIN_PATH% https://github.com/janhq/cortex/releases/download/v%CORTEX_VERSION%/cortex-%CORTEX_VERSION%-windows-amd64.tar.gz
call .\node_modules\.bin\download %DOWNLOAD_URL%-avx2-cuda-12-0.tar.gz -e --strip 1 -o %BIN_PATH%/avx2-cuda-12-0/engines/cortex.llamacpp
call .\node_modules\.bin\download %DOWNLOAD_URL%-avx2-cuda-11-7.tar.gz -e --strip 1 -o %BIN_PATH%/avx2-cuda-11-7/engines/cortex.llamacpp
call .\node_modules\.bin\download %DOWNLOAD_URL%-noavx-cuda-12-0.tar.gz -e --strip 1 -o %BIN_PATH%/noavx-cuda-12-0/engines/cortex.llamacpp
call .\node_modules\.bin\download %DOWNLOAD_URL%-noavx-cuda-11-7.tar.gz -e --strip 1 -o %BIN_PATH%/noavx-cuda-11-7/engines/cortex.llamacpp
call .\node_modules\.bin\download %DOWNLOAD_URL%-noavx.tar.gz -e --strip 1 -o %BIN_PATH%/noavx/engines/cortex.llamacpp
call .\node_modules\.bin\download %DOWNLOAD_URL%-avx.tar.gz -e --strip 1 -o %BIN_PATH%/avx/engines/cortex.llamacpp
call .\node_modules\.bin\download %DOWNLOAD_URL%-avx2.tar.gz -e --strip 1 -o %BIN_PATH%/avx2/engines/cortex.llamacpp
call .\node_modules\.bin\download %DOWNLOAD_URL%-avx512.tar.gz -e --strip 1 -o %BIN_PATH%/avx512/engines/cortex.llamacpp
call .\node_modules\.bin\download %DOWNLOAD_URL%-vulkan.tar.gz -e --strip 1 -o %BIN_PATH%/vulkan/engines/cortex.llamacpp
call .\node_modules\.bin\download %CUDA_DOWNLOAD_URL%/cuda-12-0-windows-amd64.tar.gz -e --strip 1 -o %SHARED_PATH%
call .\node_modules\.bin\download %CUDA_DOWNLOAD_URL%/cuda-11-7-windows-amd64.tar.gz -e --strip 1 -o %SHARED_PATH%

move %BIN_PATH%\cortex-server-beta.exe %BIN_PATH%\cortex-server.exe
del %BIN_PATH%\cortex-beta.exe
del %BIN_PATH%\cortex.exe

@REM Loop through each folder and move DLLs (excluding engine.dll)
for %%F in (%SUBFOLDERS%) do (
    echo Processing folder: %BIN_PATH%\%%F

    @REM Move all .dll files except engine.dll
    for %%D in (%BIN_PATH%\%%F\engines\cortex.llamacpp\*.dll) do (
        if /I not "%%~nxD"=="engine.dll" (
            move "%%D" "%BIN_PATH%"
        )
    )
)

echo DLL files moved successfully.
47  extensions/inference-cortex-extension/download.sh  Executable file
@ -0,0 +1,47 @@
#!/bin/bash

# Read CORTEX_VERSION
CORTEX_VERSION=$(cat ./bin/version.txt)
CORTEX_RELEASE_URL="https://github.com/janhq/cortex/releases/download"
ENGINE_DOWNLOAD_URL="https://github.com/janhq/cortex.llamacpp/releases/download/v0.1.35/cortex.llamacpp-0.1.35"
CUDA_DOWNLOAD_URL="https://github.com/janhq/cortex.llamacpp/releases/download/v0.1.35"
# Detect platform
OS_TYPE=$(uname)

if [ "$OS_TYPE" == "Linux" ]; then
    # Linux downloads
    download "${CORTEX_RELEASE_URL}/v${CORTEX_VERSION}/cortex-${CORTEX_VERSION}-linux-amd64.tar.gz" -e --strip 1 -o "./bin"
    mv ./bin/cortex-server-beta ./bin/cortex-server
    rm -rf ./bin/cortex
    rm -rf ./bin/cortex-beta
    chmod +x "./bin/cortex-server"

    # Download engines for Linux
    download "${ENGINE_DOWNLOAD_URL}-linux-amd64-noavx.tar.gz" -e --strip 1 -o "./bin/noavx/engines/cortex.llamacpp" 1
    download "${ENGINE_DOWNLOAD_URL}-linux-amd64-avx.tar.gz" -e --strip 1 -o "./bin/avx/engines/cortex.llamacpp" 1
    download "${ENGINE_DOWNLOAD_URL}-linux-amd64-avx2.tar.gz" -e --strip 1 -o "./bin/avx2/engines/cortex.llamacpp" 1
    download "${ENGINE_DOWNLOAD_URL}-linux-amd64-avx512.tar.gz" -e --strip 1 -o "./bin/avx512/engines/cortex.llamacpp" 1
    download "${ENGINE_DOWNLOAD_URL}-linux-amd64-avx2-cuda-12-0.tar.gz" -e --strip 1 -o "./bin/avx2-cuda-12-0/engines/cortex.llamacpp" 1
    download "${ENGINE_DOWNLOAD_URL}-linux-amd64-avx2-cuda-11-7.tar.gz" -e --strip 1 -o "./bin/avx2-cuda-11-7/engines/cortex.llamacpp" 1
    download "${ENGINE_DOWNLOAD_URL}-linux-amd64-noavx-cuda-12-0.tar.gz" -e --strip 1 -o "./bin/noavx-cuda-12-0/engines/cortex.llamacpp" 1
    download "${ENGINE_DOWNLOAD_URL}-linux-amd64-noavx-cuda-11-7.tar.gz" -e --strip 1 -o "./bin/noavx-cuda-11-7/engines/cortex.llamacpp" 1
    download "${ENGINE_DOWNLOAD_URL}-linux-amd64-vulkan.tar.gz" -e --strip 1 -o "./bin/vulkan/engines/cortex.llamacpp" 1
    download "${CUDA_DOWNLOAD_URL}/cuda-12-0-linux-amd64.tar.gz" -e --strip 1 -o "../../electron/shared" 1
    download "${CUDA_DOWNLOAD_URL}/cuda-11-7-linux-amd64.tar.gz" -e --strip 1 -o "../../electron/shared" 1

elif [ "$OS_TYPE" == "Darwin" ]; then
    # macOS downloads
    download "${CORTEX_RELEASE_URL}/v${CORTEX_VERSION}/cortex-${CORTEX_VERSION}-mac-universal.tar.gz" -e --strip 1 -o "./bin" 1
    mv ./bin/cortex-server-beta ./bin/cortex-server
    rm -rf ./bin/cortex
    rm -rf ./bin/cortex-beta
    chmod +x "./bin/cortex-server"

    # Download engines for macOS
    download "${ENGINE_DOWNLOAD_URL}-mac-arm64.tar.gz" -e --strip 1 -o ./bin/arm64/engines/cortex.llamacpp
    download "${ENGINE_DOWNLOAD_URL}-mac-amd64.tar.gz" -e --strip 1 -o ./bin/x64/engines/cortex.llamacpp

else
    echo "Unsupported operating system: $OS_TYPE"
    exit 1
fi
@ -10,12 +10,12 @@
   "scripts": {
     "test": "jest",
     "build": "tsc --module commonjs && rollup -c rollup.config.ts",
-    "downloadnitro:linux:darwin": "./download.sh",
-    "downloadnitro:win32": "download.bat",
-    "downloadnitro": "run-script-os",
-    "build:publish:darwin": "rimraf *.tgz --glob && yarn build && npm run downloadnitro && ../../.github/scripts/auto-sign.sh && cpx \"bin/**\" \"dist/bin\" && npm pack && cpx *.tgz ../../pre-install",
-    "build:publish:win32:linux": "rimraf *.tgz --glob && yarn build && npm run downloadnitro && cpx \"bin/**\" \"dist/bin\" && npm pack && cpx *.tgz ../../pre-install",
-    "build:publish": "yarn test && run-script-os"
+    "downloadcortex:linux:darwin": "./download.sh",
+    "downloadcortex:win32": "download.bat",
+    "downloadcortex": "run-script-os",
+    "build:publish:darwin": "rimraf *.tgz --glob && yarn build && npm run downloadcortex && ../../.github/scripts/auto-sign.sh && cpx \"bin/**\" \"dist/bin\" && npm pack && cpx *.tgz ../../pre-install",
+    "build:publish:win32:linux": "rimraf *.tgz --glob && yarn build && npm run downloadcortex && cpx \"bin/**\" \"dist/bin\" && npm pack && cpx *.tgz ../../pre-install",
+    "build:publish": "run-script-os"
   },
   "exports": {
     ".": "./dist/index.js",
@ -50,6 +50,8 @@
     "cpu-instructions": "^0.0.13",
     "decompress": "^4.2.1",
     "fetch-retry": "^5.0.6",
+    "ky": "^1.7.2",
+    "p-queue": "^8.0.1",
     "rxjs": "^7.8.1",
     "tcp-port-used": "^1.0.2",
     "terminate": "2.6.1",
@ -31,5 +31,5 @@
"tags": ["34B", "Finetuned"],
"size": 21556982144
},
- "engine": "nitro"
+ "engine": "llama-cpp"
}
@ -31,5 +31,5 @@
"tags": ["7B", "Finetuned"],
"size": 5056982144
},
- "engine": "nitro"
+ "engine": "llama-cpp"
}
@ -31,5 +31,5 @@
"tags": ["Vision"],
"size": 5750000000
},
- "engine": "nitro"
+ "engine": "llama-cpp"
}
@ -30,5 +30,5 @@
"tags": ["7B", "Finetuned"],
"size": 4370000000
},
- "engine": "nitro"
+ "engine": "llama-cpp"
}
@ -31,6 +31,6 @@
"tags": ["22B", "Finetuned", "Featured"],
"size": 13341237440
},
- "engine": "nitro"
+ "engine": "llama-cpp"
}
@ -31,6 +31,6 @@
"tags": ["34B", "Finetuned"],
"size": 21500000000
},
- "engine": "nitro"
+ "engine": "llama-cpp"
}
@ -31,5 +31,5 @@
"tags": ["Tiny"],
"size": 1430000000
},
- "engine": "nitro"
+ "engine": "llama-cpp"
}
@ -31,5 +31,5 @@
"tags": ["33B"],
"size": 19940000000
},
- "engine": "nitro"
+ "engine": "llama-cpp"
}
@ -31,5 +31,5 @@
"tags": ["2B", "Finetuned", "Tiny"],
"size": 1630000000
},
- "engine": "nitro"
+ "engine": "llama-cpp"
}
@ -31,5 +31,5 @@
"tags": ["7B", "Finetuned"],
"size": 5330000000
},
- "engine": "nitro"
+ "engine": "llama-cpp"
}
@ -37,5 +37,5 @@
],
"size": 16600000000
},
- "engine": "nitro"
+ "engine": "llama-cpp"
}
@ -38,5 +38,5 @@
],
"size": 1710000000
},
- "engine": "nitro"
+ "engine": "llama-cpp"
}
@ -37,5 +37,5 @@
],
"size": 5760000000
},
- "engine": "nitro"
+ "engine": "llama-cpp"
}
@ -31,5 +31,5 @@
"tags": ["70B", "Foundational Model"],
"size": 43920000000
},
- "engine": "nitro"
+ "engine": "llama-cpp"
}
@ -31,5 +31,5 @@
"tags": ["7B", "Foundational Model"],
"size": 4080000000
},
- "engine": "nitro"
+ "engine": "llama-cpp"
}
@ -31,5 +31,5 @@
"tags": ["8B"],
"size": 4920000000
},
- "engine": "nitro"
+ "engine": "llama-cpp"
}
@ -34,5 +34,5 @@
],
"size": 4920000000
},
- "engine": "nitro"
+ "engine": "llama-cpp"
}
@ -37,5 +37,5 @@
],
"size": 42500000000
},
- "engine": "nitro"
+ "engine": "llama-cpp"
}
@ -37,5 +37,5 @@
],
"size": 4920000000
},
- "engine": "nitro"
+ "engine": "llama-cpp"
}
@ -31,5 +31,5 @@
"tags": ["1B", "Featured"],
"size": 1320000000
},
- "engine": "nitro"
+ "engine": "llama-cpp"
}
@ -31,5 +31,5 @@
"tags": ["3B", "Featured"],
"size": 3420000000
},
- "engine": "nitro"
+ "engine": "llama-cpp"
}
@ -34,5 +34,5 @@
],
"size": 1170000000
},
- "engine": "nitro"
+ "engine": "llama-cpp"
}
@ -32,5 +32,5 @@
"tags": ["Vision"],
"size": 7870000000
},
- "engine": "nitro"
+ "engine": "llama-cpp"
}
@ -32,5 +32,5 @@
"tags": ["Vision"],
"size": 4370000000
},
- "engine": "nitro"
+ "engine": "llama-cpp"
}
@ -32,5 +32,5 @@
"size": 4370000000,
"cover": "https://raw.githubusercontent.com/janhq/jan/dev/models/mistral-ins-7b-q4/cover.png"
},
- "engine": "nitro"
+ "engine": "llama-cpp"
}
@ -30,5 +30,5 @@
"tags": ["70B", "Foundational Model"],
"size": 26440000000
},
- "engine": "nitro"
+ "engine": "llama-cpp"
}
@ -31,5 +31,5 @@
"tags": ["7B", "Finetuned"],
"size": 4370000000
},
- "engine": "nitro"
+ "engine": "llama-cpp"
}
@ -31,5 +31,5 @@
"tags": ["Recommended", "7B", "Finetuned"],
"size": 4370000000
},
- "engine": "nitro"
+ "engine": "llama-cpp"
}
@ -34,5 +34,5 @@
],
"size": 2320000000
},
- "engine": "nitro"
+ "engine": "llama-cpp"
}
@ -34,5 +34,5 @@
],
"size": 8366000000
},
- "engine": "nitro"
+ "engine": "llama-cpp"
}
@ -31,5 +31,5 @@
"tags": ["34B", "Finetuned"],
"size": 20220000000
},
- "engine": "nitro"
+ "engine": "llama-cpp"
}
@ -31,5 +31,5 @@
"tags": ["7B", "Finetuned"],
"size": 4770000000
},
- "engine": "nitro"
+ "engine": "llama-cpp"
}
@ -31,6 +31,6 @@
"tags": ["7B", "Finetuned"],
"size": 4680000000
},
- "engine": "nitro"
+ "engine": "llama-cpp"
}
@ -31,6 +31,6 @@
"tags": ["14B", "Featured"],
"size": 8990000000
},
- "engine": "nitro"
+ "engine": "llama-cpp"
}
@ -31,6 +31,6 @@
"tags": ["32B"],
"size": 19900000000
},
- "engine": "nitro"
+ "engine": "llama-cpp"
}
@ -31,6 +31,6 @@
"tags": ["72B"],
"size": 47400000000
},
- "engine": "nitro"
+ "engine": "llama-cpp"
}
@ -31,6 +31,6 @@
"tags": ["7B", "Featured"],
"size": 4680000000
},
- "engine": "nitro"
+ "engine": "llama-cpp"
}
@ -31,6 +31,6 @@
"tags": ["7B", "Featured"],
"size": 4680000000
},
- "engine": "nitro"
+ "engine": "llama-cpp"
}
@ -31,5 +31,5 @@
"tags": ["3B", "Finetuned", "Tiny"],
"size": 2970000000
},
- "engine": "nitro"
+ "engine": "llama-cpp"
}
@ -30,5 +30,5 @@
"tags": ["7B", "Finetuned"],
"size": 4370000000
},
- "engine": "nitro"
+ "engine": "llama-cpp"
}
@ -31,5 +31,5 @@
"tags": ["Tiny", "Foundation Model"],
"size": 669000000
},
- "engine": "nitro"
+ "engine": "llama-cpp"
}
@ -31,5 +31,5 @@
"size": 4370000000,
"cover": "https://raw.githubusercontent.com/janhq/jan/dev/models/trinity-v1.2-7b/cover.png"
},
- "engine": "nitro"
+ "engine": "llama-cpp"
}
@ -31,6 +31,6 @@
"tags": ["7B", "Finetuned"],
"size": 4410000000
},
- "engine": "nitro"
+ "engine": "llama-cpp"
}
@ -31,5 +31,5 @@
"tags": ["Recommended", "13B", "Finetuned"],
"size": 7870000000
},
- "engine": "nitro"
+ "engine": "llama-cpp"
}
@ -31,5 +31,5 @@
"tags": ["34B", "Foundational Model"],
"size": 20660000000
},
- "engine": "nitro"
+ "engine": "llama-cpp"
}
@ -114,19 +114,7 @@ export default [
      ]),
      NODE: JSON.stringify(`${packageJson.name}/${packageJson.node}`),
      DEFAULT_SETTINGS: JSON.stringify(defaultSettingJson),
-      INFERENCE_URL: JSON.stringify(
-        process.env.INFERENCE_URL ||
-          'http://127.0.0.1:3928/inferences/server/chat_completion'
-      ),
-      TROUBLESHOOTING_URL: JSON.stringify(
-        'https://jan.ai/guides/troubleshooting'
-      ),
-      JAN_SERVER_INFERENCE_URL: JSON.stringify(
-        'http://localhost:1337/v1/chat/completions'
-      ),
-      CUDA_DOWNLOAD_URL: JSON.stringify(
-        'https://catalog.jan.ai/dist/cuda-dependencies/<version>/<platform>/cuda.tar.gz'
-      ),
+      CORTEX_API_URL: JSON.stringify('http://127.0.0.1:39291'),
    }),
    // Allow json resolution
    json(),
@ -1,7 +1,5 @@
 declare const NODE: string
-declare const INFERENCE_URL: string
-declare const TROUBLESHOOTING_URL: string
-declare const JAN_SERVER_INFERENCE_URL: string
+declare const CORTEX_API_URL: string
 declare const DEFAULT_SETTINGS: Array<any>
 declare const MODELS: Array<any>
168  extensions/inference-cortex-extension/src/index.ts  Normal file
@ -0,0 +1,168 @@
/**
 * @file This file exports a class that implements the InferenceExtension interface from the @janhq/core package.
 * The class provides methods for initializing and stopping a model, and for making inference requests.
 * It also subscribes to events emitted by the @janhq/core package and handles new message requests.
 * @version 1.0.0
 * @module inference-extension/src/index
 */

import {
  Model,
  executeOnMain,
  systemInformation,
  joinPath,
  dirName,
  LocalOAIEngine,
  InferenceEngine,
  getJanDataFolderPath,
  extractModelLoadParams,
} from '@janhq/core'
import PQueue from 'p-queue'
import ky from 'ky'

/**
 * A class that implements the InferenceExtension interface from the @janhq/core package.
 * The class provides methods for initializing and stopping a model, and for making inference requests.
 * It also subscribes to events emitted by the @janhq/core package and handles new message requests.
 */
export default class JanInferenceCortexExtension extends LocalOAIEngine {
  // DEPRECATED
  nodeModule: string = 'node'

  queue = new PQueue({ concurrency: 1 })

  provider: string = InferenceEngine.cortex

  /**
   * The URL for making inference requests.
   */
  inferenceUrl = `${CORTEX_API_URL}/v1/chat/completions`

  /**
   * Subscribes to events emitted by the @janhq/core package.
   */
  async onLoad() {
    const models = MODELS as Model[]

    this.registerModels(models)

    super.onLoad()

    // Run the process watchdog
    const systemInfo = await systemInformation()
    await this.clean()
    await executeOnMain(NODE, 'run', systemInfo)

    this.queue.add(() => this.healthz())

    window.addEventListener('beforeunload', () => {
      this.clean()
    })
  }

  onUnload(): void {
    this.clean()
    executeOnMain(NODE, 'dispose')
    super.onUnload()
  }

  override async loadModel(
    model: Model & { file_path?: string }
  ): Promise<void> {
    if (
      model.engine === InferenceEngine.nitro &&
      model.settings.llama_model_path
    ) {
      // Legacy chat model support
      model.settings = {
        ...model.settings,
        llama_model_path: await getModelFilePath(
          model,
          model.settings.llama_model_path
        ),
      }
    } else {
      const { llama_model_path, ...settings } = model.settings
      model.settings = settings
    }

    if (model.engine === InferenceEngine.nitro && model.settings.mmproj) {
      // Legacy clip vision model support
      model.settings = {
        ...model.settings,
        mmproj: await getModelFilePath(model, model.settings.mmproj),
      }
    } else {
      const { mmproj, ...settings } = model.settings
      model.settings = settings
    }

    return await ky
      .post(`${CORTEX_API_URL}/v1/models/start`, {
        json: {
          ...extractModelLoadParams(model.settings),
          model: model.id,
          engine:
            model.engine === InferenceEngine.nitro // Legacy model cache
              ? InferenceEngine.cortex_llamacpp
              : model.engine,
        },
      })
      .json()
      .catch(async (e) => {
        throw (await e.response?.json()) ?? e
      })
      .then()
  }

  override async unloadModel(model: Model): Promise<void> {
    return ky
      .post(`${CORTEX_API_URL}/v1/models/stop`, {
        json: { model: model.id },
      })
      .json()
      .then()
  }

  /**
   * Do health check on cortex.cpp
   * @returns
   */
  healthz(): Promise<void> {
    return ky
      .get(`${CORTEX_API_URL}/healthz`, {
        retry: {
          limit: 10,
          methods: ['get'],
        },
      })
      .then(() => {})
  }

  /**
   * Clean cortex processes
   * @returns
   */
  clean(): Promise<any> {
    return ky
      .delete(`${CORTEX_API_URL}/processmanager/destroy`, {
        timeout: 2000, // maximum 2 seconds
      })
      .catch(() => {
        // Do nothing
      })
  }
}

/// Legacy
export const getModelFilePath = async (
  model: Model,
  file: string
): Promise<string> => {
  // Symlink to the model file
  if (!model.sources[0]?.url.startsWith('http')) {
    return model.sources[0]?.url
  }
  return joinPath([await getJanDataFolderPath(), 'models', model.id, file])
}
///
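For orientation, the request that loadModel above ends up issuing against cortex.cpp looks roughly like the sketch below. The endpoint and the model/engine fields come straight from the code in this file; the concrete model id and load parameters (ctx_len, ngl) are made-up examples, not values taken from this PR.

// Illustrative sketch only - mirrors the shape built in loadModel() above.
// 'mistral-ins-7b-q4', ctx_len and ngl are example values, not from this PR.
import ky from 'ky'

const CORTEX_API_URL = 'http://127.0.0.1:39291' // injected via rollup in the real extension

async function startModelExample(): Promise<unknown> {
  return ky
    .post(`${CORTEX_API_URL}/v1/models/start`, {
      json: {
        model: 'mistral-ins-7b-q4', // example model id
        engine: 'llama-cpp',        // engine name used by the model JSONs in this PR
        ctx_len: 2048,              // example load parameter
        ngl: 33,                    // example load parameter
      },
    })
    .json()
}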
@ -1,5 +1,5 @@
 import { describe, expect, it } from '@jest/globals'
-import { executableNitroFile } from './execute'
+import { executableCortexFile } from './execute'
 import { GpuSetting } from '@janhq/core'
 import { cpuInfo } from 'cpu-instructions'
@ -27,10 +27,10 @@ jest.mock('cpu-instructions', () => ({
     cpuInfo: jest.fn(),
   },
 }))
-let mock = cpuInfo.cpuInfo as jest.Mock
-mock.mockReturnValue([])
+let mockCpuInfo = cpuInfo.cpuInfo as jest.Mock
+mockCpuInfo.mockReturnValue([])

-describe('test executable nitro file', () => {
+describe('test executable cortex file', () => {
   afterAll(function () {
     Object.defineProperty(process, 'platform', {
       value: originalPlatform,
@ -44,10 +44,14 @@ describe('test executable nitro file', () => {
     Object.defineProperty(process, 'arch', {
       value: 'arm64',
     })
-    expect(executableNitroFile(testSettings)).toEqual(
+    expect(executableCortexFile(testSettings)).toEqual(
       expect.objectContaining({
-        enginePath: expect.stringContaining(`mac-arm64`),
-        executablePath: originalPlatform === 'darwin' ? expect.stringContaining(`mac-arm64/cortex-cpp`) : expect.anything(),
+        enginePath: expect.stringContaining(`arm64`),
+        binPath: expect.stringContaining(`bin`),
+        executablePath:
+          originalPlatform === 'darwin'
+            ? expect.stringContaining(`cortex-server`)
+            : expect.anything(),
         cudaVisibleDevices: '',
         vkVisibleDevices: '',
       })
@ -55,10 +59,14 @@ describe('test executable nitro file', () => {
     Object.defineProperty(process, 'arch', {
       value: 'x64',
     })
-    expect(executableNitroFile(testSettings)).toEqual(
+    expect(executableCortexFile(testSettings)).toEqual(
       expect.objectContaining({
-        enginePath: expect.stringContaining(`mac-x64`),
-        executablePath: originalPlatform === 'darwin' ? expect.stringContaining(`mac-x64/cortex-cpp`) : expect.anything(),
+        enginePath: expect.stringContaining(`x64`),
+        binPath: expect.stringContaining(`bin`),
+        executablePath:
+          originalPlatform === 'darwin'
+            ? expect.stringContaining(`cortex-server`)
+            : expect.anything(),
         cudaVisibleDevices: '',
         vkVisibleDevices: '',
       })
@ -73,10 +81,12 @@ describe('test executable nitro file', () => {
       ...testSettings,
       run_mode: 'cpu',
     }
-    expect(executableNitroFile(settings)).toEqual(
+    mockCpuInfo.mockReturnValue(['avx'])
+    expect(executableCortexFile(settings)).toEqual(
       expect.objectContaining({
-        enginePath: expect.stringContaining(`win`),
-        executablePath: expect.stringContaining(`cortex-cpp.exe`),
+        enginePath: expect.stringContaining(`avx`),
+        binPath: expect.stringContaining(`bin`),
+        executablePath: expect.stringContaining(`cortex-server.exe`),
         cudaVisibleDevices: '',
         vkVisibleDevices: '',
       })
@ -107,10 +117,12 @@ describe('test executable nitro file', () => {
       },
     ],
     }
-    expect(executableNitroFile(settings)).toEqual(
+    mockCpuInfo.mockReturnValue(['avx2'])
+    expect(executableCortexFile(settings)).toEqual(
       expect.objectContaining({
-        enginePath: expect.stringContaining(`win-cuda-11-7`),
-        executablePath: expect.stringContaining(`cortex-cpp.exe`),
+        enginePath: expect.stringContaining(`avx2-cuda-11-7`),
+        binPath: expect.stringContaining(`bin`),
+        executablePath: expect.stringContaining(`cortex-server.exe`),
         cudaVisibleDevices: '0',
         vkVisibleDevices: '0',
       })
@ -141,10 +153,12 @@ describe('test executable nitro file', () => {
       },
     ],
     }
-    expect(executableNitroFile(settings)).toEqual(
+    mockCpuInfo.mockReturnValue(['noavx'])
+    expect(executableCortexFile(settings)).toEqual(
       expect.objectContaining({
-        enginePath: expect.stringContaining(`win-cuda-12-0`),
-        executablePath: expect.stringContaining(`cortex-cpp.exe`),
+        enginePath: expect.stringContaining(`noavx-cuda-12-0`),
+        binPath: expect.stringContaining(`bin`),
+        executablePath: expect.stringContaining(`cortex-server.exe`),
         cudaVisibleDevices: '0',
         vkVisibleDevices: '0',
       })
@ -159,10 +173,11 @@ describe('test executable nitro file', () => {
       ...testSettings,
       run_mode: 'cpu',
     }
-    expect(executableNitroFile(settings)).toEqual(
+    mockCpuInfo.mockReturnValue(['noavx'])
+    expect(executableCortexFile(settings)).toEqual(
       expect.objectContaining({
-        enginePath: expect.stringContaining(`linux`),
-        executablePath: expect.stringContaining(`cortex-cpp`),
+        enginePath: expect.stringContaining(`noavx`),
+        executablePath: expect.stringContaining(`cortex-server`),
         cudaVisibleDevices: '',
         vkVisibleDevices: '',
       })
@ -193,10 +208,11 @@ describe('test executable nitro file', () => {
       },
     ],
     }
-    expect(executableNitroFile(settings)).toEqual(
+    expect(executableCortexFile(settings)).toEqual(
       expect.objectContaining({
-        enginePath: expect.stringContaining(`linux-cuda-11-7`),
-        executablePath: expect.stringContaining(`cortex-cpp`),
+        enginePath: expect.stringContaining(`cuda-11-7`),
+        binPath: expect.stringContaining(`bin`),
+        executablePath: expect.stringContaining(`cortex-server`),
         cudaVisibleDevices: '0',
         vkVisibleDevices: '0',
       })
@ -227,10 +243,11 @@ describe('test executable nitro file', () => {
       },
     ],
     }
-    expect(executableNitroFile(settings)).toEqual(
+    expect(executableCortexFile(settings)).toEqual(
      expect.objectContaining({
-        enginePath: expect.stringContaining(`linux-cuda-12-0`),
-        executablePath: expect.stringContaining(`cortex-cpp`),
+        enginePath: expect.stringContaining(`cuda-12-0`),
+        binPath: expect.stringContaining(`bin`),
+        executablePath: expect.stringContaining(`cortex-server`),
         cudaVisibleDevices: '0',
         vkVisibleDevices: '0',
       })
@ -249,12 +266,13 @@ describe('test executable nitro file', () => {
     const cpuInstructions = ['avx512', 'avx2', 'avx', 'noavx']
     cpuInstructions.forEach((instruction) => {
-      mock.mockReturnValue([instruction])
+      mockCpuInfo.mockReturnValue([instruction])

-      expect(executableNitroFile(settings)).toEqual(
+      expect(executableCortexFile(settings)).toEqual(
         expect.objectContaining({
-          enginePath: expect.stringContaining(`linux-${instruction}`),
-          executablePath: expect.stringContaining(`cortex-cpp`),
+          enginePath: expect.stringContaining(instruction),
+          binPath: expect.stringContaining(`bin`),
+          executablePath: expect.stringContaining(`cortex-server`),
           cudaVisibleDevices: '',
           vkVisibleDevices: '',
@ -273,11 +291,12 @@ describe('test executable nitro file', () => {
     }
     const cpuInstructions = ['avx512', 'avx2', 'avx', 'noavx']
     cpuInstructions.forEach((instruction) => {
-      mock.mockReturnValue([instruction])
-      expect(executableNitroFile(settings)).toEqual(
+      mockCpuInfo.mockReturnValue([instruction])
+      expect(executableCortexFile(settings)).toEqual(
         expect.objectContaining({
-          enginePath: expect.stringContaining(`win-${instruction}`),
-          executablePath: expect.stringContaining(`cortex-cpp.exe`),
+          enginePath: expect.stringContaining(instruction),
+          binPath: expect.stringContaining(`bin`),
+          executablePath: expect.stringContaining(`cortex-server.exe`),
           cudaVisibleDevices: '',
           vkVisibleDevices: '',
         })
@ -312,11 +331,12 @@ describe('test executable nitro file', () => {
     }
     const cpuInstructions = ['avx512', 'avx2', 'avx', 'noavx']
     cpuInstructions.forEach((instruction) => {
-      mock.mockReturnValue([instruction])
-      expect(executableNitroFile(settings)).toEqual(
+      mockCpuInfo.mockReturnValue([instruction])
+      expect(executableCortexFile(settings)).toEqual(
         expect.objectContaining({
-          enginePath: expect.stringContaining(`win-cuda-12-0`),
-          executablePath: expect.stringContaining(`cortex-cpp.exe`),
+          enginePath: expect.stringContaining(`cuda-12-0`),
+          binPath: expect.stringContaining(`bin`),
+          executablePath: expect.stringContaining(`cortex-server.exe`),
           cudaVisibleDevices: '0',
           vkVisibleDevices: '0',
         })
@ -351,11 +371,12 @@ describe('test executable nitro file', () => {
       ],
     }
     cpuInstructions.forEach((instruction) => {
-      mock.mockReturnValue([instruction])
-      expect(executableNitroFile(settings)).toEqual(
+      mockCpuInfo.mockReturnValue([instruction])
+      expect(executableCortexFile(settings)).toEqual(
         expect.objectContaining({
-          enginePath: expect.stringContaining(`linux-cuda-12-0`),
-          executablePath: expect.stringContaining(`cortex-cpp`),
+          enginePath: expect.stringContaining(`cuda-12-0`),
+          binPath: expect.stringContaining(`bin`),
+          executablePath: expect.stringContaining(`cortex-server`),
           cudaVisibleDevices: '0',
           vkVisibleDevices: '0',
         })
@ -391,11 +412,12 @@ describe('test executable nitro file', () => {
       ],
     }
     cpuInstructions.forEach((instruction) => {
-      mock.mockReturnValue([instruction])
-      expect(executableNitroFile(settings)).toEqual(
+      mockCpuInfo.mockReturnValue([instruction])
+      expect(executableCortexFile(settings)).toEqual(
         expect.objectContaining({
-          enginePath: expect.stringContaining(`linux-vulkan`),
-          executablePath: expect.stringContaining(`cortex-cpp`),
+          enginePath: expect.stringContaining(`vulkan`),
+          binPath: expect.stringContaining(`bin`),
+          executablePath: expect.stringContaining(`cortex-server`),
           cudaVisibleDevices: '0',
           vkVisibleDevices: '0',
         })
@ -417,11 +439,15 @@ describe('test executable nitro file', () => {
       ...testSettings,
       run_mode: 'cpu',
     }
-    mock.mockReturnValue([])
-    expect(executableNitroFile(settings)).toEqual(
+    mockCpuInfo.mockReturnValue([])
+    expect(executableCortexFile(settings)).toEqual(
       expect.objectContaining({
-        enginePath: expect.stringContaining(`mac-x64`),
-        executablePath: originalPlatform === 'darwin' ? expect.stringContaining(`mac-x64/cortex-cpp`) : expect.anything(),
+        enginePath: expect.stringContaining(`x64`),
+        binPath: expect.stringContaining(`bin`),
+        executablePath:
+          originalPlatform === 'darwin'
+            ? expect.stringContaining(`cortex-server`)
+            : expect.anything(),
         cudaVisibleDevices: '',
         vkVisibleDevices: '',
       })
@ -2,8 +2,9 @@ import { GpuSetting } from '@janhq/core'
 import * as path from 'path'
 import { cpuInfo } from 'cpu-instructions'

-export interface NitroExecutableOptions {
+export interface CortexExecutableOptions {
   enginePath: string
+  binPath: string
   executablePath: string
   cudaVisibleDevices: string
   vkVisibleDevices: string
@ -36,8 +37,8 @@ const os = (): string => {
     ? 'win'
     : process.platform === 'darwin'
       ? process.arch === 'arm64'
-        ? 'mac-arm64'
-        : 'mac-x64'
+        ? 'arm64'
+        : 'x64'
       : 'linux'
 }

@ -66,7 +67,7 @@ const cudaVersion = (settings?: GpuSetting): '11-7' | '12-0' | undefined => {
  * The CPU instructions that will be set - either 'avx512', 'avx2', 'avx', or 'noavx'.
  * @returns
  */
-const cpuInstructions = () => {
+const cpuInstructions = (): string => {
   if (process.platform === 'darwin') return ''
   return cpuInfo.cpuInfo().some((e) => e.toUpperCase() === 'AVX512')
     ? 'avx512'
@ -81,29 +82,32 @@ const cpuInstructions = () => {
  * Find which executable file to run based on the current platform.
  * @returns The name of the executable file to run.
  */
-export const executableNitroFile = (
+export const executableCortexFile = (
   gpuSetting?: GpuSetting
-): NitroExecutableOptions => {
-  let engineFolder = [
-    os(),
-    ...(gpuSetting?.vulkan
-      ? []
-      : [
-          gpuRunMode(gpuSetting) !== 'cuda' ? cpuInstructions() : '',
-          gpuRunMode(gpuSetting),
-          cudaVersion(gpuSetting),
-        ]),
-    gpuSetting?.vulkan ? 'vulkan' : undefined,
-  ]
-    .filter((e) => !!e)
-    .join('-')
+): CortexExecutableOptions => {
+  const cpuInstruction = cpuInstructions()
+  let engineFolder = gpuSetting?.vulkan
+    ? 'vulkan'
+    : process.platform === 'darwin'
+      ? os()
+      : [
+          gpuRunMode(gpuSetting) !== 'cuda' ||
+          cpuInstruction === 'avx2' || cpuInstruction === 'avx512'
+            ? cpuInstruction
+            : 'noavx',
+          gpuRunMode(gpuSetting),
+          cudaVersion(gpuSetting),
+        ]
+          .filter((e) => !!e)
+          .join('-')
   let cudaVisibleDevices = gpuSetting?.gpus_in_use.join(',') ?? ''
   let vkVisibleDevices = gpuSetting?.gpus_in_use.join(',') ?? ''
-  let binaryName = `${process.platform === 'darwin' ? `${os()}/` : ''}cortex-cpp${extension()}`
+  let binaryName = `cortex-server${extension()}`
+  const binPath = path.join(__dirname, '..', 'bin')
   return {
-    enginePath: path.join(__dirname, '..', 'bin', engineFolder),
-    executablePath: path.join(__dirname, '..', 'bin', binaryName),
+    enginePath: path.join(binPath, engineFolder),
+    executablePath: path.join(binPath, binaryName),
+    binPath: binPath,
    cudaVisibleDevices,
    vkVisibleDevices,
  }
 }
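To make the folder-selection logic above concrete, this is roughly the options object executableCortexFile would produce on a Linux machine with an AVX2-capable CPU and one CUDA 12.x GPU in use. The field names match the interface above; the absolute paths are placeholders, not output captured from this PR.

// Assumed example result of executableCortexFile() on Linux + AVX2 + CUDA 12.x, GPU 0 in use.
// Paths are placeholders; the folder name follows the `<cpu-instructions>-cuda-<version>` join above.
const exampleOptions: CortexExecutableOptions = {
  enginePath: '/opt/jan/extensions/inference-cortex-extension/bin/avx2-cuda-12-0',
  binPath: '/opt/jan/extensions/inference-cortex-extension/bin',
  executablePath: '/opt/jan/extensions/inference-cortex-extension/bin/cortex-server',
  cudaVisibleDevices: '0',
  vkVisibleDevices: '0',
}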
94  extensions/inference-cortex-extension/src/node/index.test.ts  Normal file
@ -0,0 +1,94 @@
jest.mock('@janhq/core/node', () => ({
  ...jest.requireActual('@janhq/core/node'),
  getJanDataFolderPath: () => '',
  getSystemResourceInfo: () => {
    return {
      cpu: {
        cores: 1,
        logicalCores: 1,
        threads: 1,
        model: 'model',
        speed: 1,
      },
      memory: {
        total: 1,
        free: 1,
      },
      gpu: {
        model: 'model',
        memory: 1,
        cuda: {
          version: 'version',
          devices: 'devices',
        },
        vulkan: {
          version: 'version',
          devices: 'devices',
        },
      },
    }
  },
}))

jest.mock('fs', () => ({
  default: {
    readdirSync: () => [],
  },
}))

jest.mock('child_process', () => ({
  exec: () => {
    return {
      stdout: { on: jest.fn() },
      stderr: { on: jest.fn() },
      on: jest.fn(),
    }
  },
  spawn: () => {
    return {
      stdout: { on: jest.fn() },
      stderr: { on: jest.fn() },
      on: jest.fn(),
      pid: '111',
    }
  },
}))

jest.mock('./execute', () => ({
  executableCortexFile: () => {
    return {
      enginePath: 'enginePath',
      executablePath: 'executablePath',
      cudaVisibleDevices: 'cudaVisibleDevices',
      vkVisibleDevices: 'vkVisibleDevices',
    }
  },
}))

import index from './index'

describe('dispose', () => {
  it('should dispose a model successfully on Mac', async () => {
    Object.defineProperty(process, 'platform', {
      value: 'darwin',
    })

    // Call the dispose function
    const result = await index.dispose()

    // Assert that the result is as expected
    expect(result).toBeUndefined()
  })

  it('should kill the subprocess successfully on Windows', async () => {
    Object.defineProperty(process, 'platform', {
      value: 'win32',
    })

    // Call the killSubprocess function
    const result = await index.dispose()

    // Assert that the result is as expected
    expect(result).toBeUndefined()
  })
})
103  extensions/inference-cortex-extension/src/node/index.ts  Normal file
@ -0,0 +1,103 @@
import path from 'path'
import { getJanDataFolderPath, log, SystemInformation } from '@janhq/core/node'
import { executableCortexFile } from './execute'
import { ProcessWatchdog } from './watchdog'
import { appResourcePath } from '@janhq/core/node'

// The HOST address to use for the Nitro subprocess
const LOCAL_PORT = '39291'
let watchdog: ProcessWatchdog | undefined = undefined

/**
 * Spawns a Nitro subprocess.
 * @returns A promise that resolves when the Nitro subprocess is started.
 */
function run(systemInfo?: SystemInformation): Promise<any> {
  log(`[CORTEX]:: Spawning cortex subprocess...`)

  return new Promise<void>(async (resolve, reject) => {
    let executableOptions = executableCortexFile(
      // If ngl is not set or equal to 0, run on CPU with correct instructions
      systemInfo?.gpuSetting
        ? {
            ...systemInfo.gpuSetting,
            run_mode: systemInfo.gpuSetting.run_mode,
          }
        : undefined
    )

    // Execute the binary
    log(`[CORTEX]:: Spawn cortex at path: ${executableOptions.executablePath}`)
    log(`[CORTEX]:: Cortex engine path: ${executableOptions.enginePath}`)

    addEnvPaths(path.join(appResourcePath(), 'shared'))
    addEnvPaths(executableOptions.binPath)
    addEnvPaths(executableOptions.enginePath)

    const dataFolderPath = getJanDataFolderPath()
    watchdog = new ProcessWatchdog(
      executableOptions.executablePath,
      [
        '--start-server',
        '--port',
        LOCAL_PORT.toString(),
        '--config_file_path',
        `${path.join(dataFolderPath, '.janrc')}`,
        '--data_folder_path',
        dataFolderPath,
      ],
      {
        cwd: executableOptions.enginePath,
        env: {
          ...process.env,
          ENGINE_PATH: executableOptions.enginePath,
          CUDA_VISIBLE_DEVICES: executableOptions.cudaVisibleDevices,
          // Vulkan - Support 1 device at a time for now
          ...(executableOptions.vkVisibleDevices?.length > 0 && {
            GGML_VULKAN_DEVICE: executableOptions.vkVisibleDevices[0],
          }),
        },
      }
    )
    watchdog.start()
    resolve()
  })
}

/**
 * Every module should have a dispose function
 * This will be called when the extension is unloaded and should clean up any resources
 * Also called when app is closed
 */
function dispose() {
  watchdog?.terminate()
}

function addEnvPaths(dest: string) {
  // Add engine path to the PATH and LD_LIBRARY_PATH
  if (process.platform === 'win32') {
    process.env.PATH = (process.env.PATH || '').concat(
      path.delimiter,
      dest,
    )
    log(`[CORTEX] PATH: ${process.env.PATH}`)
  } else {
    process.env.LD_LIBRARY_PATH = (process.env.LD_LIBRARY_PATH || '').concat(
      path.delimiter,
      dest,
    )
    log(`[CORTEX] LD_LIBRARY_PATH: ${process.env.LD_LIBRARY_PATH}`)
  }
}

/**
 * Cortex process info
 */
export interface CortexProcessInfo {
  isRunning: boolean
}

export default {
  run,
  dispose,
}
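Putting run() together, the watchdog ends up spawning a command line along these lines. The flags are exactly the ones assembled above; the paths are placeholders, not values taken from this PR.

// Placeholder paths; the flags mirror the args array built in run() above.
const exampleCortexCommand = [
  '/opt/jan/extensions/inference-cortex-extension/bin/cortex-server',
  '--start-server',
  '--port', '39291',
  '--config_file_path', '/home/user/jan/.janrc',
  '--data_folder_path', '/home/user/jan',
].join(' ')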
84  extensions/inference-cortex-extension/src/node/watchdog.ts  Normal file
@ -0,0 +1,84 @@
import { log } from '@janhq/core/node'
import { spawn, ChildProcess } from 'child_process'
import { EventEmitter } from 'events'

interface WatchdogOptions {
  cwd?: string
  restartDelay?: number
  maxRestarts?: number
  env?: NodeJS.ProcessEnv
}

export class ProcessWatchdog extends EventEmitter {
  private command: string
  private args: string[]
  private options: WatchdogOptions
  private process: ChildProcess | null
  private restartDelay: number
  private maxRestarts: number
  private restartCount: number
  private isTerminating: boolean

  constructor(command: string, args: string[], options: WatchdogOptions = {}) {
    super()
    this.command = command
    this.args = args
    this.options = options
    this.process = null
    this.restartDelay = options.restartDelay || 5000
    this.maxRestarts = options.maxRestarts || 5
    this.restartCount = 0
    this.isTerminating = false
  }

  start(): void {
    this.spawnProcess()
  }

  private spawnProcess(): void {
    if (this.isTerminating) return

    log(`Starting process: ${this.command} ${this.args.join(' ')}`)
    this.process = spawn(this.command, this.args, this.options)

    this.process.stdout?.on('data', (data: Buffer) => {
      log(`Process output: ${data}`)
      this.emit('output', data.toString())
    })

    this.process.stderr?.on('data', (data: Buffer) => {
      log(`Process error: ${data}`)
      this.emit('error', data.toString())
    })

    this.process.on('close', (code: number | null) => {
      log(`Process exited with code ${code}`)
      this.emit('close', code)
      if (!this.isTerminating) {
        this.restartProcess()
      }
    })
  }

  private restartProcess(): void {
    if (this.restartCount < this.maxRestarts) {
      this.restartCount++
      log(
        `Restarting process in ${this.restartDelay}ms (Attempt ${this.restartCount}/${this.maxRestarts})`
      )
      setTimeout(() => this.spawnProcess(), this.restartDelay)
    } else {
      log('Max restart attempts reached. Exiting watchdog.')
      this.emit('maxRestartsReached')
    }
  }

  terminate(): void {
    this.isTerminating = true
    if (this.process) {
      log('Terminating watched process...')
      this.process.kill()
    }
    this.emit('terminated')
  }
}
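Outside of this extension, ProcessWatchdog can be exercised on its own; a minimal usage sketch, where the binary path, port, and handlers are placeholders of my choosing rather than anything in this PR:

// Minimal sketch; '/usr/local/bin/some-server' stands in for any long-running binary.
import { ProcessWatchdog } from './watchdog'

const dog = new ProcessWatchdog('/usr/local/bin/some-server', ['--port', '8080'], {
  restartDelay: 2000, // retry 2s after an unexpected exit
  maxRestarts: 3,     // then give up and emit 'maxRestartsReached'
})
dog.on('output', (line: string) => console.log(line))
dog.on('maxRestartsReached', () => console.error('server kept crashing'))
dog.start()
// ...on shutdown:
dog.terminate()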
@ -1,9 +1,8 @@
 {
   "compilerOptions": {
     "moduleResolution": "node",
-    "target": "ES2015",
-    "module": "ES2020",
-    "lib": ["es2015", "es2016", "es2017", "dom"],
+    "target": "es2016",
+    "module": "esnext",
     "strict": true,
     "sourceMap": true,
     "declaration": true,
Some files were not shown because too many files have changed in this diff.