Merge pull request #4683 from janhq/chore/sync-release-to-dev

chore: sync release v0.5.15 branch into dev branch
This commit is contained in:
Louis 2025-02-18 18:40:03 +07:00 committed by GitHub
commit c4d7a143eb
No known key found for this signature in database
GPG Key ID: B5690EEEBB952194
150 changed files with 3400 additions and 2643 deletions

View File

@ -9,31 +9,6 @@ jobs:
get-update-version: get-update-version:
uses: ./.github/workflows/template-get-update-version.yml uses: ./.github/workflows/template-get-update-version.yml
create-draft-release:
runs-on: ubuntu-latest
if: github.event_name == 'push' && startsWith(github.ref, 'refs/tags/')
outputs:
upload_url: ${{ steps.create_release.outputs.upload_url }}
version: ${{ steps.get_version.outputs.version }}
permissions:
contents: write
steps:
- name: Extract tag name without v prefix
id: get_version
run: echo "VERSION=${GITHUB_REF#refs/tags/v}" >> $GITHUB_ENV && echo "::set-output name=version::${GITHUB_REF#refs/tags/v}"
env:
GITHUB_REF: ${{ github.ref }}
- name: Create Draft Release
id: create_release
uses: softprops/action-gh-release@v2
with:
tag_name: ${{ github.ref_name }}
token: ${{ secrets.GITHUB_TOKEN }}
name: "${{ env.VERSION }}"
draft: true
prerelease: false
generate_release_notes: true
build-macos: build-macos:
uses: ./.github/workflows/template-build-macos.yml uses: ./.github/workflows/template-build-macos.yml
secrets: inherit secrets: inherit
@ -65,7 +40,7 @@ jobs:
beta: true beta: true
sync-temp-to-latest: sync-temp-to-latest:
needs: [build-macos, create-draft-release, build-windows-x64, build-linux-x64]
needs: [build-macos, build-windows-x64, build-linux-x64]
runs-on: ubuntu-latest runs-on: ubuntu-latest
permissions: permissions:
contents: write contents: write
@ -82,19 +57,15 @@ jobs:
AWS_DEFAULT_REGION: ${{ secrets.DELTA_AWS_REGION }} AWS_DEFAULT_REGION: ${{ secrets.DELTA_AWS_REGION }}
AWS_EC2_METADATA_DISABLED: "true" AWS_EC2_METADATA_DISABLED: "true"
- name: set release to prerelease
run: |
gh release edit v${{ needs.create-draft-release.outputs.version }} --draft=false --prerelease
env:
GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }}
noti-discord-and-update-url-readme: noti-discord-and-update-url-readme:
needs: [build-macos, create-draft-release, build-windows-x64, build-linux-x64, sync-temp-to-latest]
needs: [build-macos, get-update-version, build-windows-x64, build-linux-x64, sync-temp-to-latest]
runs-on: ubuntu-latest runs-on: ubuntu-latest
steps: steps:
- name: Set version to environment variable - name: Set version to environment variable
run: | run: |
echo "VERSION=${{ needs.create-draft-release.outputs.version }}" >> $GITHUB_ENV VERSION=${{ needs.get-update-version.outputs.new_version }}
VERSION="${VERSION#v}"
echo "VERSION=$VERSION" >> $GITHUB_ENV
- name: Notify Discord - name: Notify Discord
uses: Ilshidur/action-discord@master uses: Ilshidur/action-discord@master
@ -105,6 +76,5 @@ jobs:
- macOS Universal: https://delta.jan.ai/beta/jan-beta-mac-universal-{{ VERSION }}.dmg - macOS Universal: https://delta.jan.ai/beta/jan-beta-mac-universal-{{ VERSION }}.dmg
- Linux Deb: https://delta.jan.ai/beta/jan-beta-linux-amd64-{{ VERSION }}.deb - Linux Deb: https://delta.jan.ai/beta/jan-beta-linux-amd64-{{ VERSION }}.deb
- Linux AppImage: https://delta.jan.ai/beta/jan-beta-linux-x86_64-{{ VERSION }}.AppImage - Linux AppImage: https://delta.jan.ai/beta/jan-beta-linux-x86_64-{{ VERSION }}.AppImage
- Github Release URL: https://github.com/janhq/jan/releases/tag/v{{ VERSION }}
env: env:
DISCORD_WEBHOOK: ${{ secrets.DISCORD_WEBHOOK_JAN_BETA }} DISCORD_WEBHOOK: ${{ secrets.DISCORD_WEBHOOK_JAN_BETA }}

View File

@ -1,39 +0,0 @@
name: build-jan-server
on:
workflow_call:
inputs:
dockerfile_path:
required: false
type: string
default: './Dockerfile'
docker_image_tag:
required: true
type: string
default: 'ghcr.io/janhq/jan-server:dev-latest'
jobs:
build:
runs-on: ubuntu-latest
env:
REGISTRY: ghcr.io
IMAGE_NAME: janhq/jan-server
permissions:
packages: write
steps:
- name: Checkout
uses: actions/checkout@v4
- name: Log in to the Container registry
uses: docker/login-action@v3
with:
registry: ${{ env.REGISTRY }}
username: ${{ github.actor }}
password: ${{ secrets.GITHUB_TOKEN }}
- name: Build and push Docker image
uses: docker/build-push-action@v3
with:
context: .
file: ${{ inputs.dockerfile_path }}
push: true
tags: ${{ inputs.docker_image_tag }}

View File

@ -83,7 +83,7 @@ jobs:
cat ./electron/package.json cat ./electron/package.json
echo "------------------------" echo "------------------------"
cat ./package.json cat ./package.json
jq '.build.publish = [{"provider": "generic", "url": "https://delta.jan.ai/beta", "channel": "beta"}, {"provider": "github", "owner": "janhq", "repo": "jan", "channel": "beta"}, {"provider": "s3", "acl": null, "bucket": "${{ secrets.DELTA_AWS_S3_BUCKET_NAME }}", "region": "${{ secrets.DELTA_AWS_REGION}}", "path": "temp-beta", "channel": "beta"}]' electron/package.json > /tmp/package.json jq '.build.publish = [{"provider": "generic", "url": "https://delta.jan.ai/beta", "channel": "beta"}, {"provider": "s3", "acl": null, "bucket": "${{ secrets.DELTA_AWS_S3_BUCKET_NAME }}", "region": "${{ secrets.DELTA_AWS_REGION}}", "path": "temp-beta", "channel": "beta"}]' electron/package.json > /tmp/package.json
mv /tmp/package.json electron/package.json mv /tmp/package.json electron/package.json
cat electron/package.json cat electron/package.json

View File

@ -99,7 +99,7 @@ jobs:
cat ./electron/package.json cat ./electron/package.json
echo "------------------------" echo "------------------------"
cat ./package.json cat ./package.json
jq '.build.publish = [{"provider": "generic", "url": "https://delta.jan.ai/beta", "channel": "beta"}, {"provider": "github", "owner": "janhq", "repo": "jan", "channel": "beta"}, {"provider": "s3", "acl": null, "bucket": "${{ secrets.DELTA_AWS_S3_BUCKET_NAME }}", "region": "${{ secrets.DELTA_AWS_REGION}}", "path": "temp-beta", "channel": "beta"}]' electron/package.json > /tmp/package.json jq '.build.publish = [{"provider": "generic", "url": "https://delta.jan.ai/beta", "channel": "beta"}, {"provider": "s3", "acl": null, "bucket": "${{ secrets.DELTA_AWS_S3_BUCKET_NAME }}", "region": "${{ secrets.DELTA_AWS_REGION}}", "path": "temp-beta", "channel": "beta"}]' electron/package.json > /tmp/package.json
mv /tmp/package.json electron/package.json mv /tmp/package.json electron/package.json
cat electron/package.json cat electron/package.json

View File

@ -108,7 +108,7 @@ jobs:
cat ./package.json cat ./package.json
echo "------------------------" echo "------------------------"
cat ./electron/scripts/uninstaller.nsh cat ./electron/scripts/uninstaller.nsh
jq '.build.publish = [{"provider": "generic", "url": "https://delta.jan.ai/beta", "channel": "beta"}, {"provider": "github", "owner": "janhq", "repo": "jan", "channel": "beta"}, {"provider": "s3", "acl": null, "bucket": "${{ secrets.DELTA_AWS_S3_BUCKET_NAME }}", "region": "${{ secrets.DELTA_AWS_REGION}}", "path": "temp-beta", "channel": "beta"}]' electron/package.json > /tmp/package.json jq '.build.publish = [{"provider": "generic", "url": "https://delta.jan.ai/beta", "channel": "beta"}, {"provider": "s3", "acl": null, "bucket": "${{ secrets.DELTA_AWS_S3_BUCKET_NAME }}", "region": "${{ secrets.DELTA_AWS_REGION}}", "path": "temp-beta", "channel": "beta"}]' electron/package.json > /tmp/package.json
mv /tmp/package.json electron/package.json mv /tmp/package.json electron/package.json
cat electron/package.json cat electron/package.json

View File

@ -12,6 +12,7 @@ export enum ExtensionTypeEnum {
SystemMonitoring = 'systemMonitoring', SystemMonitoring = 'systemMonitoring',
HuggingFace = 'huggingFace', HuggingFace = 'huggingFace',
Engine = 'engine', Engine = 'engine',
Hardware = 'hardware',
} }
export interface ExtensionType { export interface ExtensionType {

View File

@ -38,8 +38,14 @@ describe('OAIEngine', () => {
it('should subscribe to events on load', () => { it('should subscribe to events on load', () => {
engine.onLoad() engine.onLoad()
expect(events.on).toHaveBeenCalledWith(MessageEvent.OnMessageSent, expect.any(Function)) expect(events.on).toHaveBeenCalledWith(
expect(events.on).toHaveBeenCalledWith(InferenceEvent.OnInferenceStopped, expect.any(Function)) MessageEvent.OnMessageSent,
expect.any(Function)
)
expect(events.on).toHaveBeenCalledWith(
InferenceEvent.OnInferenceStopped,
expect.any(Function)
)
}) })
it('should handle inference request', async () => { it('should handle inference request', async () => {
@ -77,7 +83,12 @@ describe('OAIEngine', () => {
expect(events.emit).toHaveBeenCalledWith( expect(events.emit).toHaveBeenCalledWith(
MessageEvent.OnMessageUpdate, MessageEvent.OnMessageUpdate,
expect.objectContaining({ expect.objectContaining({
content: [{ type: ContentType.Text, text: { value: 'test response', annotations: [] } }], content: [
{
type: ContentType.Text,
text: { value: 'test response', annotations: [] },
},
],
status: MessageStatus.Ready, status: MessageStatus.Ready,
}) })
) )
@ -101,11 +112,10 @@ describe('OAIEngine', () => {
await engine.inference(data) await engine.inference(data)
expect(events.emit).toHaveBeenCalledWith(
expect(events.emit).toHaveBeenLastCalledWith(
MessageEvent.OnMessageUpdate, MessageEvent.OnMessageUpdate,
expect.objectContaining({ expect.objectContaining({
content: [{ type: ContentType.Text, text: { value: 'test error', annotations: [] } }],
status: 'error',
status: MessageStatus.Error,
error_code: 500, error_code: 500,
}) })
) )

View File

@ -42,7 +42,9 @@ export abstract class OAIEngine extends AIEngine {
*/ */
override onLoad() { override onLoad() {
super.onLoad() super.onLoad()
events.on(MessageEvent.OnMessageSent, (data: MessageRequest) => this.inference(data)) events.on(MessageEvent.OnMessageSent, (data: MessageRequest) =>
this.inference(data)
)
events.on(InferenceEvent.OnInferenceStopped, () => this.stopInference()) events.on(InferenceEvent.OnInferenceStopped, () => this.stopInference())
} }
@ -128,7 +130,9 @@ export abstract class OAIEngine extends AIEngine {
events.emit(MessageEvent.OnMessageUpdate, message) events.emit(MessageEvent.OnMessageUpdate, message)
}, },
complete: async () => { complete: async () => {
message.status = message.content.length ? MessageStatus.Ready : MessageStatus.Error message.status = message.content.length
? MessageStatus.Ready
: MessageStatus.Error
events.emit(MessageEvent.OnMessageUpdate, message) events.emit(MessageEvent.OnMessageUpdate, message)
}, },
error: async (err: any) => { error: async (err: any) => {
@ -141,7 +145,10 @@ export abstract class OAIEngine extends AIEngine {
message.content[0] = { message.content[0] = {
type: ContentType.Text, type: ContentType.Text,
text: { text: {
value: err.message,
value:
typeof message === 'string'
? err.message
: (JSON.stringify(err.message) ?? err.detail),
annotations: [], annotations: [],
}, },
} }

View File

@ -1,14 +1,17 @@
import { lastValueFrom, Observable } from 'rxjs' import { lastValueFrom, Observable } from 'rxjs'
import { requestInference } from './sse' import { requestInference } from './sse'
import { ReadableStream } from 'stream/web'; import { ReadableStream } from 'stream/web'
describe('requestInference', () => { describe('requestInference', () => {
it('should send a request to the inference server and return an Observable', () => { it('should send a request to the inference server and return an Observable', () => {
// Mock the fetch function // Mock the fetch function
const mockFetch: any = jest.fn(() => const mockFetch: any = jest.fn(() =>
Promise.resolve({ Promise.resolve({
ok: true, ok: true,
json: () => Promise.resolve({ choices: [{ message: { content: 'Generated response' } }] }), json: () =>
Promise.resolve({
choices: [{ message: { content: 'Generated response' } }],
}),
headers: new Headers(), headers: new Headers(),
redirected: false, redirected: false,
status: 200, status: 200,
@ -36,7 +39,10 @@ describe('requestInference', () => {
const mockFetch: any = jest.fn(() => const mockFetch: any = jest.fn(() =>
Promise.resolve({ Promise.resolve({
ok: false, ok: false,
json: () => Promise.resolve({ error: { message: 'Wrong API Key', code: 'invalid_api_key' } }), json: () =>
Promise.resolve({
error: { message: 'Invalid API Key.', code: 'invalid_api_key' },
}),
headers: new Headers(), headers: new Headers(),
redirected: false, redirected: false,
status: 401, status: 401,
@ -56,16 +62,22 @@ describe('requestInference', () => {
// Assert the expected behavior // Assert the expected behavior
expect(result).toBeInstanceOf(Observable) expect(result).toBeInstanceOf(Observable)
expect(lastValueFrom(result)).rejects.toEqual({ message: 'Wrong API Key', code: 'invalid_api_key' }) expect(lastValueFrom(result)).rejects.toEqual({
message: 'Invalid API Key.',
code: 'invalid_api_key',
})
}) })
}) })
it('should handle a successful response with a transformResponse function', () => { it('should handle a successful response with a transformResponse function', () => {
// Mock the fetch function // Mock the fetch function
const mockFetch: any = jest.fn(() => const mockFetch: any = jest.fn(() =>
Promise.resolve({ Promise.resolve({
ok: true, ok: true,
json: () => Promise.resolve({ choices: [{ message: { content: 'Generated response' } }] }), json: () =>
Promise.resolve({
choices: [{ message: { content: 'Generated response' } }],
}),
headers: new Headers(), headers: new Headers(),
redirected: false, redirected: false,
status: 200, status: 200,
@ -78,47 +90,57 @@ describe('requestInference', () => {
const inferenceUrl = 'https://inference-server.com' const inferenceUrl = 'https://inference-server.com'
const requestBody = { message: 'Hello' } const requestBody = { message: 'Hello' }
const model = { id: 'model-id', parameters: { stream: false } } const model = { id: 'model-id', parameters: { stream: false } }
const transformResponse = (data: any) => data.choices[0].message.content.toUpperCase() const transformResponse = (data: any) =>
data.choices[0].message.content.toUpperCase()
// Call the function // Call the function
const result = requestInference(inferenceUrl, requestBody, model, undefined, undefined, transformResponse) const result = requestInference(
inferenceUrl,
requestBody,
model,
undefined,
undefined,
transformResponse
)
// Assert the expected behavior // Assert the expected behavior
expect(result).toBeInstanceOf(Observable) expect(result).toBeInstanceOf(Observable)
expect(lastValueFrom(result)).resolves.toEqual('GENERATED RESPONSE') expect(lastValueFrom(result)).resolves.toEqual('GENERATED RESPONSE')
}) })
it('should handle a successful response with streaming enabled', () => {
it('should handle a successful response with streaming enabled', () => {
// Mock the fetch function // Mock the fetch function
const mockFetch: any = jest.fn(() => const mockFetch: any = jest.fn(() =>
Promise.resolve({ Promise.resolve({
ok: true, ok: true,
body: new ReadableStream({ body: new ReadableStream({
start(controller) { start(controller) {
controller.enqueue(new TextEncoder().encode('data: {"choices": [{"delta": {"content": "Streamed"}}]}')); controller.enqueue(
controller.enqueue(new TextEncoder().encode('data: [DONE]')); new TextEncoder().encode(
controller.close(); 'data: {"choices": [{"delta": {"content": "Streamed"}}]}'
} )
)
controller.enqueue(new TextEncoder().encode('data: [DONE]'))
controller.close()
},
}), }),
headers: new Headers(), headers: new Headers(),
redirected: false, redirected: false,
status: 200, status: 200,
statusText: 'OK', statusText: 'OK',
}) })
); )
jest.spyOn(global, 'fetch').mockImplementation(mockFetch); jest.spyOn(global, 'fetch').mockImplementation(mockFetch)
// Define the test inputs // Define the test inputs
const inferenceUrl = 'https://inference-server.com'; const inferenceUrl = 'https://inference-server.com'
const requestBody = { message: 'Hello' }; const requestBody = { message: 'Hello' }
const model = { id: 'model-id', parameters: { stream: true } }; const model = { id: 'model-id', parameters: { stream: true } }
// Call the function // Call the function
const result = requestInference(inferenceUrl, requestBody, model); const result = requestInference(inferenceUrl, requestBody, model)
// Assert the expected behavior // Assert the expected behavior
expect(result).toBeInstanceOf(Observable); expect(result).toBeInstanceOf(Observable)
expect(lastValueFrom(result)).resolves.toEqual('Streamed'); expect(lastValueFrom(result)).resolves.toEqual('Streamed')
}); })

View File

@ -32,21 +32,20 @@ export function requestInference(
}) })
.then(async (response) => { .then(async (response) => {
if (!response.ok) { if (!response.ok) {
const data = await response.json()
let errorCode = ErrorCode.Unknown
if (data.error) {
errorCode = data.error.code ?? data.error.type ?? ErrorCode.Unknown
} else if (response.status === 401) {
errorCode = ErrorCode.InvalidApiKey
}
const error = {
message: data.error?.message ?? data.message ?? 'Error occurred.',
code: errorCode,
}
subscriber.error(error)
subscriber.complete()
if (response.status === 401) {
throw {
code: ErrorCode.InvalidApiKey,
message: 'Invalid API Key.',
}
}
let data = await response.json()
try {
handleError(data)
} catch (err) {
subscriber.error(err)
return
} }
}
// There could be overriden stream parameter in the model // There could be overriden stream parameter in the model
// that is set in request body (transformed payload) // that is set in request body (transformed payload)
if ( if (
@ -54,9 +53,10 @@ export function requestInference(
model.parameters?.stream === false model.parameters?.stream === false
) { ) {
const data = await response.json() const data = await response.json()
if (data.error || data.message) {
subscriber.error(data.error ?? data)
subscriber.complete()
try {
handleError(data)
} catch (err) {
subscriber.error(err)
return
} }
if (transformResponse) { if (transformResponse) {
@ -91,13 +91,10 @@ export function requestInference(
const toParse = cachedLines + line const toParse = cachedLines + line
if (!line.includes('data: [DONE]')) { if (!line.includes('data: [DONE]')) {
const data = JSON.parse(toParse.replace('data: ', '')) const data = JSON.parse(toParse.replace('data: ', ''))
if (
'error' in data ||
'message' in data ||
'detail' in data
) {
subscriber.error(data.error ?? data)
subscriber.complete()
try {
handleError(data)
} catch (err) {
subscriber.error(err)
return return
} }
content += data.choices[0]?.delta?.content ?? '' content += data.choices[0]?.delta?.content ?? ''
@ -118,3 +115,18 @@ export function requestInference(
.catch((err) => subscriber.error(err)) .catch((err) => subscriber.error(err))
}) })
} }
/**
* Handle error and normalize it to a common format.
* @param data
*/
const handleError = (data: any) => {
if (
data.error ||
data.message ||
data.detail ||
(Array.isArray(data) && data.length && data[0].error)
) {
throw data.error ?? data[0]?.error ?? data
}
}
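For reference (not part of this commit), a minimal sketch of the payload shapes the new handleError helper is written to normalize; the example payloads below are illustrative only.

// Illustrative only: the same guard conditions as the handleError helper above,
// exercised against the payload shapes it checks for.
const normalizeError = (data: any) => {
  if (
    data.error ||
    data.message ||
    data.detail ||
    (Array.isArray(data) && data.length && data[0].error)
  ) {
    throw data.error ?? data[0]?.error ?? data
  }
}

try {
  // Object-style error body: the nested error object is thrown as-is.
  normalizeError({ error: { message: 'Invalid API Key.', code: 'invalid_api_key' } })
} catch (e) {
  console.log(e) // { message: 'Invalid API Key.', code: 'invalid_api_key' }
}

try {
  // Array-wrapped error body: the first element's error is thrown.
  normalizeError([{ error: { message: 'Rate limited.' } }])
} catch (e) {
  console.log(e) // { message: 'Rate limited.' }
}

// A regular chat completion chunk passes through without throwing.
normalizeError({ choices: [{ delta: { content: 'hello' } }] })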

View File

@ -0,0 +1,26 @@
import { HardwareInformation } from '../../types'
import { BaseExtension, ExtensionTypeEnum } from '../extension'
/**
* Hardware management extension. Provides system hardware information and GPU activation control.
* @abstract
* @extends BaseExtension
*/
export abstract class HardwareManagementExtension extends BaseExtension {
type(): ExtensionTypeEnum | undefined {
return ExtensionTypeEnum.Hardware
}
/**
* @returns A Promise that resolves to the system hardware information.
*/
abstract getHardware(): Promise<HardwareInformation>
/**
* @returns A Promise that resolves to the result of setting the active GPUs.
*/
abstract setAvtiveGpu(data: { gpus: number[] }): Promise<{
message: string
activated_gpus: number[]
}>
}
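Not part of this commit: a minimal sketch of a unit test for the new extension type, mirroring the removed MonitoringExtension test pattern further down in this diff; the TestHardwareExtension class is hypothetical.

import { ExtensionTypeEnum } from '../extension'
import { HardwareInformation } from '../../types'
import { HardwareManagementExtension } from './hardwareManagement'

it('should have the correct type', () => {
  // Stub subclass used only to exercise the abstract class.
  class TestHardwareExtension extends HardwareManagementExtension {
    getHardware(): Promise<HardwareInformation> {
      throw new Error('Method not implemented.')
    }
    setAvtiveGpu(data: { gpus: number[] }): Promise<{
      message: string
      activated_gpus: number[]
    }> {
      throw new Error('Method not implemented.')
    }
  }
  const hardwareExtension = new TestHardwareExtension()
  expect(hardwareExtension.type()).toBe(ExtensionTypeEnum.Hardware)
})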

View File

@ -1,6 +1,5 @@
import { ConversationalExtension } from './index'; import { ConversationalExtension } from './index';
import { InferenceExtension } from './index'; import { InferenceExtension } from './index';
import { MonitoringExtension } from './index';
import { AssistantExtension } from './index'; import { AssistantExtension } from './index';
import { ModelExtension } from './index'; import { ModelExtension } from './index';
import * as Engines from './index'; import * as Engines from './index';
@ -14,10 +13,6 @@ describe('index.ts exports', () => {
expect(InferenceExtension).toBeDefined(); expect(InferenceExtension).toBeDefined();
}); });
test('should export MonitoringExtension', () => {
expect(MonitoringExtension).toBeDefined();
});
test('should export AssistantExtension', () => { test('should export AssistantExtension', () => {
expect(AssistantExtension).toBeDefined(); expect(AssistantExtension).toBeDefined();
}); });

View File

@ -9,10 +9,7 @@ export { ConversationalExtension } from './conversational'
*/ */
export { InferenceExtension } from './inference' export { InferenceExtension } from './inference'
/**
* Monitoring extension for system monitoring.
*/
export { MonitoringExtension } from './monitoring'
/** /**
* Assistant extension for managing assistants. * Assistant extension for managing assistants.
@ -33,3 +30,8 @@ export * from './engines'
* Engines Management * Engines Management
*/ */
export * from './enginesManagement' export * from './enginesManagement'
/**
* Hardware Management
*/
export * from './hardwareManagement'

View File

@ -1,42 +0,0 @@
import { ExtensionTypeEnum } from '../extension';
import { MonitoringExtension } from './monitoring';
it('should have the correct type', () => {
class TestMonitoringExtension extends MonitoringExtension {
getGpuSetting(): Promise<GpuSetting | undefined> {
throw new Error('Method not implemented.');
}
getResourcesInfo(): Promise<any> {
throw new Error('Method not implemented.');
}
getCurrentLoad(): Promise<any> {
throw new Error('Method not implemented.');
}
getOsInfo(): Promise<OperatingSystemInfo> {
throw new Error('Method not implemented.');
}
}
const monitoringExtension = new TestMonitoringExtension();
expect(monitoringExtension.type()).toBe(ExtensionTypeEnum.SystemMonitoring);
});
it('should create an instance of MonitoringExtension', () => {
class TestMonitoringExtension extends MonitoringExtension {
getGpuSetting(): Promise<GpuSetting | undefined> {
throw new Error('Method not implemented.');
}
getResourcesInfo(): Promise<any> {
throw new Error('Method not implemented.');
}
getCurrentLoad(): Promise<any> {
throw new Error('Method not implemented.');
}
getOsInfo(): Promise<OperatingSystemInfo> {
throw new Error('Method not implemented.');
}
}
const monitoringExtension = new TestMonitoringExtension();
expect(monitoringExtension).toBeInstanceOf(MonitoringExtension);
});

View File

@ -1,20 +0,0 @@
import { BaseExtension, ExtensionTypeEnum } from '../extension'
import { GpuSetting, MonitoringInterface, OperatingSystemInfo } from '../../types'
/**
* Monitoring extension for system monitoring.
* @extends BaseExtension
*/
export abstract class MonitoringExtension extends BaseExtension implements MonitoringInterface {
/**
* Monitoring extension type.
*/
type(): ExtensionTypeEnum | undefined {
return ExtensionTypeEnum.SystemMonitoring
}
abstract getGpuSetting(): Promise<GpuSetting | undefined>
abstract getResourcesInfo(): Promise<any>
abstract getCurrentLoad(): Promise<any>
abstract getOsInfo(): Promise<OperatingSystemInfo>
}

View File

@ -1,4 +1,5 @@
export type AppConfiguration = { export type AppConfiguration = {
data_folder: string data_folder: string
quick_ask: boolean quick_ask: boolean
distinct_id?: string
} }

View File

@ -18,6 +18,7 @@ export type EngineMetadata = {
template?: string template?: string
} }
} }
explore_models_url?: string
} }
export type EngineVariant = { export type EngineVariant = {

View File

@ -0,0 +1,55 @@
export type Cpu = {
arch: string
cores: number
instructions: string[]
model: string
usage: number
}
export type GpuAdditionalInformation = {
compute_cap: string
driver_version: string
}
export type Gpu = {
activated: boolean
additional_information?: GpuAdditionalInformation
free_vram: number
id: string
name: string
total_vram: number
uuid: string
version: string
}
export type Os = {
name: string
version: string
}
export type Power = {
battery_life: number
charging_status: string
is_power_saving: boolean
}
export type Ram = {
available: number
total: number
type: string
}
export type Storage = {
available: number
total: number
type: string
}
export type HardwareInformation = {
cpu: Cpu
gpus: Gpu[]
os: Os
power: Power
ram: Ram
storage: Storage
}

View File

@ -4,7 +4,6 @@ import * as model from './model';
import * as thread from './thread'; import * as thread from './thread';
import * as message from './message'; import * as message from './message';
import * as inference from './inference'; import * as inference from './inference';
import * as monitoring from './monitoring';
import * as file from './file'; import * as file from './file';
import * as config from './config'; import * as config from './config';
import * as huggingface from './huggingface'; import * as huggingface from './huggingface';
@ -18,7 +17,6 @@ import * as setting from './setting';
expect(thread).toBeDefined(); expect(thread).toBeDefined();
expect(message).toBeDefined(); expect(message).toBeDefined();
expect(inference).toBeDefined(); expect(inference).toBeDefined();
expect(monitoring).toBeDefined();
expect(file).toBeDefined(); expect(file).toBeDefined();
expect(config).toBeDefined(); expect(config).toBeDefined();
expect(huggingface).toBeDefined(); expect(huggingface).toBeDefined();

View File

@ -3,7 +3,6 @@ export * from './model'
export * from './thread' export * from './thread'
export * from './message' export * from './message'
export * from './inference' export * from './inference'
export * from './monitoring'
export * from './file' export * from './file'
export * from './config' export * from './config'
export * from './huggingface' export * from './huggingface'
@ -11,3 +10,4 @@ export * from './miscellaneous'
export * from './api' export * from './api'
export * from './setting' export * from './setting'
export * from './engine' export * from './engine'
export * from './hardware'

View File

@ -1,33 +1,25 @@
import { GpuAdditionalInformation } from '../hardware'
export type SystemResourceInfo = { export type SystemResourceInfo = {
memAvailable: number memAvailable: number
} }
export type RunMode = 'cpu' | 'gpu'
export type GpuSetting = { export type GpuSetting = {
notify: boolean
run_mode: RunMode
nvidia_driver: {
exist: boolean
version: string
}
cuda: {
exist: boolean
version: string
}
gpus: GpuSettingInfo[] gpus: GpuSettingInfo[]
gpu_highest_vram: string
gpus_in_use: string[]
is_initial: boolean
// TODO: This needs to be set based on user toggle in settings // TODO: This needs to be set based on user toggle in settings
vulkan: boolean vulkan: boolean
cpu?: any
} }
export type GpuSettingInfo = { export type GpuSettingInfo = {
activated: boolean
free_vram: number
id: string id: string
vram: string
name: string name: string
arch?: string total_vram: number
uuid: string
version: string
additional_information?: GpuAdditionalInformation
} }
export type SystemInformation = { export type SystemInformation = {
@ -42,9 +34,6 @@ export type SupportedPlatform = SupportedPlatformTuple[number]
export type OperatingSystemInfo = { export type OperatingSystemInfo = {
platform: SupportedPlatform | 'unknown' platform: SupportedPlatform | 'unknown'
arch: string arch: string
release: string
machine: string
version: string
totalMem: number totalMem: number
freeMem: number freeMem: number
} }

View File

@ -71,7 +71,7 @@ export type Model = {
/** /**
* The model identifier, modern version of id. * The model identifier, modern version of id.
*/ */
mode?: string
model?: string
/** /**
* Human-readable name that is used for UI. * Human-readable name that is used for UI.
@ -150,6 +150,7 @@ export type ModelSettingParams = {
*/ */
export type ModelRuntimeParams = { export type ModelRuntimeParams = {
temperature?: number temperature?: number
max_temperature?: number
token_limit?: number token_limit?: number
top_k?: number top_k?: number
top_p?: number top_p?: number

View File

@ -1,13 +0,0 @@
import * as monitoringInterface from './monitoringInterface'
import * as resourceInfo from './resourceInfo'
import * as index from './index'
it('should re-export all symbols from monitoringInterface and resourceInfo', () => {
for (const key in monitoringInterface) {
expect(index[key]).toBe(monitoringInterface[key])
}
for (const key in resourceInfo) {
expect(index[key]).toBe(resourceInfo[key])
}
})

View File

@ -1,2 +0,0 @@
export * from './monitoringInterface'
export * from './resourceInfo'

View File

@ -1,29 +0,0 @@
import { GpuSetting, OperatingSystemInfo } from '../miscellaneous'
/**
* Monitoring extension for system monitoring.
* @extends BaseExtension
*/
export interface MonitoringInterface {
/**
* Returns information about the system resources.
* @returns {Promise<any>} A promise that resolves with the system resources information.
*/
getResourcesInfo(): Promise<any>
/**
* Returns the current system load.
* @returns {Promise<any>} A promise that resolves with the current system load.
*/
getCurrentLoad(): Promise<any>
/**
* Returns the GPU configuration.
*/
getGpuSetting(): Promise<GpuSetting | undefined>
/**
* Returns information about the operating system.
*/
getOsInfo(): Promise<OperatingSystemInfo>
}

View File

@ -1,6 +0,0 @@
export type ResourceInfo = {
mem: {
totalMemory: number
usedMemory: number
}
}

View File

@ -55,7 +55,7 @@ Extensions in Jan are configured through a JSON file that defines their behavior
"productName": "Conversational", "productName": "Conversational",
"version": "1.0.0", "version": "1.0.0",
"main": "dist/index.js", "main": "dist/index.js",
"description": "This extension enables conversations and state persistence via your filesystem", "description": "This extension enables conversations and state persistence via your filesystem.",
"url": "extension://@janhq/conversational-extension/dist/index.js" "url": "extension://@janhq/conversational-extension/dist/index.js"
}, },
"@janhq/inference-anthropic-extension": { "@janhq/inference-anthropic-extension": {
@ -70,7 +70,7 @@ Extensions in Jan are configured through a JSON file that defines their behavior
"productName": "Anthropic Inference Engine", "productName": "Anthropic Inference Engine",
"version": "1.0.2", "version": "1.0.2",
"main": "dist/index.js", "main": "dist/index.js",
"description": "This extension enables Anthropic chat completion API calls", "description": "This extension enables Anthropic chat completion API calls.",
"url": "extension://@janhq/inference-anthropic-extension/dist/index.js" "url": "extension://@janhq/inference-anthropic-extension/dist/index.js"
}, },
"@janhq/inference-triton-trt-llm-extension": { "@janhq/inference-triton-trt-llm-extension": {
@ -85,7 +85,7 @@ Extensions in Jan are configured through a JSON file that defines their behavior
"productName": "Triton-TRT-LLM Inference Engine", "productName": "Triton-TRT-LLM Inference Engine",
"version": "1.0.0", "version": "1.0.0",
"main": "dist/index.js", "main": "dist/index.js",
"description": "This extension enables Nvidia's TensorRT-LLM as an inference engine option", "description": "This extension enables Nvidia's TensorRT-LLM as an inference engine option.",
"url": "extension://@janhq/inference-triton-trt-llm-extension/dist/index.js" "url": "extension://@janhq/inference-triton-trt-llm-extension/dist/index.js"
}, },
"@janhq/inference-mistral-extension": { "@janhq/inference-mistral-extension": {
@ -100,7 +100,7 @@ Extensions in Jan are configured through a JSON file that defines their behavior
"productName": "MistralAI Inference Engine", "productName": "MistralAI Inference Engine",
"version": "1.0.1", "version": "1.0.1",
"main": "dist/index.js", "main": "dist/index.js",
"description": "This extension enables Mistral chat completion API calls", "description": "This extension enables Mistral chat completion API calls.",
"url": "extension://@janhq/inference-mistral-extension/dist/index.js" "url": "extension://@janhq/inference-mistral-extension/dist/index.js"
}, },
"@janhq/inference-martian-extension": { "@janhq/inference-martian-extension": {
@ -115,7 +115,7 @@ Extensions in Jan are configured through a JSON file that defines their behavior
"productName": "Martian Inference Engine", "productName": "Martian Inference Engine",
"version": "1.0.1", "version": "1.0.1",
"main": "dist/index.js", "main": "dist/index.js",
"description": "This extension enables Martian chat completion API calls", "description": "This extension enables Martian chat completion API calls.",
"url": "extension://@janhq/inference-martian-extension/dist/index.js" "url": "extension://@janhq/inference-martian-extension/dist/index.js"
}, },
"@janhq/inference-openrouter-extension": { "@janhq/inference-openrouter-extension": {
@ -130,7 +130,7 @@ Extensions in Jan are configured through a JSON file that defines their behavior
"productName": "OpenRouter Inference Engine", "productName": "OpenRouter Inference Engine",
"version": "1.0.0", "version": "1.0.0",
"main": "dist/index.js", "main": "dist/index.js",
"description": "This extension enables Open Router chat completion API calls", "description": "This extension enables Open Router chat completion API calls.",
"url": "extension://@janhq/inference-openrouter-extension/dist/index.js" "url": "extension://@janhq/inference-openrouter-extension/dist/index.js"
}, },
"@janhq/inference-nvidia-extension": { "@janhq/inference-nvidia-extension": {
@ -145,7 +145,7 @@ Extensions in Jan are configured through a JSON file that defines their behavior
"productName": "NVIDIA NIM Inference Engine", "productName": "NVIDIA NIM Inference Engine",
"version": "1.0.1", "version": "1.0.1",
"main": "dist/index.js", "main": "dist/index.js",
"description": "This extension enables NVIDIA chat completion API calls", "description": "This extension enables NVIDIA chat completion API calls.",
"url": "extension://@janhq/inference-nvidia-extension/dist/index.js" "url": "extension://@janhq/inference-nvidia-extension/dist/index.js"
}, },
"@janhq/inference-groq-extension": { "@janhq/inference-groq-extension": {
@ -160,7 +160,7 @@ Extensions in Jan are configured through a JSON file that defines their behavior
"productName": "Groq Inference Engine", "productName": "Groq Inference Engine",
"version": "1.0.1", "version": "1.0.1",
"main": "dist/index.js", "main": "dist/index.js",
"description": "This extension enables fast Groq chat completion API calls", "description": "This extension enables fast Groq chat completion API calls.",
"url": "extension://@janhq/inference-groq-extension/dist/index.js" "url": "extension://@janhq/inference-groq-extension/dist/index.js"
}, },
"@janhq/inference-openai-extension": { "@janhq/inference-openai-extension": {
@ -175,7 +175,7 @@ Extensions in Jan are configured through a JSON file that defines their behavior
"productName": "OpenAI Inference Engine", "productName": "OpenAI Inference Engine",
"version": "1.0.2", "version": "1.0.2",
"main": "dist/index.js", "main": "dist/index.js",
"description": "This extension enables OpenAI chat completion API calls", "description": "This extension enables OpenAI chat completion API calls.",
"url": "extension://@janhq/inference-openai-extension/dist/index.js" "url": "extension://@janhq/inference-openai-extension/dist/index.js"
}, },
"@janhq/inference-cohere-extension": { "@janhq/inference-cohere-extension": {
@ -190,7 +190,7 @@ Extensions in Jan are configured through a JSON file that defines their behavior
"productName": "Cohere Inference Engine", "productName": "Cohere Inference Engine",
"version": "1.0.0", "version": "1.0.0",
"main": "dist/index.js", "main": "dist/index.js",
"description": "This extension enables Cohere chat completion API calls", "description": "This extension enables Cohere chat completion API calls.",
"url": "extension://@janhq/inference-cohere-extension/dist/index.js" "url": "extension://@janhq/inference-cohere-extension/dist/index.js"
}, },
"@janhq/model-extension": { "@janhq/model-extension": {
@ -205,7 +205,7 @@ Extensions in Jan are configured through a JSON file that defines their behavior
"productName": "Model Management", "productName": "Model Management",
"version": "1.0.33", "version": "1.0.33",
"main": "dist/index.js", "main": "dist/index.js",
"description": "Model Management Extension provides model exploration and seamless downloads", "description": "Model Management Extension provides model exploration and seamless downloads.",
"url": "extension://@janhq/model-extension/dist/index.js" "url": "extension://@janhq/model-extension/dist/index.js"
}, },
"@janhq/monitoring-extension": { "@janhq/monitoring-extension": {
@ -220,7 +220,7 @@ Extensions in Jan are configured through a JSON file that defines their behavior
"productName": "System Monitoring", "productName": "System Monitoring",
"version": "1.0.10", "version": "1.0.10",
"main": "dist/index.js", "main": "dist/index.js",
"description": "This extension provides system health and OS level data", "description": "This extension provides system health and OS level data.",
"url": "extension://@janhq/monitoring-extension/dist/index.js" "url": "extension://@janhq/monitoring-extension/dist/index.js"
}, },
"@janhq/assistant-extension": { "@janhq/assistant-extension": {
@ -235,7 +235,7 @@ Extensions in Jan are configured through a JSON file that defines their behavior
"productName": "Jan Assistant", "productName": "Jan Assistant",
"version": "1.0.1", "version": "1.0.1",
"main": "dist/index.js", "main": "dist/index.js",
"description": "This extension enables assistants, including Jan, a default assistant that can call all downloaded models", "description": "This extension enables assistants, including Jan, a default assistant that can call all downloaded models.",
"url": "extension://@janhq/assistant-extension/dist/index.js" "url": "extension://@janhq/assistant-extension/dist/index.js"
}, },
"@janhq/tensorrt-llm-extension": { "@janhq/tensorrt-llm-extension": {

View File

@ -47,8 +47,8 @@ To add a new remote engine:
|-------|-------------|----------| |-------|-------------|----------|
| Engine Name | Name for your engine (e.g., "OpenAI", "Claude") | ✓ | | Engine Name | Name for your engine (e.g., "OpenAI", "Claude") | ✓ |
| API URL | The base URL of the provider's API | ✓ | | API URL | The base URL of the provider's API | ✓ |
| API Key | Your authentication key from the provider | ✓ |
| API Key | Your authentication key to activate this engine | ✓ |
| Model List URL | URL for fetching available models | |
| Model List URL | The endpoint URL to fetch available models | |
| API Key Template | Custom authorization header format | | | API Key Template | Custom authorization header format | |
| Request Format Conversion | Function to convert Jan's request format to provider's format | | | Request Format Conversion | Function to convert Jan's request format to provider's format | |
| Response Format Conversion | Function to convert provider's response format to Jan's format | | | Response Format Conversion | Function to convert provider's response format to Jan's format | |
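Not part of this commit: a hypothetical minimal engine configuration illustrating roughly where those fields end up on disk. The field names follow the anthropic.json resource shown later in this diff; mapping the table's UI fields onto them (for example, API URL onto transform_req.chat_completions.url) is an assumption, and all values are placeholders.

{
  "id": "my-provider",
  "type": "remote",
  "engine": "my_provider",
  "url": "https://console.my-provider.example.com/keys",
  "transform_req": {
    "chat_completions": {
      "url": "https://api.my-provider.example.com/v1/chat/completions",
      "template": "{{ tojson(input_request) }}"
    }
  },
  "transform_resp": {
    "chat_completions": {
      "template": "{{ tojson(input_request) }}"
    }
  }
}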

View File

@ -28,6 +28,7 @@ import { setupReactDevTool } from './utils/dev'
import { trayManager } from './managers/tray' import { trayManager } from './managers/tray'
import { logSystemInfo } from './utils/system' import { logSystemInfo } from './utils/system'
import { registerGlobalShortcuts } from './utils/shortcut' import { registerGlobalShortcuts } from './utils/shortcut'
import { registerLogger } from './utils/logger'
const preloadPath = join(__dirname, 'preload.js') const preloadPath = join(__dirname, 'preload.js')
const rendererPath = join(__dirname, '..', 'renderer') const rendererPath = join(__dirname, '..', 'renderer')
@ -79,6 +80,7 @@ app
}) })
.then(setupCore) .then(setupCore)
.then(createUserSpace) .then(createUserSpace)
.then(registerLogger)
.then(migrate) .then(migrate)
.then(setupExtensions) .then(setupExtensions)
.then(setupMenu) .then(setupMenu)

View File

@ -1,6 +1,6 @@
{ {
"name": "jan", "name": "jan",
"version": "0.1.4", "version": "0.1.1737985524",
"main": "./build/main.js", "main": "./build/main.js",
"author": "Jan <service@jan.ai>", "author": "Jan <service@jan.ai>",
"license": "MIT", "license": "MIT",

View File

@ -1,16 +1,28 @@
import fs from 'fs'
import {
createWriteStream,
existsSync,
mkdirSync,
readdir,
stat,
unlink,
writeFileSync,
} from 'fs'
import util from 'util' import util from 'util'
import { import {
getAppConfigurations, getAppConfigurations,
getJanDataFolderPath, getJanDataFolderPath,
Logger, Logger,
LoggerManager,
} from '@janhq/core/node' } from '@janhq/core/node'
import path, { join } from 'path' import path, { join } from 'path'
export class FileLogger extends Logger {
/**
* File Logger
*/
export class FileLogger implements Logger {
name = 'file' name = 'file'
logCleaningInterval: number = 120000 logCleaningInterval: number = 120000
timeout: NodeJS.Timeout | null = null
timeout: NodeJS.Timeout | undefined
appLogPath: string = './' appLogPath: string = './'
logEnabled: boolean = true logEnabled: boolean = true
@ -18,14 +30,13 @@ export class FileLogger extends Logger {
logEnabled: boolean = true, logEnabled: boolean = true,
logCleaningInterval: number = 120000 logCleaningInterval: number = 120000
) { ) {
super()
this.logEnabled = logEnabled this.logEnabled = logEnabled
if (logCleaningInterval) this.logCleaningInterval = logCleaningInterval if (logCleaningInterval) this.logCleaningInterval = logCleaningInterval
const appConfigurations = getAppConfigurations() const appConfigurations = getAppConfigurations()
const logFolderPath = join(appConfigurations.data_folder, 'logs') const logFolderPath = join(appConfigurations.data_folder, 'logs')
if (!fs.existsSync(logFolderPath)) { if (!existsSync(logFolderPath)) {
fs.mkdirSync(logFolderPath, { recursive: true }) mkdirSync(logFolderPath, { recursive: true })
} }
this.appLogPath = join(logFolderPath, 'app.log') this.appLogPath = join(logFolderPath, 'app.log')
@ -69,8 +80,8 @@ export class FileLogger extends Logger {
const logDirectory = path.join(getJanDataFolderPath(), 'logs') const logDirectory = path.join(getJanDataFolderPath(), 'logs')
// Perform log cleaning // Perform log cleaning
const currentDate = new Date() const currentDate = new Date()
if (fs.existsSync(logDirectory)) if (existsSync(logDirectory))
fs.readdir(logDirectory, (err, files) => { readdir(logDirectory, (err, files) => {
if (err) { if (err) {
console.error('Error reading log directory:', err) console.error('Error reading log directory:', err)
return return
@ -78,7 +89,7 @@ export class FileLogger extends Logger {
files.forEach((file) => { files.forEach((file) => {
const filePath = path.join(logDirectory, file) const filePath = path.join(logDirectory, file)
fs.stat(filePath, (err, stats) => { stat(filePath, (err, stats) => {
if (err) { if (err) {
console.error('Error getting file stats:', err) console.error('Error getting file stats:', err)
return return
@ -86,7 +97,7 @@ export class FileLogger extends Logger {
// Check size // Check size
if (stats.size > size) { if (stats.size > size) {
fs.unlink(filePath, (err) => { unlink(filePath, (err) => {
if (err) { if (err) {
console.error('Error deleting log file:', err) console.error('Error deleting log file:', err)
return return
@ -103,7 +114,7 @@ export class FileLogger extends Logger {
(1000 * 3600 * 24) (1000 * 3600 * 24)
) )
if (daysDifference > days) { if (daysDifference > days) {
fs.unlink(filePath, (err) => { unlink(filePath, (err) => {
if (err) { if (err) {
console.error('Error deleting log file:', err) console.error('Error deleting log file:', err)
return return
@ -124,15 +135,20 @@ export class FileLogger extends Logger {
} }
} }
/**
* Write log function implementation
* @param message
* @param logPath
*/
const writeLog = (message: string, logPath: string) => { const writeLog = (message: string, logPath: string) => {
if (!fs.existsSync(logPath)) { if (!existsSync(logPath)) {
const logDirectory = path.join(getJanDataFolderPath(), 'logs') const logDirectory = path.join(getJanDataFolderPath(), 'logs')
if (!fs.existsSync(logDirectory)) { if (!existsSync(logDirectory)) {
fs.mkdirSync(logDirectory) mkdirSync(logDirectory)
} }
fs.writeFileSync(logPath, message) writeFileSync(logPath, message)
} else { } else {
const logFile = fs.createWriteStream(logPath, { const logFile = createWriteStream(logPath, {
flags: 'a', flags: 'a',
}) })
logFile.write(util.format(message) + '\n') logFile.write(util.format(message) + '\n')
@ -140,3 +156,12 @@ const writeLog = (message: string, logPath: string) => {
console.debug(message) console.debug(message)
} }
} }
/**
* Register logger for global application logging
*/
export const registerLogger = () => {
const logger = new FileLogger()
LoggerManager.instance().register(logger)
logger.cleanLogs()
}

View File

@ -6,6 +6,8 @@ import groq from './resources/groq.json' with { type: 'json' }
import martian from './resources/martian.json' with { type: 'json' } import martian from './resources/martian.json' with { type: 'json' }
import mistral from './resources/mistral.json' with { type: 'json' } import mistral from './resources/mistral.json' with { type: 'json' }
import nvidia from './resources/nvidia.json' with { type: 'json' } import nvidia from './resources/nvidia.json' with { type: 'json' }
import deepseek from './resources/deepseek.json' with { type: 'json' }
import googleGemini from './resources/google_gemini.json' with { type: 'json' }
import anthropicModels from './models/anthropic.json' with { type: 'json' } import anthropicModels from './models/anthropic.json' with { type: 'json' }
import cohereModels from './models/cohere.json' with { type: 'json' } import cohereModels from './models/cohere.json' with { type: 'json' }
@ -15,6 +17,8 @@ import groqModels from './models/groq.json' with { type: 'json' }
import martianModels from './models/martian.json' with { type: 'json' } import martianModels from './models/martian.json' with { type: 'json' }
import mistralModels from './models/mistral.json' with { type: 'json' } import mistralModels from './models/mistral.json' with { type: 'json' }
import nvidiaModels from './models/nvidia.json' with { type: 'json' } import nvidiaModels from './models/nvidia.json' with { type: 'json' }
import deepseekModels from './models/deepseek.json' with { type: 'json' }
import googleGeminiModels from './models/google_gemini.json' with { type: 'json' }
const engines = [ const engines = [
anthropic, anthropic,
@ -25,6 +29,8 @@ const engines = [
mistral, mistral,
martian, martian,
nvidia, nvidia,
deepseek,
googleGemini,
] ]
const models = [ const models = [
...anthropicModels, ...anthropicModels,
@ -35,5 +41,7 @@ const models = [
...mistralModels, ...mistralModels,
...martianModels, ...martianModels,
...nvidiaModels, ...nvidiaModels,
...deepseekModels,
...googleGeminiModels,
] ]
export { engines, models } export { engines, models }

View File

@ -8,6 +8,7 @@
"inference_params": { "inference_params": {
"max_tokens": 4096, "max_tokens": 4096,
"temperature": 0.7, "temperature": 0.7,
"max_temperature": 1.0,
"stream": true "stream": true
}, },
"engine": "anthropic" "engine": "anthropic"
@ -21,6 +22,7 @@
"inference_params": { "inference_params": {
"max_tokens": 8192, "max_tokens": 8192,
"temperature": 0.7, "temperature": 0.7,
"max_temperature": 1.0,
"stream": true "stream": true
}, },
"engine": "anthropic" "engine": "anthropic"
@ -34,6 +36,7 @@
"inference_params": { "inference_params": {
"max_tokens": 8192, "max_tokens": 8192,
"temperature": 0.7, "temperature": 0.7,
"max_temperature": 1.0,
"stream": true "stream": true
}, },
"engine": "anthropic" "engine": "anthropic"

View File

@ -8,6 +8,7 @@
"inference_params": { "inference_params": {
"max_tokens": 4096, "max_tokens": 4096,
"temperature": 0.7, "temperature": 0.7,
"max_temperature": 1.0,
"stream": false "stream": false
}, },
"engine": "cohere" "engine": "cohere"
@ -21,6 +22,7 @@
"inference_params": { "inference_params": {
"max_tokens": 4096, "max_tokens": 4096,
"temperature": 0.7, "temperature": 0.7,
"max_temperature": 1.0,
"stream": false "stream": false
}, },
"engine": "cohere" "engine": "cohere"

View File

@ -0,0 +1,28 @@
[
{
"model": "deepseek-chat",
"object": "model",
"name": "DeepSeek Chat",
"version": "1.0",
"description": "The deepseek-chat model has been upgraded to DeepSeek-V3. deepseek-reasoner points to the new model DeepSeek-R1",
"inference_params": {
"max_tokens": 8192,
"temperature": 0.6,
"stream": true
},
"engine": "deepseek"
},
{
"model": "deepseek-reasoner",
"object": "model",
"name": "DeepSeek R1",
"version": "1.0",
"description": "CoT (Chain of Thought) is the reasoning content deepseek-reasoner gives before output the final answer. For details, please refer to Reasoning Model.",
"inference_params": {
"max_tokens": 8192,
"temperature": 0.6,
"stream": true
},
"engine": "deepseek"
}
]

View File

@ -0,0 +1,67 @@
[
{
"model": "gemini-2.0-flash",
"object": "model",
"name": "Gemini 2.0 Flash",
"version": "1.0",
"description": "Gemini 2.0 Flash delivers next-gen features and improved capabilities, including superior speed, native tool use, multimodal generation, and a 1M token context window.",
"inference_params": {
"max_tokens": 8192,
"temperature": 0.6,
"stream": true
},
"engine": "google_gemini"
},
{
"model": "gemini-2.0-flash-lite-preview",
"object": "model",
"name": "Gemini 2.0 Flash-Lite Preview",
"version": "1.0",
"description": "A Gemini 2.0 Flash model optimized for cost efficiency and low latency.",
"inference_params": {
"max_tokens": 8192,
"temperature": 0.6,
"stream": true
},
"engine": "google_gemini"
},
{
"model": "gemini-1.5-flash",
"object": "model",
"name": "Gemini 1.5 Flash",
"version": "1.0",
"description": "Gemini 1.5 Flash is a fast and versatile multimodal model for scaling across diverse tasks.",
"inference_params": {
"max_tokens": 8192,
"temperature": 0.6,
"stream": true
},
"engine": "google_gemini"
},
{
"model": "gemini-1.5-flash-8b",
"object": "model",
"name": "Gemini 1.5 Flash-8B",
"version": "1.0",
"description": "Gemini 1.5 Flash-8B is a small model designed for lower intelligence tasks.",
"inference_params": {
"max_tokens": 8192,
"temperature": 0.6,
"stream": true
},
"engine": "google_gemini"
},
{
"model": "gemini-1.5-pro",
"object": "model",
"name": "Gemini 1.5 Pro",
"version": "1.0",
"description": "Gemini 1.5 Pro is a mid-size multimodal model that is optimized for a wide-range of reasoning tasks. 1.5 Pro can process large amounts of data at once, including 2 hours of video, 19 hours of audio, codebases with 60,000 lines of code, or 2,000 pages of text. ",
"inference_params": {
"max_tokens": 8192,
"temperature": 0.6,
"stream": true
},
"engine": "google_gemini"
}
]

View File

@ -8,6 +8,7 @@
"inference_params": { "inference_params": {
"max_tokens": 32000, "max_tokens": 32000,
"temperature": 0.7, "temperature": 0.7,
"max_temperature": 1.0,
"top_p": 0.95, "top_p": 0.95,
"stream": true "stream": true
}, },
@ -22,6 +23,7 @@
"inference_params": { "inference_params": {
"max_tokens": 32000, "max_tokens": 32000,
"temperature": 0.7, "temperature": 0.7,
"max_temperature": 1.0,
"top_p": 0.95, "top_p": 0.95,
"stream": true "stream": true
}, },
@ -36,6 +38,7 @@
"inference_params": { "inference_params": {
"max_tokens": 32000, "max_tokens": 32000,
"temperature": 0.7, "temperature": 0.7,
"max_temperature": 1.0,
"top_p": 0.95, "top_p": 0.95,
"stream": true "stream": true
}, },

View File

@ -8,6 +8,7 @@
"inference_params": { "inference_params": {
"max_tokens": 1024, "max_tokens": 1024,
"temperature": 0.3, "temperature": 0.3,
"max_temperature": 1.0,
"top_p": 1, "top_p": 1,
"stream": false, "stream": false,
"frequency_penalty": 0, "frequency_penalty": 0,

View File

@ -79,12 +79,7 @@
"description": "OpenAI o1 is a new model with complex reasoning", "description": "OpenAI o1 is a new model with complex reasoning",
"format": "api", "format": "api",
"inference_params": { "inference_params": {
"max_tokens": 100000, "max_tokens": 100000
"temperature": 1,
"top_p": 1,
"stream": true,
"frequency_penalty": 0,
"presence_penalty": 0
}, },
"engine": "openai" "engine": "openai"
}, },
@ -97,11 +92,7 @@
"format": "api", "format": "api",
"inference_params": { "inference_params": {
"max_tokens": 32768, "max_tokens": 32768,
"temperature": 1, "stream": true
"top_p": 1,
"stream": true,
"frequency_penalty": 0,
"presence_penalty": 0
}, },
"engine": "openai" "engine": "openai"
}, },
@ -114,11 +105,20 @@
"format": "api", "format": "api",
"inference_params": { "inference_params": {
"max_tokens": 65536, "max_tokens": 65536,
"temperature": 1, "stream": true
"top_p": 1, },
"stream": true, "engine": "openai"
"frequency_penalty": 0, },
"presence_penalty": 0 {
"model": "o3-mini",
"object": "model",
"name": "OpenAI o3-mini",
"version": "1.0",
"description": "OpenAI most recent reasoning model, providing high intelligence at the same cost and latency targets of o1-mini.",
"format": "api",
"inference_params": {
"max_tokens": 100000,
"stream": true
}, },
"engine": "openai" "engine": "openai"
} }

View File

@ -1,16 +1,91 @@
[ [
{ {
"model": "open-router-auto", "model": "deepseek/deepseek-r1:free",
"object": "model", "object": "model",
"name": "OpenRouter", "name": "DeepSeek: R1",
"version": "1.0", "version": "1.0",
"description": " OpenRouter scouts for the lowest prices and best latencies/throughputs across dozens of providers, and lets you choose how to prioritize them.", "description": "OpenRouter scouts for the lowest prices and best latencies/throughputs across dozens of providers, and lets you choose how to prioritize them.",
"inference_params": { "inference_params": {
"max_tokens": 128000,
"temperature": 0.7, "temperature": 0.7,
"top_p": 0.95, "top_p": 0.95,
"frequency_penalty": 0, "frequency_penalty": 0,
"presence_penalty": 0 "presence_penalty": 0,
"stream": true
},
"engine": "openrouter"
},
{
"model": "deepseek/deepseek-r1-distill-llama-70b:free",
"object": "model",
"name": "DeepSeek: R1 Distill Llama 70B",
"version": "1.0",
"description": " OpenRouter scouts for the lowest prices and best latencies/throughputs across dozens of providers, and lets you choose how to prioritize them.",
"inference_params": {
"temperature": 0.7,
"top_p": 0.95,
"frequency_penalty": 0,
"presence_penalty": 0,
"stream": true
},
"engine": "openrouter"
},
{
"model": "deepseek/deepseek-r1-distill-llama-70b:free",
"object": "model",
"name": "DeepSeek: R1 Distill Llama 70B",
"version": "1.0",
"description": "OpenRouter scouts for the lowest prices and best latencies/throughputs across dozens of providers, and lets you choose how to prioritize them.",
"inference_params": {
"temperature": 0.7,
"top_p": 0.95,
"frequency_penalty": 0,
"presence_penalty": 0,
"stream": true
},
"engine": "openrouter"
},
{
"model": "meta-llama/llama-3.1-405b-instruct:free",
"object": "model",
"name": "Meta: Llama 3.1 405B Instruct",
"version": "1.0",
"description": "OpenRouter scouts for the lowest prices and best latencies/throughputs across dozens of providers, and lets you choose how to prioritize them.",
"inference_params": {
"temperature": 0.7,
"top_p": 0.95,
"frequency_penalty": 0,
"presence_penalty": 0,
"stream": true
},
"engine": "openrouter"
},
{
"model": "qwen/qwen-vl-plus:free",
"object": "model",
"name": "Qwen: Qwen VL Plus",
"version": "1.0",
"description": "OpenRouter scouts for the lowest prices and best latencies/throughputs across dozens of providers, and lets you choose how to prioritize them.",
"inference_params": {
"temperature": 0.7,
"top_p": 0.95,
"frequency_penalty": 0,
"presence_penalty": 0,
"stream": true
},
"engine": "openrouter"
},
{
"model": "qwen/qwen2.5-vl-72b-instruct:free",
"object": "model",
"name": "Qwen: Qwen2.5 VL 72B Instruct",
"version": "1.0",
"description": "OpenRouter scouts for the lowest prices and best latencies/throughputs across dozens of providers, and lets you choose how to prioritize them.",
"inference_params": {
"temperature": 0.7,
"top_p": 0.95,
"frequency_penalty": 0,
"presence_penalty": 0,
"stream": true
}, },
"engine": "openrouter" "engine": "openrouter"
} }

View File

@ -29,12 +29,10 @@
}, },
"dependencies": { "dependencies": {
"@janhq/core": "../../core/package.tgz", "@janhq/core": "../../core/package.tgz",
"cpu-instructions": "^0.0.13",
"ky": "^1.7.2", "ky": "^1.7.2",
"p-queue": "^8.0.1" "p-queue": "^8.0.1"
}, },
"bundledDependencies": [ "bundledDependencies": [
"cpu-instructions",
"@janhq/core" "@janhq/core"
], ],
"engines": { "engines": {

View File

@ -1,5 +1,5 @@
{ {
"id": "@janhq/inference-anthropic-extension", "id": "anthropic",
"type": "remote", "type": "remote",
"engine": "anthropic", "engine": "anthropic",
"url": "https://console.anthropic.com/settings/keys", "url": "https://console.anthropic.com/settings/keys",
@ -10,13 +10,14 @@
"transform_req": { "transform_req": {
"chat_completions": { "chat_completions": {
"url": "https://api.anthropic.com/v1/messages", "url": "https://api.anthropic.com/v1/messages",
"template": "{ {% for key, value in input_request %} {% if key == \"messages\" %} {% if input_request.messages.0.role == \"system\" %} \"system\": \"{{ input_request.messages.0.content }}\", \"messages\": [{% for message in input_request.messages %} {% if not loop.is_first %} {\"role\": \"{{ message.role }}\", \"content\": \"{{ message.content }}\" } {% if not loop.is_last %},{% endif %} {% endif %} {% endfor %}] {% else %} \"messages\": [{% for message in input_request.messages %} {\"role\": \"{{ message.role}}\", \"content\": \"{{ message.content }}\" } {% if not loop.is_last %},{% endif %} {% endfor %}] {% endif %} {% if not loop.is_last %},{% endif %} {% else if key == \"system\" or key == \"model\" or key == \"temperature\" or key == \"store\" or key == \"max_tokens\" or key == \"stream\" or key == \"presence_penalty\" or key == \"metadata\" or key == \"frequency_penalty\" or key == \"tools\" or key == \"tool_choice\" or key == \"logprobs\" or key == \"top_logprobs\" or key == \"logit_bias\" or key == \"n\" or key == \"modalities\" or key == \"prediction\" or key == \"response_format\" or key == \"service_tier\" or key == \"seed\" or key == \"stop\" or key == \"stream_options\" or key == \"top_p\" or key == \"parallel_tool_calls\" or key == \"user\" %}\"{{ key }}\": {{ tojson(value) }} {% if not loop.is_last %},{% endif %} {% endif %} {% endfor %} }" "template": "{ {% for key, value in input_request %} {% if key == \"messages\" %} {% if input_request.messages.0.role == \"system\" %} \"system\": {{ tojson(input_request.messages.0.content) }}, \"messages\": [{% for message in input_request.messages %} {% if not loop.is_first %} {\"role\": {{ tojson(message.role) }}, \"content\": {% if not message.content or message.content == \"\" %} \".\" {% else %} {{ tojson(message.content) }} {% endif %} } {% if not loop.is_last %},{% endif %} {% endif %} {% endfor %}] {% else %} \"messages\": [{% for message in input_request.messages %} {\"role\": {{ tojson(message.role) }}, \"content\": {% if not message.content or message.content == \"\" %} \".\" {% else %} {{ tojson(message.content) }} {% endif %} } {% if not loop.is_last %},{% endif %} {% endfor %}] {% endif %} {% if not loop.is_last %},{% endif %} {% else if key == \"system\" or key == \"model\" or key == \"temperature\" or key == \"store\" or key == \"max_tokens\" or key == \"stream\" or key == \"metadata\" or key == \"tools\" or key == \"tool_choice\" or key == \"logprobs\" or key == \"top_logprobs\" or key == \"logit_bias\" or key == \"n\" or key == \"modalities\" or key == \"prediction\" or key == \"response_format\" or key == \"service_tier\" or key == \"seed\" or key == \"stop\" or key == \"stream_options\" or key == \"top_p\" or key == \"parallel_tool_calls\" or key == \"user\" %}\"{{ key }}\": {{ tojson(value) }} {% if not loop.is_last %},{% endif %} {% endif %} {% endfor %} }"
} }
}, },
"transform_resp": { "transform_resp": {
"chat_completions": { "chat_completions": {
"template": "{% if input_request.stream %} {\"object\": \"chat.completion.chunk\", \"model\": \"{{ input_request.model }}\", \"choices\": [{\"index\": 0, \"delta\": { {% if input_request.type == \"message_start\" %} \"role\": \"assistant\", \"content\": null {% else if input_request.type == \"ping\" %} \"role\": \"assistant\", \"content\": null {% else if input_request.type == \"content_block_delta\" %} \"role\": \"assistant\", \"content\": \"{{ input_request.delta.text }}\" {% else if input_request.type == \"content_block_stop\" %} \"role\": \"assistant\", \"content\": null {% else if input_request.type == \"content_block_stop\" %} \"role\": \"assistant\", \"content\": null {% endif %} }, {% if input_request.type == \"content_block_stop\" %} \"finish_reason\": \"stop\" {% else %} \"finish_reason\": null {% endif %} }]} {% else %} {\"id\": \"{{ input_request.id }}\", \"created\": null, \"object\": \"chat.completion\", \"model\": \"{{ input_request.model }}\", \"choices\": [{ \"index\": 0, \"message\": { \"role\": \"{{ input_request.role }}\", \"content\": {% if input_request.content and input_request.content.0.type == \"text\" %} \"{{input_request.content.0.text}}\" {% else %} null {% endif %}, \"refusal\": null }, \"logprobs\": null, \"finish_reason\": \"{{ input_request.stop_reason }}\" } ], \"usage\": { \"prompt_tokens\": {{ input_request.usage.input_tokens }}, \"completion_tokens\": {{ input_request.usage.output_tokens }}, \"total_tokens\": {{ input_request.usage.input_tokens + input_request.usage.output_tokens }}, \"prompt_tokens_details\": { \"cached_tokens\": 0 }, \"completion_tokens_details\": { \"reasoning_tokens\": 0, \"accepted_prediction_tokens\": 0, \"rejected_prediction_tokens\": 0 } }, \"system_fingerprint\": \"fp_6b68a8204b\"} {% endif %}" "template": "{% if input_request.stream %} {\"object\": \"chat.completion.chunk\", \"model\": \"{{ input_request.model }}\", \"choices\": [{\"index\": 0, \"delta\": { {% if input_request.type == \"message_start\" %} \"role\": \"assistant\", \"content\": null {% else if input_request.type == \"ping\" %} \"role\": \"assistant\", \"content\": null {% else if input_request.type == \"content_block_delta\" %} \"role\": \"assistant\", \"content\": \"{{ input_request.delta.text }}\" {% else if input_request.type == \"content_block_stop\" %} \"role\": \"assistant\", \"content\": null {% else if input_request.type == \"content_block_stop\" %} \"role\": \"assistant\", \"content\": null {% endif %} }, {% if input_request.type == \"content_block_stop\" %} \"finish_reason\": \"stop\" {% else %} \"finish_reason\": null {% endif %} }]} {% else %} {{tojson(input_request)}} {% endif %}"
}
} }
},
"explore_models_url": "https://docs.anthropic.com/en/docs/about-claude/models"
} }
} }
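For reference, the updated transform_req template above maps an OpenAI-style chat request onto Anthropic's Messages API: a leading system message is lifted into a top-level "system" field, message roles and contents are now serialized with tojson, empty message content is replaced with "." (the API rejects empty strings), and other whitelisted keys (model, max_tokens, stream, temperature, tools, ...) are copied through unchanged. A minimal TypeScript sketch of the same mapping; the types and function name here are illustrative, not the extension's actual code:

interface ChatMessage { role: string; content: string }
interface ChatRequest { model: string; messages: ChatMessage[]; max_tokens?: number; stream?: boolean }

// Sketch only: mirrors what the inja template above produces for the request body.
function toAnthropicBody(req: ChatRequest) {
  const [first, ...rest] = req.messages
  const hasSystem = first?.role === 'system'
  // Anthropic rejects empty message content, so fall back to "." like the template does.
  const safeContent = (m: ChatMessage) => (m.content && m.content !== '' ? m.content : '.')
  return {
    ...(hasSystem ? { system: first.content } : {}),
    messages: (hasSystem ? rest : req.messages).map((m) => ({
      role: m.role,
      content: safeContent(m),
    })),
    model: req.model,
    max_tokens: req.max_tokens,
    stream: req.stream,
  }
}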

View File

@ -1,5 +1,5 @@
{ {
"id": "@janhq/inference-cohere-extension", "id": "cohere",
"type": "remote", "type": "remote",
"engine": "cohere", "engine": "cohere",
"url": "https://dashboard.cohere.com/api-keys", "url": "https://dashboard.cohere.com/api-keys",
@ -10,13 +10,14 @@
"transform_req": { "transform_req": {
"chat_completions": { "chat_completions": {
"url": "https://api.cohere.ai/v1/chat", "url": "https://api.cohere.ai/v1/chat",
"template": "{ {% for key, value in input_request %} {% if key == \"messages\" %} {% if input_request.messages.0.role == \"system\" %} \"preamble\": \"{{ input_request.messages.0.content }}\", {% if length(input_request.messages) > 2 %} \"chatHistory\": [{% for message in input_request.messages %} {% if not loop.is_first and not loop.is_last %} {\"role\": {% if message.role == \"user\" %} \"USER\" {% else %} \"CHATBOT\" {% endif %}, \"content\": \"{{ message.content }}\" } {% if loop.index < length(input_request.messages) - 2 %},{% endif %} {% endif %} {% endfor %}], {% endif %} \"message\": \"{{ last(input_request.messages).content }}\" {% else %} {% if length(input_request.messages) > 2 %} \"chatHistory\": [{% for message in input_request.messages %} {% if not loop.is_last %} { \"role\": {% if message.role == \"user\" %} \"USER\" {% else %} \"CHATBOT\" {% endif %}, \"content\": \"{{ message.content }}\" } {% if loop.index < length(input_request.messages) - 2 %},{% endif %} {% endif %} {% endfor %}],{% endif %}\"message\": \"{{ last(input_request.messages).content }}\" {% endif %}{% if not loop.is_last %},{% endif %} {% else if key == \"system\" or key == \"model\" or key == \"temperature\" or key == \"store\" or key == \"max_tokens\" or key == \"stream\" or key == \"presence_penalty\" or key == \"metadata\" or key == \"frequency_penalty\" or key == \"tools\" or key == \"tool_choice\" or key == \"logprobs\" or key == \"top_logprobs\" or key == \"logit_bias\" or key == \"n\" or key == \"modalities\" or key == \"prediction\" or key == \"response_format\" or key == \"service_tier\" or key == \"seed\" or key == \"stop\" or key == \"stream_options\" or key == \"top_p\" or key == \"parallel_tool_calls\" or key == \"user\" %} \"{{ key }}\": {{ tojson(value) }} {% if not loop.is_last %},{% endif %} {% endif %} {% endfor %} }" "template": "{ {% for key, value in input_request %} {% if key == \"messages\" %} {% if input_request.messages.0.role == \"system\" %} \"preamble\": {{ tojson(input_request.messages.0.content) }}, {% if length(input_request.messages) > 2 %} \"chatHistory\": [{% for message in input_request.messages %} {% if not loop.is_first and not loop.is_last %} {\"role\": {% if message.role == \"user\" %} \"USER\" {% else %} \"CHATBOT\" {% endif %}, \"content\": \"{{ message.content }}\" } {% if loop.index < length(input_request.messages) - 2 %},{% endif %} {% endif %} {% endfor %}], {% endif %} \"message\": \"{{ last(input_request.messages).content }}\" {% else %} {% if length(input_request.messages) > 2 %} \"chatHistory\": [{% for message in input_request.messages %} {% if not loop.is_last %} { \"role\": {% if message.role == \"user\" %} \"USER\" {% else %} \"CHATBOT\" {% endif %}, \"content\": \"{{ message.content }}\" } {% if loop.index < length(input_request.messages) - 2 %},{% endif %} {% endif %} {% endfor %}],{% endif %}\"message\": \"{{ last(input_request.messages).content }}\" {% endif %}{% if not loop.is_last %},{% endif %} {% else if key == \"system\" or key == \"model\" or key == \"temperature\" or key == \"store\" or key == \"max_tokens\" or key == \"stream\" or key == \"presence_penalty\" or key == \"metadata\" or key == \"frequency_penalty\" or key == \"tools\" or key == \"tool_choice\" or key == \"logprobs\" or key == \"top_logprobs\" or key == \"logit_bias\" or key == \"n\" or key == \"modalities\" or key == \"prediction\" or key == \"response_format\" or key == \"service_tier\" or key == \"seed\" or key == \"stop\" or key == \"stream_options\" or key == \"top_p\" or 
key == \"parallel_tool_calls\" or key == \"user\" %} \"{{ key }}\": {{ tojson(value) }} {% if not loop.is_last %},{% endif %} {% endif %} {% endfor %} }"
} }
}, },
"transform_resp": { "transform_resp": {
"chat_completions": { "chat_completions": {
"template": "{% if input_request.stream %} {\"object\": \"chat.completion.chunk\", \"model\": \"{{ input_request.model }}\", \"choices\": [{\"index\": 0, \"delta\": { {% if input_request.event_type == \"text-generation\" %} \"role\": \"assistant\", \"content\": \"{{ input_request.text }}\" {% else %} \"role\": \"assistant\", \"content\": null {% endif %} }, {% if input_request.event_type == \"stream-end\" %} \"finish_reason\": \"{{ input_request.finish_reason }}\" {% else %} \"finish_reason\": null {% endif %} }]} {% else %} {\"id\": \"{{ input_request.generation_id }}\", \"created\": null, \"object\": \"chat.completion\", \"model\": {% if input_request.model %} \"{{ input_request.model }}\" {% else %} \"command-r-plus-08-2024\" {% endif %}, \"choices\": [{ \"index\": 0, \"message\": { \"role\": \"assistant\", \"content\": {% if not input_request.text %} null {% else %} \"{{ input_request.text }}\" {% endif %}, \"refusal\": null }, \"logprobs\": null, \"finish_reason\": \"{{ input_request.finish_reason }}\" } ], \"usage\": { \"prompt_tokens\": {{ input_request.meta.tokens.input_tokens }}, \"completion_tokens\": {{ input_request.meta.tokens.output_tokens }},\"total_tokens\": {{ input_request.meta.tokens.input_tokens + input_request.meta.tokens.output_tokens }}, \"prompt_tokens_details\": { \"cached_tokens\": 0 },\"completion_tokens_details\": { \"reasoning_tokens\": 0, \"accepted_prediction_tokens\": 0, \"rejected_prediction_tokens\": 0 } }, \"system_fingerprint\": \"fp_6b68a8204b\"} {% endif %}" "template": "{% if input_request.stream %} {\"object\": \"chat.completion.chunk\", \"model\": \"{{ input_request.model }}\", \"choices\": [{\"index\": 0, \"delta\": { {% if input_request.event_type == \"text-generation\" %} \"role\": \"assistant\", \"content\": \"{{ input_request.text }}\" {% else %} \"role\": \"assistant\", \"content\": null {% endif %} }, {% if input_request.event_type == \"stream-end\" %} \"finish_reason\": \"{{ input_request.finish_reason }}\" {% else %} \"finish_reason\": null {% endif %} }]} {% else %} {\"id\": \"{{ input_request.generation_id }}\", \"created\": null, \"object\": \"chat.completion\", \"model\": {% if input_request.model %} \"{{ input_request.model }}\" {% else %} \"command-r-plus-08-2024\" {% endif %}, \"choices\": [{ \"index\": 0, \"message\": { \"role\": \"assistant\", \"content\": {% if not input_request.text %} null {% else %} \"{{ input_request.text }}\" {% endif %}, \"refusal\": null }, \"logprobs\": null, \"finish_reason\": \"{{ input_request.finish_reason }}\" } ], \"usage\": { \"prompt_tokens\": {{ input_request.meta.tokens.input_tokens }}, \"completion_tokens\": {{ input_request.meta.tokens.output_tokens }},\"total_tokens\": {{ input_request.meta.tokens.input_tokens + input_request.meta.tokens.output_tokens }}, \"prompt_tokens_details\": { \"cached_tokens\": 0 },\"completion_tokens_details\": { \"reasoning_tokens\": 0, \"accepted_prediction_tokens\": 0, \"rejected_prediction_tokens\": 0 } }, \"system_fingerprint\": \"fp_6b68a8204b\"} {% endif %}"
} }
} },
"explore_models_url": "https://docs.cohere.com/v2/docs/models"
} }
} }

View File

@ -0,0 +1,23 @@
{
"id": "deepseek",
"type": "remote",
"engine": "deepseek",
"url": "https://platform.deepseek.com/api_keys",
"api_key": "",
"metadata": {
"get_models_url": "https://api.deepseek.com/models",
"header_template": "Authorization: Bearer {{api_key}}",
"transform_req": {
"chat_completions": {
"url": "https://api.deepseek.com/chat/completions",
"template": "{ {% set first = true %} {% for key, value in input_request %} {% if key == \"messages\" or key == \"temperature\" or key == \"store\" or key == \"max_tokens\" or key == \"stream\" or key == \"presence_penalty\" or key == \"metadata\" or key == \"frequency_penalty\" or key == \"tools\" or key == \"tool_choice\" or key == \"logprobs\" or key == \"top_logprobs\" or key == \"logit_bias\" or key == \"n\" or key == \"modalities\" or key == \"prediction\" or key == \"response_format\" or key == \"service_tier\" or key == \"model\" or key == \"seed\" or key == \"stop\" or key == \"stream_options\" or key == \"top_p\" or key == \"parallel_tool_calls\" or key == \"user\" %} {% if not first %},{% endif %} \"{{ key }}\": {{ tojson(value) }} {% set first = false %} {% endif %} {% endfor %} }"
}
},
"transform_resp": {
"chat_completions": {
"template": "{{tojson(input_request)}}"
}
},
"explore_models_url": "https://api-docs.deepseek.com/quick_start/pricing"
}
}
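The new DeepSeek entry (like the Gemini one below) only supplies endpoint URLs and a bearer-token header template; the request template whitelists standard OpenAI-compatible fields and the response body is passed through as-is with tojson. A rough sketch of how such an entry could be turned into an HTTP call; the render helper stands in for the real template engine and is an assumption, not the shipped code:

// Minimal {{placeholder}} substitution standing in for the real template engine (assumption).
function render(template: string, vars: Record<string, string>): string {
  return template.replace(/\{\{\s*(\w+)\s*\}\}/g, (_, key: string) => vars[key] ?? '')
}

// Sketch: send a chat completion using the metadata fields of the engine entry above.
async function sendChat(apiKey: string, body: unknown): Promise<Response> {
  const header = render('Authorization: Bearer {{api_key}}', { api_key: apiKey })
  const [name, value] = header.split(': ')
  return fetch('https://api.deepseek.com/chat/completions', {
    method: 'POST',
    headers: { [name]: value, 'Content-Type': 'application/json' },
    body: JSON.stringify(body),
  })
}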

View File

@ -0,0 +1,23 @@
{
"id": "google_gemini",
"type": "remote",
"engine": "google_gemini",
"url": "https://aistudio.google.com/apikey",
"api_key": "",
"metadata": {
"get_models_url": "https://generativelanguage.googleapis.com/v1beta/models",
"header_template": "Authorization: Bearer {{api_key}}",
"transform_req": {
"chat_completions": {
"url": "https://generativelanguage.googleapis.com/v1beta/openai/chat/completions",
"template": "{ {% set first = true %} {% for key, value in input_request %} {% if key == \"messages\" or key == \"temperature\" or key == \"store\" or key == \"max_tokens\" or key == \"stream\" or key == \"presence_penalty\" or key == \"metadata\" or key == \"frequency_penalty\" or key == \"tools\" or key == \"tool_choice\" or key == \"logprobs\" or key == \"top_logprobs\" or key == \"logit_bias\" or key == \"n\" or key == \"modalities\" or key == \"prediction\" or key == \"response_format\" or key == \"service_tier\" or key == \"model\" or key == \"seed\" or key == \"stop\" or key == \"stream_options\" or key == \"top_p\" or key == \"parallel_tool_calls\" or key == \"user\" %} {% if not first %},{% endif %} \"{{ key }}\": {{ tojson(value) }} {% set first = false %} {% endif %} {% endfor %} }"
}
},
"transform_resp": {
"chat_completions": {
"template": "{{tojson(input_request)}}"
}
},
"explore_models_url": "https://ai.google.dev/gemini-api/docs/models/gemini"
}
}

View File

@ -1,5 +1,5 @@
{ {
"id": "@janhq/inference-groq-extension", "id": "groq",
"type": "remote", "type": "remote",
"engine": "groq", "engine": "groq",
"url": "https://console.groq.com/keys", "url": "https://console.groq.com/keys",
@ -15,8 +15,9 @@
}, },
"transform_resp": { "transform_resp": {
"chat_completions": { "chat_completions": {
"template": "{ {% set first = true %} {% for key, value in input_request %} {% if key == \"choices\" or key == \"created\" or key == \"model\" or key == \"service_tier\" or key == \"system_fingerprint\" or key == \"stream\" or key == \"object\" or key == \"usage\" %} {% if not first %},{% endif %} \"{{ key }}\": {{ tojson(value) }} {% set first = false %} {% endif %} {% endfor %} }" "template": "{{tojson(input_request)}}"
}
} }
},
"explore_models_url": "https://console.groq.com/docs/models"
} }
} }

View File

@ -1,5 +1,5 @@
{ {
"id": "@janhq/inference-martian-extension", "id": "martian",
"type": "remote", "type": "remote",
"engine": "martian", "engine": "martian",
"url": "https://withmartian.com/dashboard", "url": "https://withmartian.com/dashboard",
@ -15,8 +15,9 @@
}, },
"transform_resp": { "transform_resp": {
"chat_completions": { "chat_completions": {
"template": "{ {% set first = true %} {% for key, value in input_request %} {% if key == \"choices\" or key == \"created\" or key == \"model\" or key == \"service_tier\" or key == \"system_fingerprint\" or key == \"stream\" or key == \"object\" or key == \"usage\" %} {% if not first %},{% endif %} \"{{ key }}\": {{ tojson(value) }} {% set first = false %} {% endif %} {% endfor %} }" "template": "{{tojson(input_request)}}"
}
} }
},
"explore_models_url": "https://withmartian.github.io/llm-adapters/"
} }
} }

View File

@ -1,5 +1,5 @@
{ {
"id": "@janhq/inference-mistral-extension", "id": "mistral",
"type": "remote", "type": "remote",
"engine": "mistral", "engine": "mistral",
"url": "https://console.mistral.ai/api-keys/", "url": "https://console.mistral.ai/api-keys/",
@ -17,6 +17,7 @@
"chat_completions": { "chat_completions": {
"template": "{{tojson(input_request)}}" "template": "{{tojson(input_request)}}"
} }
} },
"explore_models_url": "https://docs.mistral.ai/getting-started/models/models_overview/"
} }
} }

View File

@ -1,5 +1,5 @@
{ {
"id": "@janhq/inference-nvidia-extension", "id": "nvidia",
"type": "remote", "type": "remote",
"engine": "nvidia", "engine": "nvidia",
"url": "https://org.ngc.nvidia.com/setup/personal-keys", "url": "https://org.ngc.nvidia.com/setup/personal-keys",
@ -15,8 +15,9 @@
}, },
"transform_resp": { "transform_resp": {
"chat_completions": { "chat_completions": {
"template": "{ {% set first = true %} {% for key, value in input_request %} {% if key == \"choices\" or key == \"created\" or key == \"model\" or key == \"service_tier\" or key == \"system_fingerprint\" or key == \"stream\" or key == \"object\" or key == \"usage\" %} {% if not first %},{% endif %} \"{{ key }}\": {{ tojson(value) }} {% set first = false %} {% endif %} {% endfor %} }" "template": "{{tojson(input_request)}}"
}
} }
},
"explore_models_url": "https://build.nvidia.com/models"
} }
} }

View File

@ -1,5 +1,5 @@
{ {
"id": "@janhq/inference-openai-extension", "id": "openai",
"type": "remote", "type": "remote",
"engine": "openai", "engine": "openai",
"url": "https://platform.openai.com/account/api-keys", "url": "https://platform.openai.com/account/api-keys",
@ -10,13 +10,14 @@
"transform_req": { "transform_req": {
"chat_completions": { "chat_completions": {
"url": "https://api.openai.com/v1/chat/completions", "url": "https://api.openai.com/v1/chat/completions",
"template": "{ {% set first = true %} {% for key, value in input_request %} {% if key == \"model\" or key == \"temperature\" or key == \"store\" or key == \"messages\" or key == \"stream\" or key == \"presence_penalty\" or key == \"metadata\" or key == \"frequency_penalty\" or key == \"tools\" or key == \"tool_choice\" or key == \"logprobs\" or key == \"top_logprobs\" or key == \"logit_bias\" or key == \"n\" or key == \"modalities\" or key == \"prediction\" or key == \"response_format\" or key == \"service_tier\" or key == \"seed\" or key == \"stream_options\" or key == \"top_p\" or key == \"parallel_tool_calls\" or key == \"user\" or key == \"max_tokens\" or ((input_request.model == \"o1\" or input_request.model == \"o1-preview\" or input_request.model == \"o1-mini\") and (key == \"stop\")) %} {% if not first %} , {% endif %} {% if key == \"messages\" and (input_request.model == \"o1\" or input_request.model == \"o1-preview\" or input_request.model == \"o1-mini\") and input_request.messages.0.role == \"system\" %} \"messages\": [{% for message in input_request.messages %} {% if not loop.is_first %} { \"role\": \"{{ message.role }}\", \"content\": \"{{ message.content }}\" } {% if not loop.is_last %} , {% endif %} {% endif %} {% endfor %}] {% else if key == \"max_tokens\" and (input_request.model == \"o1\" or input_request.model == \"o1-preview\" or input_request.model == \"o1-mini\") %} \"max_completion_tokens\": {{ tojson(value) }} {% else %} \"{{ key }}\": {{ tojson(value) }} {% set first = false %} {% endif %} {% endif %} {% endfor %} }" "template": "{ {% set first = true %} {% for key, value in input_request %} {% if key == \"model\" or key == \"temperature\" or key == \"store\" or key == \"messages\" or key == \"stream\" or key == \"presence_penalty\" or key == \"metadata\" or key == \"frequency_penalty\" or key == \"tools\" or key == \"tool_choice\" or key == \"logprobs\" or key == \"top_logprobs\" or key == \"logit_bias\" or key == \"n\" or key == \"modalities\" or key == \"prediction\" or key == \"response_format\" or key == \"service_tier\" or key == \"seed\" or key == \"stream_options\" or key == \"top_p\" or key == \"parallel_tool_calls\" or key == \"user\" or key == \"max_tokens\" or ((input_request.model == \"o1\" or input_request.model == \"o1-preview\" or input_request.model == \"o1-mini\" or input_request.model == \"o3\" or input_request.model == \"o3-mini\") and (key == \"stop\")) %} {% if not first %} , {% endif %} {% if key == \"messages\" and (input_request.model == \"o1\" or input_request.model == \"o1-preview\" or input_request.model == \"o1-mini\") and input_request.messages.0.role == \"system\" %} \"messages\": [{% for message in input_request.messages %} {% if not loop.is_first %} { \"role\": \"{{ message.role }}\", \"content\": \"{{ message.content }}\" } {% if not loop.is_last %} , {% endif %} {% endif %} {% endfor %}] {% else if key == \"max_tokens\" and (input_request.model == \"o1\" or input_request.model == \"o1-preview\" or input_request.model == \"o1-mini\" or input_request.model == \"o3\" or input_request.model == \"o3-mini\") %} \"max_completion_tokens\": {{ tojson(value) }} {% set first = false %} {% else %} \"{{ key }}\": {{ tojson(value) }} {% set first = false %} {% endif %} {% endif %} {% endfor %} }"
} }
}, },
"transform_resp": { "transform_resp": {
"chat_completions": { "chat_completions": {
"template": "{ {% set first = true %} {% for key, value in input_request %} {% if key == \"choices\" or key == \"created\" or key == \"model\" or key == \"service_tier\" or key == \"stream\" or key == \"object\" or key == \"usage\" %} {% if not first %},{% endif %} \"{{ key }}\": {{ tojson(value) }} {% set first = false %} {% endif %} {% endfor %} }" "template": "{{tojson(input_request)}}"
}
} }
},
"explore_models_url": "https://platform.openai.com/docs/models"
} }
} }
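The OpenAI request template now extends the o-series handling: for o1, o1-preview, o1-mini, o3 and o3-mini the max_tokens field is emitted as max_completion_tokens (and the stop key is only forwarded for those models), while the o1 family still drops a leading system message. The equivalent logic written out as plain TypeScript for readability; this is an illustration of the template's behavior, not code that ships:

const O1_FAMILY = ['o1', 'o1-preview', 'o1-mini']
const MAX_COMPLETION_TOKEN_MODELS = [...O1_FAMILY, 'o3', 'o3-mini']

interface OpenAIChatRequest {
  model: string
  max_tokens?: number
  messages: { role: string; content: string }[]
  [key: string]: unknown
}

// Sketch of the remapping the template above performs before the request is sent.
function adaptForOpenAI(req: OpenAIChatRequest): Record<string, unknown> {
  const out: Record<string, unknown> = { ...req }
  if (MAX_COMPLETION_TOKEN_MODELS.includes(req.model) && req.max_tokens !== undefined) {
    out.max_completion_tokens = req.max_tokens
    delete out.max_tokens
  }
  if (O1_FAMILY.includes(req.model) && req.messages[0]?.role === 'system') {
    // o1-family requests do not accept a system role, so the template drops it.
    out.messages = req.messages.slice(1)
  }
  return out
}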

View File

@ -1,5 +1,5 @@
{ {
"id": "@janhq/inference-openrouter-extension", "id": "openrouter",
"type": "remote", "type": "remote",
"engine": "openrouter", "engine": "openrouter",
"url": "https://openrouter.ai/keys", "url": "https://openrouter.ai/keys",
@ -10,13 +10,14 @@
"transform_req": { "transform_req": {
"chat_completions": { "chat_completions": {
"url": "https://openrouter.ai/api/v1/chat/completions", "url": "https://openrouter.ai/api/v1/chat/completions",
"template": "{ {% set first = true %} {% for key, value in input_request %} {% if key == \"messages\" or key == \"temperature\" or key == \"store\" or key == \"max_tokens\" or key == \"stream\" or key == \"presence_penalty\" or key == \"metadata\" or key == \"frequency_penalty\" or key == \"tools\" or key == \"tool_choice\" or key == \"logprobs\" or key == \"top_logprobs\" or key == \"logit_bias\" or key == \"n\" or key == \"modalities\" or key == \"prediction\" or key == \"response_format\" or key == \"service_tier\" or key == \"seed\" or key == \"stop\" or key == \"stream_options\" or key == \"top_p\" or key == \"parallel_tool_calls\" or key == \"user\" %} {% if not first %},{% endif %} \"{{ key }}\": {{ tojson(value) }} {% set first = false %} {% endif %} {% endfor %} }" "template": "{ {% set first = true %} {% for key, value in input_request %} {% if key == \"messages\" or key == \"temperature\" or key == \"store\" or key == \"max_tokens\" or key == \"stream\" or key == \"presence_penalty\" or key == \"metadata\" or key == \"frequency_penalty\" or key == \"tools\" or key == \"tool_choice\" or key == \"logprobs\" or key == \"top_logprobs\" or key == \"logit_bias\" or key == \"n\" or key == \"modalities\" or key == \"prediction\" or key == \"response_format\" or key == \"service_tier\" or key == \"model\" or key == \"seed\" or key == \"stop\" or key == \"stream_options\" or key == \"top_p\" or key == \"parallel_tool_calls\" or key == \"user\" %} {% if not first %},{% endif %} \"{{ key }}\": {{ tojson(value) }} {% set first = false %} {% endif %} {% endfor %} }"
} }
}, },
"transform_resp": { "transform_resp": {
"chat_completions": { "chat_completions": {
"template": "{ {% set first = true %} {% for key, value in input_request %} {% if key == \"choices\" or key == \"created\" or key == \"model\" or key == \"service_tier\" or key == \"system_fingerprint\" or key == \"stream\" or key == \"object\" or key == \"usage\" %} {% if not first %},{% endif %} \"{{ key }}\": {{ tojson(value) }} {% set first = false %} {% endif %} {% endfor %} }" "template": "{{tojson(input_request)}}"
}
} }
},
"explore_models_url": "https://openrouter.ai/models"
} }
} }

View File

@ -13,9 +13,19 @@ export default defineConfig([
NODE: JSON.stringify(`${pkgJson.name}/${pkgJson.node}`), NODE: JSON.stringify(`${pkgJson.name}/${pkgJson.node}`),
API_URL: JSON.stringify('http://127.0.0.1:39291'), API_URL: JSON.stringify('http://127.0.0.1:39291'),
SOCKET_URL: JSON.stringify('ws://127.0.0.1:39291'), SOCKET_URL: JSON.stringify('ws://127.0.0.1:39291'),
PLATFORM: JSON.stringify(process.platform),
CORTEX_ENGINE_VERSION: JSON.stringify('v0.1.49'), CORTEX_ENGINE_VERSION: JSON.stringify('v0.1.49'),
DEFAULT_REMOTE_ENGINES: JSON.stringify(engines), DEFAULT_REMOTE_ENGINES: JSON.stringify(engines),
DEFAULT_REMOTE_MODELS: JSON.stringify(models), DEFAULT_REMOTE_MODELS: JSON.stringify(models),
DEFAULT_REQUEST_PAYLOAD_TRANSFORM: JSON.stringify(
`{ {% set first = true %} {% for key, value in input_request %} {% if key == "messages" or key == "model" or key == "temperature" or key == "store" or key == "max_tokens" or key == "stream" or key == "presence_penalty" or key == "metadata" or key == "frequency_penalty" or key == "tools" or key == "tool_choice" or key == "logprobs" or key == "top_logprobs" or key == "logit_bias" or key == "n" or key == "modalities" or key == "prediction" or key == "response_format" or key == "service_tier" or key == "seed" or key == "stop" or key == "stream_options" or key == "top_p" or key == "parallel_tool_calls" or key == "user" %} {% if not first %},{% endif %} "{{ key }}": {{ tojson(value) }} {% set first = false %} {% endif %} {% endfor %} }`
),
DEFAULT_RESPONSE_BODY_TRANSFORM: JSON.stringify(
'{{tojson(input_request)}}'
),
DEFAULT_REQUEST_HEADERS_TRANSFORM: JSON.stringify(
'Authorization: Bearer {{api_key}}'
),
}, },
}, },
{ {
@ -29,15 +39,4 @@ export default defineConfig([
CORTEX_ENGINE_VERSION: JSON.stringify('v0.1.49'), CORTEX_ENGINE_VERSION: JSON.stringify('v0.1.49'),
}, },
}, },
{
input: 'src/node/cpuInfo.ts',
output: {
format: 'cjs',
file: 'dist/node/cpuInfo.js',
},
external: ['cpu-instructions'],
resolve: {
extensions: ['.ts', '.js', '.svg'],
},
},
]) ])
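The build config also gains three DEFAULT_*_TRANSFORM constants and a PLATFORM constant. rolldown's define option substitutes these bare identifiers at bundle time (they are declared for the type checker in the .d.ts change below), so the extension can fall back to a generic OpenAI-compatible payload template, a pass-through response template, and a bearer-token header when a remote engine config omits them; the cpuInfo child-process bundle is no longer built. A tiny sketch of how such a define-injected constant reads in source (illustrative only, with a hypothetical helper):

// At build time rolldown textually replaces this identifier with the JSON string
// configured in `define`, so at runtime it behaves like an ordinary constant.
declare const DEFAULT_REQUEST_HEADERS_TRANSFORM: string // e.g. 'Authorization: Bearer {{api_key}}'

// Hypothetical helper: fill the {{api_key}} placeholder from the default template.
function defaultAuthHeader(apiKey: string): string {
  return DEFAULT_REQUEST_HEADERS_TRANSFORM.replace('{{api_key}}', apiKey)
}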

View File

@ -1,7 +1,11 @@
declare const API_URL: string declare const API_URL: string
declare const CORTEX_ENGINE_VERSION: string declare const CORTEX_ENGINE_VERSION: string
declare const PLATFORM: string
declare const SOCKET_URL: string declare const SOCKET_URL: string
declare const NODE: string declare const NODE: string
declare const DEFAULT_REQUEST_PAYLOAD_TRANSFORM: string
declare const DEFAULT_RESPONSE_BODY_TRANSFORM: string
declare const DEFAULT_REQUEST_HEADERS_TRANSFORM: string
declare const DEFAULT_REMOTE_ENGINES: ({ declare const DEFAULT_REMOTE_ENGINES: ({
id: string id: string

View File

@ -19,12 +19,16 @@ import ky, { HTTPError } from 'ky'
import PQueue from 'p-queue' import PQueue from 'p-queue'
import { EngineError } from './error' import { EngineError } from './error'
import { getJanDataFolderPath } from '@janhq/core' import { getJanDataFolderPath } from '@janhq/core'
import { engineVariant } from './utils'
interface ModelList {
data: Model[]
}
/** /**
* JSONEngineManagementExtension is a EngineManagementExtension implementation that provides * JanEngineManagementExtension is a EngineManagementExtension implementation that provides
* functionality for managing engines. * functionality for managing engines.
*/ */
export default class JSONEngineManagementExtension extends EngineManagementExtension { export default class JanEngineManagementExtension extends EngineManagementExtension {
queue = new PQueue({ concurrency: 1 }) queue = new PQueue({ concurrency: 1 })
/** /**
@ -63,13 +67,12 @@ export default class JSONEngineManagementExtension extends EngineManagementExten
* @returns A Promise that resolves to an object of list engines. * @returns A Promise that resolves to an object of list engines.
*/ */
async getRemoteModels(name: string): Promise<any> { async getRemoteModels(name: string): Promise<any> {
return this.queue.add(() => return ky
ky
.get(`${API_URL}/v1/models/remote/${name}`) .get(`${API_URL}/v1/models/remote/${name}`)
.json<Model[]>() .json<ModelList>()
.then((e) => e) .catch(() => ({
.catch(() => []) data: [],
) as Promise<Model[]> })) as Promise<ModelList>
} }
/** /**
@ -138,9 +141,38 @@ export default class JSONEngineManagementExtension extends EngineManagementExten
* Add a new remote engine * Add a new remote engine
* @returns A Promise that resolves to intall of engine. * @returns A Promise that resolves to intall of engine.
*/ */
async addRemoteEngine(engineConfig: EngineConfig) { async addRemoteEngine(
engineConfig: EngineConfig,
persistModels: boolean = true
) {
// Populate default settings
if (
engineConfig.metadata?.transform_req?.chat_completions &&
!engineConfig.metadata.transform_req.chat_completions.template
)
engineConfig.metadata.transform_req.chat_completions.template =
DEFAULT_REQUEST_PAYLOAD_TRANSFORM
if (
engineConfig.metadata?.transform_resp?.chat_completions &&
!engineConfig.metadata.transform_resp.chat_completions?.template
)
engineConfig.metadata.transform_resp.chat_completions.template =
DEFAULT_RESPONSE_BODY_TRANSFORM
if (engineConfig.metadata && !engineConfig.metadata?.header_template)
engineConfig.metadata.header_template = DEFAULT_REQUEST_HEADERS_TRANSFORM
return this.queue.add(() => return this.queue.add(() =>
ky.post(`${API_URL}/v1/engines`, { json: engineConfig }).then((e) => e) ky.post(`${API_URL}/v1/engines`, { json: engineConfig }).then((e) => {
if (persistModels && engineConfig.metadata?.get_models_url) {
// Pull /models from remote models endpoint
return this.populateRemoteModels(engineConfig)
.then(() => e)
.catch(() => e)
}
return e
})
) as Promise<{ messages: string }> ) as Promise<{ messages: string }>
} }
@ -161,9 +193,25 @@ export default class JSONEngineManagementExtension extends EngineManagementExten
* @param model - Remote model object. * @param model - Remote model object.
*/ */
async addRemoteModel(model: Model) { async addRemoteModel(model: Model) {
return this.queue.add(() => return this.queue
ky.post(`${API_URL}/v1/models/add`, { json: model }).then((e) => e) .add(() =>
ky
.post(`${API_URL}/v1/models/add`, {
json: {
inference_params: {
max_tokens: 4096,
temperature: 0.7,
top_p: 0.95,
stream: true,
frequency_penalty: 0,
presence_penalty: 0,
},
...model,
},
})
.then((e) => e)
) )
.then(() => {})
} }
/** /**
@ -245,11 +293,7 @@ export default class JSONEngineManagementExtension extends EngineManagementExten
error instanceof EngineError error instanceof EngineError
) { ) {
const systemInfo = await systemInformation() const systemInfo = await systemInformation()
const variant = await executeOnMain( const variant = await engineVariant(systemInfo.gpuSetting)
NODE,
'engineVariant',
systemInfo.gpuSetting
)
await this.setDefaultEngineVariant(InferenceEngine.cortex_llamacpp, { await this.setDefaultEngineVariant(InferenceEngine.cortex_llamacpp, {
variant: variant, variant: variant,
version: `${CORTEX_ENGINE_VERSION}`, version: `${CORTEX_ENGINE_VERSION}`,
@ -293,14 +337,40 @@ export default class JSONEngineManagementExtension extends EngineManagementExten
data.api_key = api_key data.api_key = api_key
/// END - Migrate legacy api key settings /// END - Migrate legacy api key settings
await this.addRemoteEngine(data).catch(console.error) await this.addRemoteEngine(data, false).catch(console.error)
}) })
) )
events.emit(EngineEvent.OnEngineUpdate, {}) events.emit(EngineEvent.OnEngineUpdate, {})
DEFAULT_REMOTE_MODELS.forEach(async (data: Model) => { await Promise.all(
await this.addRemoteModel(data).catch(() => {}) DEFAULT_REMOTE_MODELS.map((data: Model) =>
}) this.addRemoteModel(data).catch(() => {})
)
)
events.emit(ModelEvent.OnModelsUpdate, { fetch: true }) events.emit(ModelEvent.OnModelsUpdate, { fetch: true })
} }
} }
/**
 * Pulls the models list from the remote provider and persists it
* @param engineConfig
* @returns
*/
private populateRemoteModels = async (engineConfig: EngineConfig) => {
return this.getRemoteModels(engineConfig.engine)
.then((models: ModelList) => {
if (models?.data)
Promise.all(
models.data.map((model) =>
this.addRemoteModel({
...model,
engine: engineConfig.engine as InferenceEngine,
model: model.model ?? model.id,
}).catch(console.info)
)
).then(() => {
events.emit(ModelEvent.OnModelsUpdate, { fetch: true })
})
})
.catch(console.info)
}
} }
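Taken together, the changes above mean addRemoteEngine now back-fills any missing chat-completions request/response templates and header template with the injected defaults, and, unless persistModels is false (as in the legacy-settings migration path), immediately pulls the provider's model list via getRemoteModels and persists each entry with baseline inference parameters before emitting OnModelsUpdate. A hedged usage sketch; the provider URL and config values are made up, and the EngineConfig field names follow the engine.json files shown earlier:

import type JanEngineManagementExtension from './index' // path assumed for the sketch

async function registerExampleProvider(ext: JanEngineManagementExtension) {
  // Templates are intentionally omitted: addRemoteEngine fills them in from
  // DEFAULT_REQUEST_PAYLOAD_TRANSFORM / DEFAULT_RESPONSE_BODY_TRANSFORM /
  // DEFAULT_REQUEST_HEADERS_TRANSFORM before posting to /v1/engines.
  await ext.addRemoteEngine({
    engine: 'example-provider',
    type: 'remote',
    api_key: 'sk-example',
    metadata: {
      get_models_url: 'https://api.example.com/v1/models',
      transform_req: { chat_completions: { url: 'https://api.example.com/v1/chat/completions' } },
      transform_resp: { chat_completions: {} },
    },
  } as any) // cast: the exact EngineConfig shape lives in @janhq/core
  // Because get_models_url is set and persistModels defaults to true, the
  // provider's models are fetched and added with default inference_params.
}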

View File

@ -1,27 +0,0 @@
import { cpuInfo } from 'cpu-instructions'
// Check the CPU info and determine the supported instruction set
const info = cpuInfo.cpuInfo().some((e) => e.toUpperCase() === 'AVX512')
? 'avx512'
: cpuInfo.cpuInfo().some((e) => e.toUpperCase() === 'AVX2')
? 'avx2'
: cpuInfo.cpuInfo().some((e) => e.toUpperCase() === 'AVX')
? 'avx'
: 'noavx'
// Send the result and wait for confirmation before exiting
new Promise<void>((resolve, reject) => {
// @ts-ignore
process.send(info, (error: Error | null) => {
if (error) {
reject(error)
} else {
resolve()
}
})
})
.then(() => process.exit(0))
.catch((error) => {
console.error('Failed to send info:', error)
process.exit(1)
})

View File

@ -1,7 +1,6 @@
import { describe, expect, it } from '@jest/globals' import { describe, expect, it } from '@jest/globals'
import engine from './index' import engine from './index'
import { GpuSetting } from '@janhq/core/node' import { GpuSetting } from '@janhq/core'
import { cpuInfo } from 'cpu-instructions'
import { fork } from 'child_process' import { fork } from 'child_process'
let testSettings: GpuSetting = { let testSettings: GpuSetting = {
@ -23,22 +22,12 @@ let testSettings: GpuSetting = {
} }
const originalPlatform = process.platform const originalPlatform = process.platform
jest.mock('cpu-instructions', () => ({
cpuInfo: {
cpuInfo: jest.fn(),
},
}))
let mockCpuInfo = cpuInfo.cpuInfo as jest.Mock
mockCpuInfo.mockReturnValue([])
jest.mock('@janhq/core/node', () => ({
jest.mock('@janhq/core', () => ({
appResourcePath: () => '.', appResourcePath: () => '.',
log: jest.fn(), log: jest.fn(),
})) }))
jest.mock('child_process', () => ({
fork: jest.fn(),
}))
const mockFork = fork as jest.Mock
describe('test executable cortex file', () => { describe('test executable cortex file', () => {
afterAll(function () { afterAll(function () {
@ -48,14 +37,7 @@ describe('test executable cortex file', () => {
}) })
it('executes on MacOS', () => { it('executes on MacOS', () => {
const mockProcess = {
on: jest.fn((event, callback) => {
if (event === 'message') {
callback('noavx')
}
}),
send: jest.fn(),
}
Object.defineProperty(process, 'platform', { Object.defineProperty(process, 'platform', {
value: 'darwin', value: 'darwin',
}) })
@ -63,7 +45,7 @@ describe('test executable cortex file', () => {
value: 'arm64', value: 'arm64',
}) })
mockFork.mockReturnValue(mockProcess)
expect(engine.engineVariant(testSettings)).resolves.toEqual('mac-arm64') expect(engine.engineVariant(testSettings)).resolves.toEqual('mac-arm64')
}) })
@ -83,7 +65,7 @@ describe('test executable cortex file', () => {
}), }),
send: jest.fn(), send: jest.fn(),
} }
mockFork.mockReturnValue(mockProcess)
Object.defineProperty(process, 'arch', { Object.defineProperty(process, 'arch', {
value: 'x64', value: 'x64',
}) })
@ -107,7 +89,6 @@ describe('test executable cortex file', () => {
}), }),
send: jest.fn(), send: jest.fn(),
} }
mockFork.mockReturnValue(mockProcess)
expect(engine.engineVariant()).resolves.toEqual('windows-amd64-avx') expect(engine.engineVariant()).resolves.toEqual('windows-amd64-avx')
}) })
@ -145,7 +126,6 @@ describe('test executable cortex file', () => {
}), }),
send: jest.fn(), send: jest.fn(),
} }
mockFork.mockReturnValue(mockProcess)
expect(engine.engineVariant(settings)).resolves.toEqual( expect(engine.engineVariant(settings)).resolves.toEqual(
'windows-amd64-avx2-cuda-11-7' 'windows-amd64-avx2-cuda-11-7'
@ -176,26 +156,11 @@ describe('test executable cortex file', () => {
}, },
], ],
} }
mockFork.mockReturnValue({
on: jest.fn((event, callback) => {
if (event === 'message') {
callback('noavx')
}
}),
send: jest.fn(),
})
expect(engine.engineVariant(settings)).resolves.toEqual( expect(engine.engineVariant(settings)).resolves.toEqual(
'windows-amd64-noavx-cuda-12-0' 'windows-amd64-noavx-cuda-12-0'
) )
mockFork.mockReturnValue({
on: jest.fn((event, callback) => {
if (event === 'message') {
callback('avx512')
}
}),
send: jest.fn(),
})
expect(engine.engineVariant(settings)).resolves.toEqual( expect(engine.engineVariant(settings)).resolves.toEqual(
'windows-amd64-avx2-cuda-12-0' 'windows-amd64-avx2-cuda-12-0'
) )
@ -209,14 +174,6 @@ describe('test executable cortex file', () => {
...testSettings, ...testSettings,
run_mode: 'cpu', run_mode: 'cpu',
} }
mockFork.mockReturnValue({
on: jest.fn((event, callback) => {
if (event === 'message') {
callback('noavx')
}
}),
send: jest.fn(),
})
expect(engine.engineVariant()).resolves.toEqual('linux-amd64-noavx') expect(engine.engineVariant()).resolves.toEqual('linux-amd64-noavx')
}) })
@ -245,16 +202,6 @@ describe('test executable cortex file', () => {
}, },
], ],
} }
mockFork.mockReturnValue({
on: jest.fn((event, callback) => {
if (event === 'message') {
callback('avx512')
}
}),
send: jest.fn(),
})
expect(engine.engineVariant(settings)).resolves.toBe( expect(engine.engineVariant(settings)).resolves.toBe(
'linux-amd64-avx2-cuda-11-7' 'linux-amd64-avx2-cuda-11-7'
) )
@ -284,14 +231,7 @@ describe('test executable cortex file', () => {
}, },
], ],
} }
mockFork.mockReturnValue({
on: jest.fn((event, callback) => {
if (event === 'message') {
callback('avx2')
}
}),
send: jest.fn(),
})
expect(engine.engineVariant(settings)).resolves.toEqual( expect(engine.engineVariant(settings)).resolves.toEqual(
'linux-amd64-avx2-cuda-12-0' 'linux-amd64-avx2-cuda-12-0'
@ -310,15 +250,6 @@ describe('test executable cortex file', () => {
const cpuInstructions = ['avx512', 'avx2', 'avx', 'noavx'] const cpuInstructions = ['avx512', 'avx2', 'avx', 'noavx']
cpuInstructions.forEach((instruction) => { cpuInstructions.forEach((instruction) => {
mockFork.mockReturnValue({
on: jest.fn((event, callback) => {
if (event === 'message') {
callback(instruction)
}
}),
send: jest.fn(),
})
expect(engine.engineVariant(settings)).resolves.toEqual( expect(engine.engineVariant(settings)).resolves.toEqual(
`linux-amd64-${instruction}` `linux-amd64-${instruction}`
) )
@ -335,14 +266,7 @@ describe('test executable cortex file', () => {
} }
const cpuInstructions = ['avx512', 'avx2', 'avx', 'noavx'] const cpuInstructions = ['avx512', 'avx2', 'avx', 'noavx']
cpuInstructions.forEach((instruction) => { cpuInstructions.forEach((instruction) => {
mockFork.mockReturnValue({
on: jest.fn((event, callback) => {
if (event === 'message') {
callback(instruction)
}
}),
send: jest.fn(),
})
expect(engine.engineVariant(settings)).resolves.toEqual( expect(engine.engineVariant(settings)).resolves.toEqual(
`windows-amd64-${instruction}` `windows-amd64-${instruction}`
) )
@ -376,14 +300,7 @@ describe('test executable cortex file', () => {
} }
const cpuInstructions = ['avx512', 'avx2', 'avx', 'noavx'] const cpuInstructions = ['avx512', 'avx2', 'avx', 'noavx']
cpuInstructions.forEach((instruction) => { cpuInstructions.forEach((instruction) => {
mockFork.mockReturnValue({
on: jest.fn((event, callback) => {
if (event === 'message') {
callback(instruction)
}
}),
send: jest.fn(),
})
expect(engine.engineVariant(settings)).resolves.toEqual( expect(engine.engineVariant(settings)).resolves.toEqual(
`windows-amd64-${instruction === 'avx512' || instruction === 'avx2' ? 'avx2' : 'noavx'}-cuda-12-0` `windows-amd64-${instruction === 'avx512' || instruction === 'avx2' ? 'avx2' : 'noavx'}-cuda-12-0`
) )
@ -417,14 +334,7 @@ describe('test executable cortex file', () => {
], ],
} }
cpuInstructions.forEach((instruction) => { cpuInstructions.forEach((instruction) => {
mockFork.mockReturnValue({
on: jest.fn((event, callback) => {
if (event === 'message') {
callback(instruction)
}
}),
send: jest.fn(),
})
expect(engine.engineVariant(settings)).resolves.toEqual( expect(engine.engineVariant(settings)).resolves.toEqual(
`linux-amd64-${instruction === 'avx512' || instruction === 'avx2' ? 'avx2' : 'noavx'}-cuda-12-0` `linux-amd64-${instruction === 'avx512' || instruction === 'avx2' ? 'avx2' : 'noavx'}-cuda-12-0`
) )
@ -459,14 +369,7 @@ describe('test executable cortex file', () => {
], ],
} }
cpuInstructions.forEach((instruction) => { cpuInstructions.forEach((instruction) => {
mockFork.mockReturnValue({
on: jest.fn((event, callback) => {
if (event === 'message') {
callback(instruction)
}
}),
send: jest.fn(),
})
expect(engine.engineVariant(settings)).resolves.toEqual( expect(engine.engineVariant(settings)).resolves.toEqual(
`linux-amd64-vulkan` `linux-amd64-vulkan`
) )

View File

@ -2,111 +2,10 @@ import * as path from 'path'
import { import {
appResourcePath, appResourcePath,
getJanDataFolderPath, getJanDataFolderPath,
GpuSetting,
log, log,
} from '@janhq/core/node' } from '@janhq/core/node'
import { fork } from 'child_process'
import { mkdir, readdir, symlink } from 'fs/promises' import { mkdir, readdir, symlink } from 'fs/promises'
/**
* The GPU runMode that will be set - either 'vulkan', 'cuda', or empty for cpu.
* @param settings
* @returns
*/
const gpuRunMode = (settings?: GpuSetting): string => {
if (process.platform === 'darwin')
// MacOS now has universal binaries
return ''
if (!settings) return ''
return settings.vulkan === true || settings.run_mode === 'cpu' ? '' : 'cuda'
}
/**
* The OS & architecture that the current process is running on.
* @returns win, mac-x64, mac-arm64, or linux
*/
const os = (): string => {
return process.platform === 'win32'
? 'windows-amd64'
: process.platform === 'darwin'
? process.arch === 'arm64'
? 'mac-arm64'
: 'mac-amd64'
: 'linux-amd64'
}
/**
* The CUDA version that will be set - either '11-7' or '12-0'.
* @param settings
* @returns
*/
const cudaVersion = (settings?: GpuSetting): '11-7' | '12-0' | undefined => {
const isUsingCuda =
settings?.vulkan !== true &&
settings?.run_mode === 'gpu' &&
!os().includes('mac')
if (!isUsingCuda) return undefined
return settings?.cuda?.version === '11' ? '11-7' : '12-0'
}
/**
* The CPU instructions that will be set - either 'avx512', 'avx2', 'avx', or 'noavx'.
* @returns
*/
const cpuInstructions = async (): Promise<string> => {
if (process.platform === 'darwin') return ''
const child = fork(path.join(__dirname, './cpuInfo.js')) // Path to the child process file
return new Promise((resolve, reject) => {
child.on('message', (cpuInfo?: string) => {
resolve(cpuInfo ?? 'noavx')
child.kill() // Kill the child process after receiving the result
})
child.on('error', (err) => {
resolve('noavx')
child.kill()
})
child.on('exit', (code) => {
if (code !== 0) {
resolve('noavx')
child.kill()
}
})
})
}
/**
* Find which variant to run based on the current platform.
*/
const engineVariant = async (gpuSetting?: GpuSetting): Promise<string> => {
const cpuInstruction = await cpuInstructions()
log(`[CORTEX]: CPU instruction: ${cpuInstruction}`)
let engineVariant = [
os(),
gpuSetting?.vulkan
? 'vulkan'
: gpuRunMode(gpuSetting) !== 'cuda'
? // CPU mode - support all variants
cpuInstruction
: // GPU mode - packaged CUDA variants of avx2 and noavx
cpuInstruction === 'avx2' || cpuInstruction === 'avx512'
? 'avx2'
: 'noavx',
gpuRunMode(gpuSetting),
cudaVersion(gpuSetting),
]
.filter((e) => !!e)
.join('-')
log(`[CORTEX]: Engine variant: ${engineVariant}`)
return engineVariant
}
/** /**
* Create symlink to each variant for the default bundled version * Create symlink to each variant for the default bundled version
@ -148,6 +47,5 @@ const symlinkEngines = async () => {
} }
export default { export default {
engineVariant,
symlinkEngines, symlinkEngines,
} }

View File

@ -0,0 +1,86 @@
import { GpuSetting, log } from '@janhq/core'
/**
 * The GPU run mode that will be set - either 'cuda' or empty for CPU (Vulkan is decided separately in engineVariant).
* @param settings
* @returns
*/
const gpuRunMode = (settings?: GpuSetting): string => {
return settings?.gpus?.some(
(gpu) =>
gpu.activated === true &&
gpu.additional_information &&
gpu.additional_information.driver_version
)
? 'cuda'
: ''
}
/**
* The OS & architecture that the current process is running on.
* @returns win, mac-x64, mac-arm64, or linux
*/
const os = (settings?: GpuSetting): string => {
return PLATFORM === 'win32'
? 'windows-amd64'
: PLATFORM === 'darwin'
? settings?.cpu?.arch === 'arm64'
? 'mac-arm64'
: 'mac-amd64'
: 'linux-amd64'
}
/**
* The CUDA version that will be set - either '11-7' or '12-0'.
* @param settings
* @returns
*/
const cudaVersion = (settings?: GpuSetting): '12-0' | '11-7' | undefined => {
const isUsingCuda =
settings?.vulkan !== true &&
settings?.gpus?.some((gpu) => gpu.activated === true) &&
!os().includes('mac')
if (!isUsingCuda) return undefined
// return settings?.cuda?.version === '11' ? '11-7' : '12-0'
return settings.gpus?.some((gpu) => gpu.version.includes('12'))
? '12-0'
: '11-7'
}
/**
* Find which variant to run based on the current platform.
*/
export const engineVariant = async (
gpuSetting?: GpuSetting
): Promise<string> => {
const platform = os(gpuSetting)
// There is no need to append the variant extension for mac
if (platform.startsWith('mac')) return platform
let engineVariant =
gpuSetting?.vulkan || gpuSetting?.gpus?.some((e) => !e.additional_information)
? [platform, 'vulkan']
: [
platform,
gpuRunMode(gpuSetting) === 'cuda' &&
(gpuSetting?.cpu?.instructions?.includes('avx2') ||
gpuSetting?.cpu?.instructions?.includes('avx512'))
? 'avx2'
: 'noavx',
gpuRunMode(gpuSetting),
cudaVersion(gpuSetting),
].filter(Boolean) // Remove any falsy values
let engineVariantString = engineVariant.join('-')
log(`[CORTEX]: Engine variant: ${engineVariantString}`)
return engineVariantString
}
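engineVariant now derives the llama.cpp variant purely from the GpuSetting snapshot (the PLATFORM define, CPU arch and instructions, activated GPUs and their reported CUDA major version) instead of forking the removed cpu-instructions probe. A small illustration of the expected mapping; the GpuSetting literal below is inferred from the code above and cast loosely, and the expected result assumes PLATFORM is 'win32':

import type { GpuSetting } from '@janhq/core'
import { engineVariant } from './utils' // path assumed

// Windows machine with an AVX2-capable CPU and one activated NVIDIA GPU on a CUDA 12 driver.
const exampleSetting = {
  vulkan: false,
  cpu: { arch: 'x64', instructions: ['avx2'] },
  gpus: [
    {
      activated: true,
      version: '12.0',
      additional_information: { driver_version: '535.104' },
    },
  ],
} as unknown as GpuSetting

// Expected: 'windows-amd64-avx2-cuda-12-0' (and plain 'mac-arm64' on Apple Silicon).
engineVariant(exampleSetting).then((variant) => console.log(variant))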

View File

@ -0,0 +1,5 @@
/** @type {import('ts-jest').JestConfigWithTsJest} */
module.exports = {
preset: 'ts-jest',
testEnvironment: 'node',
}

View File

@ -0,0 +1,48 @@
{
"name": "@janhq/hardware-management-extension",
"productName": "Hardware Management",
"version": "1.0.0",
"description": "Manages Better Hardware settings.",
"main": "dist/index.js",
"node": "dist/node/index.cjs.js",
"author": "Jan <service@jan.ai>",
"license": "MIT",
"scripts": {
"test": "jest",
"build": "rolldown -c rolldown.config.mjs",
"codesign:darwin": "../../.github/scripts/auto-sign.sh",
"codesign:win32:linux": "echo 'No codesigning required'",
"codesign": "run-script-os",
"build:publish": "rimraf *.tgz --glob || true && yarn build && yarn codesign && npm pack && cpx *.tgz ../../pre-install"
},
"exports": {
".": "./dist/index.js",
"./main": "./dist/module.js"
},
"devDependencies": {
"cpx": "^1.5.0",
"rimraf": "^3.0.2",
"rolldown": "^1.0.0-beta.1",
"run-script-os": "^1.1.6",
"ts-loader": "^9.5.0",
"typescript": "^5.3.3"
},
"dependencies": {
"@janhq/core": "../../core/package.tgz",
"cpu-instructions": "^0.0.13",
"ky": "^1.7.2",
"p-queue": "^8.0.1"
},
"bundledDependencies": [
"cpu-instructions",
"@janhq/core"
],
"hardwares": {
"node": ">=18.0.0"
},
"files": [
"dist/*",
"package.json",
"README.md"
]
}

View File

@ -0,0 +1,17 @@
import { defineConfig } from 'rolldown'
import pkgJson from './package.json' with { type: 'json' }
export default defineConfig([
{
input: 'src/index.ts',
output: {
format: 'esm',
file: 'dist/index.js',
},
define: {
NODE: JSON.stringify(`${pkgJson.name}/${pkgJson.node}`),
API_URL: JSON.stringify('http://127.0.0.1:39291'),
SOCKET_URL: JSON.stringify('ws://127.0.0.1:39291'),
},
},
])

View File

@ -0,0 +1,12 @@
declare const API_URL: string
declare const SOCKET_URL: string
declare const NODE: string
interface Core {
api: APIFunctions
events: EventEmitter
}
interface Window {
core?: Core | undefined
electronAPI?: any | undefined
}

View File

@ -0,0 +1,67 @@
import {
executeOnMain,
HardwareManagementExtension,
HardwareInformation,
} from '@janhq/core'
import ky from 'ky'
import PQueue from 'p-queue'
/**
* JSONHardwareManagementExtension is a HardwareManagementExtension implementation that provides
 * functionality for managing hardware.
*/
export default class JSONHardwareManagementExtension extends HardwareManagementExtension {
queue = new PQueue({ concurrency: 1 })
/**
* Called when the extension is loaded.
*/
async onLoad() {
// Run Healthcheck
this.queue.add(() => this.healthz())
}
/**
* Called when the extension is unloaded.
*/
onUnload() {}
/**
* Do health check on cortex.cpp
* @returns
*/
async healthz(): Promise<void> {
return ky
.get(`${API_URL}/healthz`, {
retry: { limit: 20, delay: () => 500, methods: ['get'] },
})
.then(() => {})
}
/**
 * @returns A Promise that resolves to the hardware information reported by cortex.cpp.
*/
async getHardware(): Promise<HardwareInformation> {
return this.queue.add(() =>
ky
.get(`${API_URL}/v1/hardware`)
.json<HardwareInformation>()
.then((e) => e)
) as Promise<HardwareInformation>
}
/**
 * @returns A Promise that resolves to the GPU activation result.
*/
async setAvtiveGpu(data: { gpus: number[] }): Promise<{
message: string
activated_gpus: number[]
}> {
return this.queue.add(() =>
ky.post(`${API_URL}/v1/hardware/activate`, { json: data }).then((e) => e)
) as Promise<{
message: string
activated_gpus: number[]
}>
}
}
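The new hardware extension is a thin ky client over cortex.cpp's /healthz, /v1/hardware and /v1/hardware/activate endpoints. A brief usage sketch; the caller obtains the loaded extension instance elsewhere, and the exact fields on HardwareInformation come from @janhq/core:

import type JSONHardwareManagementExtension from './index' // path assumed for the sketch

async function activateFirstGpu(ext: JSONHardwareManagementExtension) {
  const hardware = await ext.getHardware()
  console.log('hardware reported by cortex.cpp:', hardware)
  // Method name spelled as defined in the class above.
  return ext.setAvtiveGpu({ gpus: [0] })
}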

View File

@ -8,7 +8,9 @@
"forceConsistentCasingInFileNames": true, "forceConsistentCasingInFileNames": true,
"strict": false, "strict": false,
"skipLibCheck": true, "skipLibCheck": true,
"rootDir": "./src" "rootDir": "./src",
"resolveJsonModule": true
}, },
"include": ["./src"] "include": ["./src"],
"exclude": ["src/**/*.test.ts", "rolldown.config.mjs"]
} }

View File

@ -1 +1 @@
1.0.9-rc7 1.0.10

View File

@ -1,7 +1,7 @@
{ {
"name": "@janhq/inference-cortex-extension", "name": "@janhq/inference-cortex-extension",
"productName": "Cortex Inference Engine", "productName": "Cortex Inference Engine",
"version": "1.0.24", "version": "1.0.25",
"description": "This extension embeds cortex.cpp, a lightweight inference engine written in C++. See https://jan.ai.\nAdditional dependencies could be installed to run without Cuda Toolkit installation.", "description": "This extension embeds cortex.cpp, a lightweight inference engine written in C++. See https://jan.ai.\nAdditional dependencies could be installed to run without Cuda Toolkit installation.",
"main": "dist/index.js", "main": "dist/index.js",
"node": "dist/node/index.cjs.js", "node": "dist/node/index.cjs.js",

View File

@ -76,7 +76,7 @@
}, },
{ {
"key": "use_mmap", "key": "use_mmap",
"title": "MMAP", "title": "mmap",
"description": "Loads model files more efficiently by mapping them to memory, reducing RAM usage.", "description": "Loads model files more efficiently by mapping them to memory, reducing RAM usage.",
"controllerType": "checkbox", "controllerType": "checkbox",
"controllerProps": { "controllerProps": {

View File

@ -0,0 +1,35 @@
{
"sources": [
{
"filename": "DeepSeek-R1-Distill-Llama-70B-Q4_K_M.gguf",
"url": "https://huggingface.co/unsloth/DeepSeek-R1-Distill-Llama-70B-GGUF/resolve/main/DeepSeek-R1-Distill-Llama-70B-Q4_K_M.gguf"
}
],
"id": "deepseek-r1-distill-llama-70b",
"object": "model",
"name": "DeepSeek R1 Distill Llama 70B Q4",
"version": "1.0",
"description": "DeepSeek-R1 is a cheaper and open-source model that excels at agentic reasoning, superior multilingual capabilities, large context windows, and generalization across domains.",
"format": "gguf",
"settings": {
"ctx_len": 131072,
"prompt_template": "<User> {prompt} <Assistant>",
"llama_model_path": "DeepSeek-R1-Distill-Llama-70B-Q4_K_M.gguf",
"ngl": 81
},
"parameters": {
"temperature": 0.6,
"top_p": 0.95,
"stream": true,
"max_tokens": 131072,
"stop": [],
"frequency_penalty": 0,
"presence_penalty": 0
},
"metadata": {
"author": "DeepSeek",
"tags": ["70B", "Featured"],
"size": 42500000000
},
"engine": "llama-cpp"
}

View File

@ -0,0 +1,35 @@
{
"sources": [
{
"filename": "DeepSeek-R1-Distill-Llama-8B-Q5_K_M.gguf",
"url": "https://huggingface.co/unsloth/DeepSeek-R1-Distill-Llama-8B-GGUF/resolve/main/DeepSeek-R1-Distill-Llama-8B-Q5_K_M.gguf"
}
],
"id": "deepseek-r1-distill-llama-8b",
"object": "model",
"name": "DeepSeek R1 Distill Llama 8B Q5",
"version": "1.0",
"description": "DeepSeek-R1 is a cheaper and open-source model that excels at agentic reasoning, superior multilingual capabilities, large context windows, and generalization across domains.",
"format": "gguf",
"settings": {
"ctx_len": 131072,
"prompt_template": "<User> {prompt} <Assistant>",
"llama_model_path": "DeepSeek-R1-Distill-Llama-8B-Q5_K_M.gguf",
"ngl": 33
},
"parameters": {
"temperature": 0.6,
"top_p": 0.95,
"stream": true,
"max_tokens": 131072,
"stop": [],
"frequency_penalty": 0,
"presence_penalty": 0
},
"metadata": {
"author": "DeepSeek",
"tags": ["8B", "Featured"],
"size": 5730000000
},
"engine": "llama-cpp"
}

View File

@ -0,0 +1,35 @@
{
"sources": [
{
"filename": "DeepSeek-R1-Distill-Qwen-1.5B-Q5_K_M.gguf",
"url": "https://huggingface.co/unsloth/DeepSeek-R1-Distill-Qwen-1.5B-GGUF/resolve/main/DeepSeek-R1-Distill-Qwen-1.5B-Q5_K_M.gguf"
}
],
"id": "deepseek-r1-distill-qwen-1.5b",
"object": "model",
"name": "DeepSeek R1 Distill Qwen 1.5B Q5",
"version": "1.0",
"description": "DeepSeek-R1 is a cheaper and open-source model that excels at agentic reasoning, superior multilingual capabilities, large context windows, and generalization across domains.",
"format": "gguf",
"settings": {
"ctx_len": 131072,
"prompt_template": "<User> {prompt} <Assistant>",
"llama_model_path": "DeepSeek-R1-Distill-Qwen-1.5B-Q5_K_M.gguf",
"ngl": 29
},
"parameters": {
"temperature": 0.6,
"top_p": 0.95,
"stream": true,
"max_tokens": 131072,
"stop": [],
"frequency_penalty": 0,
"presence_penalty": 0
},
"metadata": {
"author": "DeepSeek",
"tags": ["1.5B", "Featured"],
"size": 1290000000
},
"engine": "llama-cpp"
}

View File

@ -0,0 +1,35 @@
{
"sources": [
{
"filename": "DeepSeek-R1-Distill-Qwen-14B-Q4_K_M.gguf",
"url": "https://huggingface.co/unsloth/DeepSeek-R1-Distill-Qwen-14B-GGUF/resolve/main/DeepSeek-R1-Distill-Qwen-14B-Q4_K_M.gguf"
}
],
"id": "deepseek-r1-distill-qwen-14b",
"object": "model",
"name": "DeepSeek R1 Distill Qwen 14B Q4",
"version": "1.0",
"description": "DeepSeek-R1 is a cheaper and open-source model that excels at agentic reasoning, superior multilingual capabilities, large context windows, and generalization across domains.",
"format": "gguf",
"settings": {
"ctx_len": 131072,
"prompt_template": "<User> {prompt} <Assistant>",
"llama_model_path": "DeepSeek-R1-Distill-Qwen-14B-Q4_K_M.gguf",
"ngl": 49
},
"parameters": {
"temperature": 0.6,
"top_p": 0.95,
"stream": true,
"max_tokens": 131072,
"stop": [],
"frequency_penalty": 0,
"presence_penalty": 0
},
"metadata": {
"author": "DeepSeek",
"tags": ["14B", "Featured"],
"size": 8990000000
},
"engine": "llama-cpp"
}

View File

@ -0,0 +1,35 @@
{
"sources": [
{
"filename": "DeepSeek-R1-Distill-Qwen-32B-Q4_K_M.gguf",
"url": "https://huggingface.co/unsloth/DeepSeek-R1-Distill-Qwen-32B-GGUF/resolve/main/DeepSeek-R1-Distill-Qwen-32B-Q4_K_M.gguf"
}
],
"id": "deepseek-r1-distill-qwen-32b",
"object": "model",
"name": "DeepSeek R1 Distill Qwen 32B Q4",
"version": "1.0",
"description": "DeepSeek-R1 is a cheaper and open-source model that excels at agentic reasoning, superior multilingual capabilities, large context windows, and generalization across domains.",
"format": "gguf",
"settings": {
"ctx_len": 131072,
"prompt_template": "<User> {prompt} <Assistant>",
"llama_model_path": "DeepSeek-R1-Distill-Qwen-32B-Q4_K_M.gguf",
"ngl": 65
},
"parameters": {
"temperature": 0.6,
"top_p": 0.95,
"stream": true,
"max_tokens": 131072,
"stop": [],
"frequency_penalty": 0,
"presence_penalty": 0
},
"metadata": {
"author": "DeepSeek",
"tags": ["32B", "Featured"],
"size": 19900000000
},
"engine": "llama-cpp"
}

View File

@ -0,0 +1,35 @@
{
"sources": [
{
"filename": "DeepSeek-R1-Distill-Qwen-7B-Q5_K_M.gguf",
"url": "https://huggingface.co/unsloth/DeepSeek-R1-Distill-Qwen-7B-GGUF/resolve/main/DeepSeek-R1-Distill-Qwen-7B-Q5_K_M.gguf"
}
],
"id": "deepseek-r1-distill-qwen-7b",
"object": "model",
"name": "DeepSeek R1 Distill Qwen 7B Q5",
"version": "1.0",
"description": "DeepSeek-R1 is a cheaper and open-source model that excels at agentic reasoning, superior multilingual capabilities, large context windows, and generalization across domains.",
"format": "gguf",
"settings": {
"ctx_len": 131072,
"prompt_template": "<User> {prompt} <Assistant>",
"llama_model_path": "DeepSeek-R1-Distill-Qwen-7B-Q5_K_M.gguf",
"ngl": 29
},
"parameters": {
"temperature": 0.6,
"top_p": 0.95,
"stream": true,
"max_tokens": 131072,
"stop": [],
"frequency_penalty": 0,
"presence_penalty": 0
},
"metadata": {
"author": "DeepSeek",
"tags": ["7B", "Featured"],
"size": 5440000000
},
"engine": "llama-cpp"
}
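
These catalog entries are bundled into the extension at build time (see the rolldown change below), and their `parameters` block maps directly onto inference request fields. The snippet below is an illustrative sketch of that mapping; the request shape and message content are assumptions, not code from this commit.

```typescript
import deepseekR1DistillQwen_7b from './resources/models/deepseek-r1-distill-qwen-7b/model.json' with { type: 'json' }

// Illustrative only: translate one catalog entry into an OpenAI-style
// chat-completion payload (the payload field names are assumptions).
const payload = {
  model: deepseekR1DistillQwen_7b.id,                           // 'deepseek-r1-distill-qwen-7b'
  temperature: deepseekR1DistillQwen_7b.parameters.temperature, // 0.6
  top_p: deepseekR1DistillQwen_7b.parameters.top_p,             // 0.95
  stream: deepseekR1DistillQwen_7b.parameters.stream,           // true
  max_tokens: deepseekR1DistillQwen_7b.parameters.max_tokens,   // 131072
  messages: [{ role: 'user', content: 'Summarize this repo in one sentence.' }],
}
```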

View File

@ -22,19 +22,13 @@
"top_p": 0.95, "top_p": 0.95,
"stream": true, "stream": true,
"max_tokens": 8192, "max_tokens": 8192,
"stop": [ "stop": ["<|end_of_text|>", "<|eot_id|>", "<|eom_id|>"],
"<|end_of_text|>",
"<|eot_id|>",
"<|eom_id|>"
],
"frequency_penalty": 0, "frequency_penalty": 0,
"presence_penalty": 0 "presence_penalty": 0
}, },
"metadata": { "metadata": {
"author": "MetaAI", "author": "MetaAI",
"tags": [ "tags": ["8B", "Featured"],
"8B", "Featured"
],
"size": 4920000000 "size": 4920000000
}, },
"engine": "llama-cpp" "engine": "llama-cpp"

View File

@ -49,6 +49,13 @@ import qwen2514bJson from './resources/models/qwen2.5-14b-instruct/model.json' w
import qwen2532bJson from './resources/models/qwen2.5-32b-instruct/model.json' with { type: 'json' } import qwen2532bJson from './resources/models/qwen2.5-32b-instruct/model.json' with { type: 'json' }
import qwen2572bJson from './resources/models/qwen2.5-72b-instruct/model.json' with { type: 'json' } import qwen2572bJson from './resources/models/qwen2.5-72b-instruct/model.json' with { type: 'json' }
import deepseekR1DistillQwen_1_5b from './resources/models/deepseek-r1-distill-qwen-1.5b/model.json' with { type: 'json' }
import deepseekR1DistillQwen_7b from './resources/models/deepseek-r1-distill-qwen-7b/model.json' with { type: 'json' }
import deepseekR1DistillQwen_14b from './resources/models/deepseek-r1-distill-qwen-14b/model.json' with { type: 'json' }
import deepseekR1DistillQwen_32b from './resources/models/deepseek-r1-distill-qwen-32b/model.json' with { type: 'json' }
import deepseekR1DistillLlama_8b from './resources/models/deepseek-r1-distill-llama-8b/model.json' with { type: 'json' }
import deepseekR1DistillLlama_70b from './resources/models/deepseek-r1-distill-llama-70b/model.json' with { type: 'json' }
export default defineConfig([ export default defineConfig([
{ {
input: 'src/index.ts', input: 'src/index.ts',
@ -106,6 +113,12 @@ export default defineConfig([
qwen2514bJson, qwen2514bJson,
qwen2532bJson, qwen2532bJson,
qwen2572bJson, qwen2572bJson,
deepseekR1DistillQwen_1_5b,
deepseekR1DistillQwen_7b,
deepseekR1DistillQwen_14b,
deepseekR1DistillQwen_32b,
deepseekR1DistillLlama_8b,
deepseekR1DistillLlama_70b,
]), ]),
NODE: JSON.stringify(`${packageJson.name}/${packageJson.node}`), NODE: JSON.stringify(`${packageJson.name}/${packageJson.node}`),
SETTINGS: JSON.stringify(defaultSettingJson), SETTINGS: JSON.stringify(defaultSettingJson),

View File

@ -112,8 +112,8 @@ export default class JanInferenceCortexExtension extends LocalOAIEngine {
if (!Number.isNaN(threads_number)) this.cpu_threads = threads_number if (!Number.isNaN(threads_number)) this.cpu_threads = threads_number
// Run the process watchdog // Run the process watchdog
const systemInfo = await systemInformation() // const systemInfo = await systemInformation()
this.queue.add(() => executeOnMain(NODE, 'run', systemInfo)) this.queue.add(() => executeOnMain(NODE, 'run'))
this.queue.add(() => this.healthz()) this.queue.add(() => this.healthz())
this.subscribeToEvents() this.subscribeToEvents()

View File

@ -16,15 +16,20 @@ let watchdog: ProcessWatchdog | undefined = undefined
* Spawns a Nitro subprocess. * Spawns a Nitro subprocess.
* @returns A promise that resolves when the Nitro subprocess is started. * @returns A promise that resolves when the Nitro subprocess is started.
*/ */
function run(systemInfo?: SystemInformation): Promise<any> { function run(): Promise<any> {
log(`[CORTEX]:: Spawning cortex subprocess...`) log(`[CORTEX]:: Spawning cortex subprocess...`)
return new Promise<void>(async (resolve, reject) => { return new Promise<void>(async (resolve, reject) => {
let gpuVisibleDevices = systemInfo?.gpuSetting?.gpus_in_use.join(',') ?? '' // let gpuVisibleDevices = systemInfo?.gpuSetting?.gpus_in_use.join(',') ?? ''
let binaryName = `cortex-server${process.platform === 'win32' ? '.exe' : ''}` let binaryName = `cortex-server${
process.platform === 'win32' ? '.exe' : ''
}`
const binPath = path.join(__dirname, '..', 'bin') const binPath = path.join(__dirname, '..', 'bin')
const executablePath = path.join(binPath, binaryName) const executablePath = path.join(binPath, binaryName)
addEnvPaths(binPath)
const sharedPath = path.join(appResourcePath(), 'shared') const sharedPath = path.join(appResourcePath(), 'shared')
// Execute the binary // Execute the binary
log(`[CORTEX]:: Spawn cortex at path: ${executablePath}`) log(`[CORTEX]:: Spawn cortex at path: ${executablePath}`)
@ -44,15 +49,17 @@ function run(systemInfo?: SystemInformation): Promise<any> {
`${path.join(dataFolderPath, '.janrc')}`, `${path.join(dataFolderPath, '.janrc')}`,
'--data_folder_path', '--data_folder_path',
dataFolderPath, dataFolderPath,
'--loglevel',
'INFO',
], ],
{ {
env: { env: {
...process.env, ...process.env,
CUDA_VISIBLE_DEVICES: gpuVisibleDevices, // CUDA_VISIBLE_DEVICES: gpuVisibleDevices,
// Vulkan - Support 1 device at a time for now // // Vulkan - Support 1 device at a time for now
...(gpuVisibleDevices?.length > 0 && { // ...(gpuVisibleDevices?.length > 0 && {
GGML_VK_VISIBLE_DEVICES: gpuVisibleDevices, // GGML_VK_VISIBLE_DEVICES: gpuVisibleDevices,
}), // }),
}, },
cwd: sharedPath, cwd: sharedPath,
} }
@ -71,6 +78,22 @@ function dispose() {
watchdog?.terminate() watchdog?.terminate()
} }
/**
* Set the environment paths for the cortex subprocess
* @param dest directory to append to PATH (Windows) or LD_LIBRARY_PATH (Linux/macOS)
*/
function addEnvPaths(dest: string) {
// Add engine path to the PATH and LD_LIBRARY_PATH
if (process.platform === 'win32') {
process.env.PATH = (process.env.PATH || '').concat(path.delimiter, dest)
} else {
process.env.LD_LIBRARY_PATH = (process.env.LD_LIBRARY_PATH || '').concat(
path.delimiter,
dest
)
}
}
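
A rough sketch of how `addEnvPaths` fits into the spawn sequence: the child process inherits the augmented `process.env`, so engine libraries shipped next to the binary resolve without a system-wide install. The paths below are illustrative assumptions, and the extension itself goes through a `ProcessWatchdog` rather than calling `spawn` directly, so treat this as a simplified stand-in.

```typescript
import { spawn } from 'child_process'
import path from 'path'

// Hypothetical locations, for illustration only.
const binPath = path.join(__dirname, '..', 'bin')
const executablePath = path.join(binPath, 'cortex-server')

addEnvPaths(binPath) // PATH (Windows) or LD_LIBRARY_PATH (elsewhere) now ends with binPath

// The spawned server inherits process.env, so libraries bundled in binPath
// are found by the dynamic loader at startup.
const child = spawn(executablePath, ['--loglevel', 'INFO'], {
  env: { ...process.env },
})
child.on('exit', (code) => console.log(`cortex-server exited with ${code}`))
```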
/** /**
* Cortex process info * Cortex process info
*/ */

View File

@ -15,8 +15,6 @@ import {
} from '@janhq/core' } from '@janhq/core'
import { CortexAPI } from './cortex' import { CortexAPI } from './cortex'
import { scanModelsFolder } from './legacy/model-json' import { scanModelsFolder } from './legacy/model-json'
import { downloadModel } from './legacy/download'
import { systemInformation } from '@janhq/core'
import { deleteModelFiles } from './legacy/delete' import { deleteModelFiles } from './legacy/delete'
export enum Settings { export enum Settings {
@ -71,18 +69,6 @@ export default class JanModelExtension extends ModelExtension {
* @returns A Promise that resolves when the model is downloaded. * @returns A Promise that resolves when the model is downloaded.
*/ */
async pullModel(model: string, id?: string, name?: string): Promise<void> { async pullModel(model: string, id?: string, name?: string): Promise<void> {
if (id) {
const model: Model = ModelManager.instance().get(id)
// Clip vision model - should not be handled by cortex.cpp
// TensorRT model - should not be handled by cortex.cpp
if (
model &&
(model.engine === InferenceEngine.nitro_tensorrt_llm ||
model.settings.vision_model)
) {
return downloadModel(model, (await systemInformation()).gpuSetting)
}
}
/** /**
* Sending POST to /models/pull/{id} endpoint to pull the model * Sending POST to /models/pull/{id} endpoint to pull the model
*/ */

View File

@ -2,15 +2,12 @@ import {
downloadFile, downloadFile,
DownloadRequest, DownloadRequest,
fs, fs,
GpuSetting,
InferenceEngine,
joinPath, joinPath,
Model, Model,
} from '@janhq/core' } from '@janhq/core'
export const downloadModel = async ( export const downloadModel = async (
model: Model, model: Model,
gpuSettings?: GpuSetting,
network?: { ignoreSSL?: boolean; proxy?: string } network?: { ignoreSSL?: boolean; proxy?: string }
): Promise<void> => { ): Promise<void> => {
const homedir = 'file://models' const homedir = 'file://models'
@ -27,41 +24,6 @@ export const downloadModel = async (
JSON.stringify(model, null, 2) JSON.stringify(model, null, 2)
) )
if (model.engine === InferenceEngine.nitro_tensorrt_llm) {
if (!gpuSettings || gpuSettings.gpus.length === 0) {
console.error('No GPU found. Please check your GPU setting.')
return
}
const firstGpu = gpuSettings.gpus[0]
if (!firstGpu.name.toLowerCase().includes('nvidia')) {
console.error('No Nvidia GPU found. Please check your GPU setting.')
return
}
const gpuArch = firstGpu.arch
if (gpuArch === undefined) {
console.error('No GPU architecture found. Please check your GPU setting.')
return
}
if (!supportedGpuArch.includes(gpuArch)) {
console.debug(
`Your GPU: ${JSON.stringify(firstGpu)} is not supported. Only 30xx, 40xx series are supported.`
)
return
}
const os = 'windows' // TODO: remove this hard coded value
const newSources = model.sources.map((source) => {
const newSource = { ...source }
newSource.url = newSource.url
.replace(/<os>/g, os)
.replace(/<gpuarch>/g, gpuArch)
return newSource
})
model.sources = newSources
}
console.debug(`Download sources: ${JSON.stringify(model.sources)}`) console.debug(`Download sources: ${JSON.stringify(model.sources)}`)
if (model.sources.length > 1) { if (model.sources.length > 1) {

View File

@ -1,75 +0,0 @@
# Create a Jan Extension using TypeScript
Use this template to bootstrap the creation of a TypeScript Jan extension. 🚀
## Create Your Own Extension
To create your own extension, you can use this repository as a template! Just follow the instructions below:
1. Click the Use this template button at the top of the repository
2. Select Create a new repository
3. Select an owner and name for your new repository
4. Click Create repository
5. Clone your new repository
## Initial Setup
After you've cloned the repository to your local machine or codespace, you'll need to perform some initial setup steps before you can develop your extension.
> [!NOTE]
>
> You'll need to have a reasonably modern version of
> [Node.js](https://nodejs.org) handy. If you are using a version manager like
> [`nodenv`](https://github.com/nodenv/nodenv) or
> [`nvm`](https://github.com/nvm-sh/nvm), you can run `nodenv install` in the
> root of your repository to install the version specified in
> [`package.json`](./package.json). Otherwise, 20.x or later should work!
1. :hammer_and_wrench: Install the dependencies
```bash
npm install
```
1. :building_construction: Package the TypeScript for distribution
```bash
npm run bundle
```
1. :white_check_mark: Check your artifact
There will be a `.tgz` file in your extension directory now.
## Update the Extension Metadata
The [`package.json`](package.json) file defines metadata about your extension, such as
extension name, main entry, description and version.
When you copy this repository, update `package.json` with the name and description of your extension.
## Update the Extension Code
The [`src/`](./src/) directory is the heart of your extension! This contains the
source code that will be run when your extension functions are invoked. You can replace the
contents of this directory with your own code.
There are a few things to keep in mind when writing your extension code:
- Most Jan Extension functions are processed asynchronously.
In `index.ts`, you will see that the extension function will return a `Promise<any>`.
```typescript
import { events, MessageEvent, MessageRequest } from '@janhq/core'
function onStart(): Promise<any> {
return events.on(MessageEvent.OnMessageSent, (data: MessageRequest) =>
this.inference(data)
)
}
```
For more information about the Jan Extension Core module, see the
[documentation](https://github.com/janhq/jan/blob/main/core/README.md).
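As a further illustration, a complete extension entry point typically pairs an `onLoad` hook with an `onUnload` cleanup. The sketch below assumes a `BaseExtension` export from `@janhq/core`; treat the class and method names as an example rather than the template's actual contents. The monitoring extension removed later in this diff follows the same pattern with a concrete `MonitoringExtension` base class.

```typescript
import { BaseExtension, events, MessageEvent, MessageRequest } from '@janhq/core'

// Minimal sketch of an extension entry point (assumed API surface).
export default class MyExtension extends BaseExtension {
  async onLoad() {
    // Subscribe to events when Jan loads the extension.
    events.on(MessageEvent.OnMessageSent, (data: MessageRequest) =>
      this.handleMessage(data)
    )
  }

  onUnload(): void {
    // Tear down timers, subprocesses, or listeners here.
  }

  private async handleMessage(data: MessageRequest) {
    // Extension-specific work goes here.
  }
}
```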
So, what are you waiting for? Go ahead and start customizing your extension!

View File

@ -1,2 +0,0 @@
@echo off
.\node_modules\.bin\download https://catalog.jan.ai/vulkaninfoSDK.exe -o ./bin

View File

@ -1,49 +0,0 @@
{
"name": "@janhq/monitoring-extension",
"productName": "System Monitoring",
"version": "1.0.10",
"description": "Provides system health and OS level data.",
"main": "dist/index.js",
"node": "dist/node/index.cjs.js",
"author": "Jan <service@jan.ai>",
"license": "AGPL-3.0",
"scripts": {
"build": "rolldown -c rolldown.config.mjs && yarn download-artifacts",
"download-artifacts": "run-script-os && cpx \"bin/**\" \"dist/bin\"",
"download-artifacts:darwin": "echo 'No artifacts to download for darwin'",
"download-artifacts:win32": "download.bat",
"download-artifacts:linux": "download https://catalog.jan.ai/vulkaninfo -o ./bin && chmod +x ./bin/vulkaninfo",
"build:publish": "rimraf *.tgz --glob || true && yarn build && npm pack && cpx *.tgz ../../pre-install"
},
"exports": {
".": "./dist/index.js",
"./main": "./dist/node/index.cjs.js"
},
"devDependencies": {
"@types/node": "^20.11.4",
"@types/node-os-utils": "^1.3.4",
"cpx": "^1.5.0",
"download-cli": "^1.1.1",
"rimraf": "^3.0.2",
"rolldown": "1.0.0-beta.1",
"run-script-os": "^1.1.6",
"typescript": "^5.3.3"
},
"dependencies": {
"@janhq/core": "../../core/package.tgz",
"node-os-utils": "^1.3.7"
},
"files": [
"dist/*",
"package.json",
"README.md"
],
"bundleDependencies": [
"node-os-utils",
"@janhq/core"
],
"installConfig": {
"hoistingLimits": "workspaces"
},
"packageManager": "yarn@4.5.3"
}

View File

@ -1,22 +0,0 @@
[
{
"key": "log-enabled",
"title": "Enable App Logs",
"description": "Saves app logs locally on your computer. This enables you to send us crash reports.",
"controllerType": "checkbox",
"controllerProps": {
"value": true
}
},
{
"key": "log-cleaning-interval",
"title": "Log Cleaning Interval",
"description": "Automatically delete local logs after a certain time interval (in milliseconds).",
"controllerType": "input",
"controllerProps": {
"value": "120000",
"placeholder": "Interval in milliseconds. E.g. 120000",
"textAlign": "right"
}
}
]

View File

@ -1,32 +0,0 @@
import { defineConfig } from 'rolldown'
import packageJson from './package.json' with { type: 'json' }
import settingJson from './resources/settings.json' with { type: 'json' }
export default defineConfig([
{
input: 'src/index.ts',
output: {
format: 'esm',
file: 'dist/index.js',
},
platform: 'browser',
define: {
NODE: JSON.stringify(`${packageJson.name}/${packageJson.node}`),
SETTINGS: JSON.stringify(settingJson),
},
},
{
input: 'src/node/index.ts',
external: ['@janhq/core/node'],
output: {
format: 'cjs',
file: 'dist/node/index.cjs.js',
sourcemap: false,
inlineDynamicImports: true,
},
resolve: {
extensions: ['.js', '.ts', '.json'],
},
platform: 'node',
},
])

View File

@ -1,19 +0,0 @@
declare const NODE: string
declare const SETTINGS: SettingComponentProps[]
type CpuGpuInfo = {
cpu: {
usage: number
}
gpu: GpuInfo[]
}
type GpuInfo = {
id: string
name: string
temperature: string
utilization: string
memoryTotal: string
memoryFree: string
memoryUtilization: string
}

View File

@ -1,90 +0,0 @@
import {
AppConfigurationEventName,
GpuSetting,
MonitoringExtension,
OperatingSystemInfo,
events,
executeOnMain,
} from '@janhq/core'
enum Settings {
logEnabled = 'log-enabled',
logCleaningInterval = 'log-cleaning-interval',
}
/**
* JanMonitoringExtension is an extension that provides system monitoring functionality.
* It implements the MonitoringExtension interface from the @janhq/core package.
*/
export default class JanMonitoringExtension extends MonitoringExtension {
/**
* Called when the extension is loaded.
*/
async onLoad() {
// Register extension settings
this.registerSettings(SETTINGS)
const logEnabled = await this.getSetting<boolean>(Settings.logEnabled, true)
const logCleaningInterval = parseInt(
await this.getSetting<string>(Settings.logCleaningInterval, '120000')
)
// Register File Logger provided by this extension
await executeOnMain(NODE, 'registerLogger', {
logEnabled,
logCleaningInterval: isNaN(logCleaningInterval)
? 120000
: logCleaningInterval,
})
// Attempt to fetch nvidia info
await executeOnMain(NODE, 'updateNvidiaInfo')
events.emit(AppConfigurationEventName.OnConfigurationUpdate, {})
}
onSettingUpdate<T>(key: string, value: T): void {
if (key === Settings.logEnabled) {
executeOnMain(NODE, 'updateLogger', { logEnabled: value })
} else if (key === Settings.logCleaningInterval) {
executeOnMain(NODE, 'updateLogger', { logCleaningInterval: value })
}
}
/**
* Called when the extension is unloaded.
*/
onUnload(): void {
// Unregister File Logger provided by this extension
executeOnMain(NODE, 'unregisterLogger')
}
/**
* Returns the GPU configuration.
* @returns A Promise that resolves to an object containing the GPU configuration.
*/
async getGpuSetting(): Promise<GpuSetting | undefined> {
return executeOnMain(NODE, 'getGpuConfig')
}
/**
* Returns information about the system resources.
* @returns A Promise that resolves to an object containing information about the system resources.
*/
getResourcesInfo(): Promise<any> {
return executeOnMain(NODE, 'getResourcesInfo')
}
/**
* Returns information about the current system load.
* @returns A Promise that resolves to an object containing information about the current system load.
*/
getCurrentLoad(): Promise<any> {
return executeOnMain(NODE, 'getCurrentLoad')
}
/**
* Returns information about the OS
* @returns
*/
getOsInfo(): Promise<OperatingSystemInfo> {
return executeOnMain(NODE, 'getOsInfo')
}
}

View File

@ -1,389 +0,0 @@
import {
GpuSetting,
GpuSettingInfo,
LoggerManager,
OperatingSystemInfo,
ResourceInfo,
SupportedPlatforms,
getJanDataFolderPath,
log,
} from '@janhq/core/node'
import { mem, cpu } from 'node-os-utils'
import { exec } from 'child_process'
import { writeFileSync, existsSync, readFileSync, mkdirSync } from 'fs'
import path from 'path'
import os from 'os'
import { FileLogger } from './logger'
/**
* Path to the settings directory
**/
export const SETTINGS_DIR = path.join(getJanDataFolderPath(), 'settings')
/**
* Path to the settings file
**/
export const GPU_INFO_FILE = path.join(SETTINGS_DIR, 'settings.json')
/**
* Default GPU settings
* TODO: This needs to be refactored to support multiple accelerators
**/
const DEFAULT_SETTINGS: GpuSetting = {
notify: true,
run_mode: 'cpu',
nvidia_driver: {
exist: false,
version: '',
},
cuda: {
exist: false,
version: '',
},
gpus: [],
gpu_highest_vram: '',
gpus_in_use: [],
is_initial: true,
// TODO: This needs to be set based on user toggle in settings
vulkan: false,
}
export const getGpuConfig = async (): Promise<GpuSetting | undefined> => {
if (process.platform === 'darwin') return undefined
if (existsSync(GPU_INFO_FILE))
return JSON.parse(readFileSync(GPU_INFO_FILE, 'utf-8'))
return DEFAULT_SETTINGS
}
export const getResourcesInfo = async (): Promise<ResourceInfo> => {
const ramUsedInfo = await mem.used()
const totalMemory = ramUsedInfo.totalMemMb * 1024 * 1024
const usedMemory = ramUsedInfo.usedMemMb * 1024 * 1024
const resourceInfo: ResourceInfo = {
mem: {
totalMemory,
usedMemory,
},
}
return resourceInfo
}
export const getCurrentLoad = () =>
new Promise<CpuGpuInfo>(async (resolve, reject) => {
const cpuPercentage = await cpu.usage()
let data = {
run_mode: 'cpu',
gpus_in_use: [],
}
if (process.platform !== 'darwin') {
data = JSON.parse(readFileSync(GPU_INFO_FILE, 'utf-8'))
}
if (data.run_mode === 'gpu' && data.gpus_in_use.length > 0) {
const gpuIds = data.gpus_in_use.join(',')
if (gpuIds !== '' && data['vulkan'] !== true) {
exec(
`nvidia-smi --query-gpu=index,name,temperature.gpu,utilization.gpu,memory.total,memory.free,utilization.memory --format=csv,noheader,nounits --id=${gpuIds}`,
(error, stdout, _) => {
if (error) {
console.error(`exec error: ${error}`)
throw new Error(error.message)
}
const gpuInfo: GpuInfo[] = stdout
.trim()
.split('\n')
.map((line) => {
const [
id,
name,
temperature,
utilization,
memoryTotal,
memoryFree,
memoryUtilization,
] = line.split(', ').map((item) => item.replace(/\r/g, ''))
return {
id,
name,
temperature,
utilization,
memoryTotal,
memoryFree,
memoryUtilization,
}
})
resolve({
cpu: { usage: cpuPercentage },
gpu: gpuInfo,
})
}
)
} else {
// Handle the case where gpuIds is empty
resolve({
cpu: { usage: cpuPercentage },
gpu: [],
})
}
} else {
// Handle the case where run_mode is not 'gpu' or no GPUs are in use
resolve({
cpu: { usage: cpuPercentage },
gpu: [],
})
}
})
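
For reference, the `nvidia-smi` query above emits one comma-separated line per GPU, with fields in the order the `--query-gpu` flags are listed. The sample value below is invented for illustration and simply re-runs the same parsing step.

```typescript
// Illustrative output line (values made up); field order follows
// index, name, temperature.gpu, utilization.gpu, memory.total,
// memory.free, utilization.memory (with nounits).
const sampleLine = '0, NVIDIA GeForce RTX 4090, 52, 17, 24564, 20210, 12'

const [id, name, temperature, utilization, memoryTotal, memoryFree, memoryUtilization] =
  sampleLine.split(', ').map((item) => item.replace(/\r/g, ''))

console.log({ id, name, memoryTotal, memoryFree })
// { id: '0', name: 'NVIDIA GeForce RTX 4090', memoryTotal: '24564', memoryFree: '20210' }
```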
/**
* This will retrieve GPU information and persist settings.json
* Will be called when the extension is loaded to turn on GPU acceleration if supported
*/
export const updateNvidiaInfo = async () => {
// ignore if macos
if (process.platform === 'darwin') return
try {
JSON.parse(readFileSync(GPU_INFO_FILE, 'utf-8'))
} catch (error) {
if (!existsSync(SETTINGS_DIR)) {
mkdirSync(SETTINGS_DIR, {
recursive: true,
})
}
writeFileSync(GPU_INFO_FILE, JSON.stringify(DEFAULT_SETTINGS, null, 2))
}
await updateNvidiaDriverInfo()
await updateGpuInfo()
}
const updateNvidiaDriverInfo = async () =>
new Promise((resolve, reject) => {
exec(
'nvidia-smi --query-gpu=driver_version --format=csv,noheader',
(error, stdout) => {
const data: GpuSetting = JSON.parse(
readFileSync(GPU_INFO_FILE, 'utf-8')
)
if (!error) {
const firstLine = stdout.split('\n')[0].trim()
data.nvidia_driver.exist = true
data.nvidia_driver.version = firstLine
} else {
data.nvidia_driver.exist = false
}
writeFileSync(GPU_INFO_FILE, JSON.stringify(data, null, 2))
resolve({})
}
)
})
const getGpuArch = (gpuName: string): string => {
if (!gpuName.toLowerCase().includes('nvidia')) return 'unknown'
if (gpuName.includes('30')) return 'ampere'
else if (gpuName.includes('40')) return 'ada'
else return 'unknown'
}
const updateGpuInfo = async () =>
new Promise((resolve, reject) => {
let data: GpuSetting = JSON.parse(readFileSync(GPU_INFO_FILE, 'utf-8'))
// Cuda
if (data.vulkan === true) {
// Vulkan
exec(
process.platform === 'win32'
? `${__dirname}\\..\\bin\\vulkaninfoSDK.exe --summary`
: `${__dirname}/../bin/vulkaninfo --summary`,
async (error, stdout) => {
if (!error) {
const output = stdout.toString()
log(output)
const gpuRegex = /GPU(\d+):(?:[\s\S]*?)deviceName\s*=\s*(.*)/g
const gpus: GpuSettingInfo[] = []
let match
while ((match = gpuRegex.exec(output)) !== null) {
const id = match[1]
const name = match[2]
const arch = getGpuArch(name)
gpus.push({ id, vram: '0', name, arch })
}
data.gpus = gpus
if (!data.gpus_in_use || data.gpus_in_use.length === 0) {
data.gpus_in_use = [data.gpus.length > 1 ? '1' : '0']
}
data = await updateCudaExistence(data)
writeFileSync(GPU_INFO_FILE, JSON.stringify(data, null, 2))
log(`[APP]::${JSON.stringify(data)}`)
resolve({})
} else {
reject(error)
}
}
)
} else {
exec(
'nvidia-smi --query-gpu=index,memory.total,name --format=csv,noheader,nounits',
async (error, stdout) => {
if (!error) {
log(`[SPECS]::${stdout}`)
// Get GPU info and track the GPU with the highest VRAM
let highestVram = 0
let highestVramId = '0'
const gpus: GpuSettingInfo[] = stdout
.trim()
.split('\n')
.map((line) => {
let [id, vram, name] = line.split(', ')
const arch = getGpuArch(name)
vram = vram.replace(/\r/g, '')
if (parseFloat(vram) > highestVram) {
highestVram = parseFloat(vram)
highestVramId = id
}
return { id, vram, name, arch }
})
data.gpus = gpus
data.gpu_highest_vram = highestVramId
} else {
data.gpus = []
data.gpu_highest_vram = undefined
}
if (!data.gpus_in_use || data.gpus_in_use.length === 0) {
data.gpus_in_use = data.gpu_highest_vram ? [data.gpu_highest_vram].filter(e => !!e) : []
}
data = await updateCudaExistence(data)
console.log('[MONITORING]::Cuda info: ', data)
writeFileSync(GPU_INFO_FILE, JSON.stringify(data, null, 2))
log(`[APP]::${JSON.stringify(data)}`)
resolve({})
}
)
}
})
/**
* Check if file exists in paths
*/
const checkFileExistenceInPaths = (file: string, paths: string[]): boolean => {
return paths.some((p) => existsSync(path.join(p, file)))
}
/**
* Validate cuda for linux and windows
*/
const updateCudaExistence = async (
data: GpuSetting = DEFAULT_SETTINGS
): Promise<GpuSetting> => {
let filesCuda12: string[]
let filesCuda11: string[]
let paths: string[]
let cudaVersion: string = ''
if (process.platform === 'win32') {
filesCuda12 = ['cublas64_12.dll', 'cudart64_12.dll', 'cublasLt64_12.dll']
filesCuda11 = ['cublas64_11.dll', 'cudart64_110.dll', 'cublasLt64_11.dll']
paths = process.env.PATH ? process.env.PATH.split(path.delimiter) : []
} else {
filesCuda12 = ['libcudart.so.12', 'libcublas.so.12', 'libcublasLt.so.12']
filesCuda11 = ['libcudart.so.11.0', 'libcublas.so.11', 'libcublasLt.so.11']
paths = process.env.LD_LIBRARY_PATH
? process.env.LD_LIBRARY_PATH.split(path.delimiter)
: []
paths.push('/usr/lib/x86_64-linux-gnu/')
}
let cudaExists = filesCuda12.every(
(file) => existsSync(file) || checkFileExistenceInPaths(file, paths)
)
if (!cudaExists) {
cudaExists = filesCuda11.every(
(file) => existsSync(file) || checkFileExistenceInPaths(file, paths)
)
if (cudaExists) {
cudaVersion = '11'
}
} else {
cudaVersion = '12'
}
data.cuda.exist = cudaExists
data.cuda.version = cudaVersion
console.debug(data.is_initial, data.gpus_in_use)
if (cudaExists && data.is_initial && data.gpus_in_use.length > 0) {
data.run_mode = 'gpu'
}
data.is_initial = false
// Attempt to query CUDA using NVIDIA SMI
if (!cudaExists) {
await new Promise<void>((resolve) => {
exec('nvidia-smi', (error, stdout) => {
if (!error) {
const regex = /CUDA\s*Version:\s*(\d+\.\d+)/g
const match = regex.exec(stdout)
if (match && match[1]) {
data.cuda.version = match[1]
}
}
console.log('[MONITORING]::Finalized cuda info update: ', data)
resolve()
})
})
}
return data
}
export const getOsInfo = (): OperatingSystemInfo => {
const platform =
SupportedPlatforms.find((p) => p === process.platform) || 'unknown'
const osInfo: OperatingSystemInfo = {
platform: platform,
arch: process.arch,
release: os.release(),
machine: os.machine(),
version: os.version(),
totalMem: os.totalmem(),
freeMem: os.freemem(),
}
return osInfo
}
export const registerLogger = ({ logEnabled, logCleaningInterval }) => {
const logger = new FileLogger(logEnabled, logCleaningInterval)
LoggerManager.instance().register(logger)
logger.cleanLogs()
}
export const unregisterLogger = () => {
LoggerManager.instance().unregister('file')
}
export const updateLogger = ({ logEnabled, logCleaningInterval }) => {
const logger = LoggerManager.instance().loggers.get('file') as FileLogger
if (logger && logEnabled !== undefined) logger.logEnabled = logEnabled
if (logger && logCleaningInterval)
logger.logCleaningInterval = logCleaningInterval
// Rerun
logger && logger.cleanLogs()
}

View File

@ -5,77 +5,470 @@
"post": { "post": {
"operationId": "AssistantsController_create", "operationId": "AssistantsController_create",
"summary": "Create assistant", "summary": "Create assistant",
"description": "Creates a new assistant.", "description": "Creates a new assistant with the specified configuration.",
"parameters": [],
"requestBody": { "requestBody": {
"required": true, "required": true,
"content": { "content": {
"application/json": { "application/json": {
"schema": { "schema": {
"$ref": "#/components/schemas/CreateAssistantDto" "type": "object",
"properties": {
"model": {
"type": "string",
"description": "The model identifier to use for the assistant."
},
"name": {
"type": "string",
"description": "The name of the assistant."
},
"description": {
"type": "string",
"description": "The description of the assistant."
},
"instructions": {
"type": "string",
"description": "Instructions for the assistant's behavior."
},
"tools": {
"type": "array",
"description": "A list of tools enabled on the assistant. Maximum of 128 tools.",
"items": {
"type": "object",
"properties": {
"type": {
"type": "string",
"enum": [
"code_interpreter",
"file_search",
"function"
]
} }
} }
} }
}, },
"responses": { "tool_resources": {
"201": { "type": "object",
"description": "The assistant has been successfully created." "description": "Resources used by the assistant's tools.",
"properties": {
"code_interpreter": {
"type": "object"
},
"file_search": {
"type": "object"
}
} }
}, },
"tags": ["Assistants"] "metadata": {
"type": "object",
"description": "Set of key-value pairs for the assistant.",
"additionalProperties": true
}, },
"get": { "temperature": {
"operationId": "AssistantsController_findAll", "type": "number",
"summary": "List assistants", "format": "float",
"description": "Returns a list of assistants.", "description": "Temperature parameter for response generation."
"parameters": [ },
"top_p": {
"type": "number",
"format": "float",
"description": "Top p parameter for response generation."
},
"response_format": {
"oneOf": [
{ {
"name": "limit", "type": "string",
"required": false, "enum": ["auto"]
"in": "query",
"description": "A limit on the number of objects to be returned. Limit can range between 1 and 100, and the default is 20.",
"schema": {
"type": "number"
}
}, },
{ {
"name": "order", "type": "object"
"required": false, }
"in": "query", ]
"description": "Sort order by the created_at timestamp of the objects. asc for ascending order and desc for descending order.",
"schema": {
"type": "string"
} }
}, },
{ "required": ["model"]
"name": "after", }
"required": false, }
"in": "query",
"description": "A cursor for use in pagination. after is an object ID that defines your place in the list. For instance, if you make a list request and receive 100 objects, ending with obj_foo, your subsequent call can include after=obj_foo in order to fetch the next page of the list.",
"schema": {
"type": "string"
} }
}, },
{
"name": "before",
"required": false,
"in": "query",
"description": "A cursor for use in pagination. before is an object ID that defines your place in the list. For instance, if you make a list request and receive 100 objects, ending with obj_foo, your subsequent call can include before=obj_foo in order to fetch the previous page of the list.",
"schema": {
"type": "string"
}
}
],
"responses": { "responses": {
"200": { "200": {
"description": "Ok", "description": "Ok",
"content": { "content": {
"application/json": { "application/json": {
"schema": { "schema": {
"type": "object",
"properties": {
"id": {
"type": "string",
"description": "The unique identifier of the assistant."
},
"object": {
"type": "string",
"enum": ["assistant"],
"description": "The object type, which is always 'assistant'."
},
"created_at": {
"type": "integer",
"description": "Unix timestamp (in seconds) of when the assistant was created."
},
"model": {
"type": "string",
"description": "The model identifier used by the assistant."
},
"name": {
"type": "string",
"description": "The name of the assistant."
},
"description": {
"type": "string",
"description": "The description of the assistant."
},
"instructions": {
"type": "string",
"description": "Instructions for the assistant's behavior."
},
"tools": {
"type": "array",
"description": "A list of tools enabled on the assistant.",
"items": {
"type": "object",
"properties": {
"type": {
"type": "string",
"enum": [
"code_interpreter",
"file_search",
"function"
]
}
}
}
},
"tool_resources": {
"type": "object",
"description": "Resources used by the assistant's tools.",
"properties": {
"code_interpreter": {
"type": "object"
},
"file_search": {
"type": "object"
}
}
},
"metadata": {
"type": "object",
"description": "Set of key-value pairs that can be attached to the assistant.",
"additionalProperties": true
},
"temperature": {
"type": "number",
"format": "float",
"description": "Temperature parameter for response generation."
},
"top_p": {
"type": "number",
"format": "float",
"description": "Top p parameter for response generation."
},
"response_format": {
"oneOf": [
{
"type": "string",
"enum": ["auto"]
},
{
"type": "object"
}
]
}
},
"required": [
"id",
"object",
"created_at",
"model",
"metadata"
]
}
}
}
}
},
"tags": ["Assistants"]
},
"patch": {
"operationId": "AssistantsController_update",
"summary": "Update assistant",
"description": "Updates an assistant. Requires at least one modifiable field.",
"parameters": [
{
"name": "id",
"required": true,
"in": "path",
"description": "The unique identifier of the assistant.",
"schema": {
"type": "string"
}
},
{
"name": "OpenAI-Beta",
"required": true,
"in": "header",
"description": "Beta feature header.",
"schema": {
"type": "string",
"enum": ["assistants=v2"]
}
}
],
"requestBody": {
"required": true,
"content": {
"application/json": {
"schema": {
"type": "object",
"properties": {
"model": {
"type": "string",
"description": "The model identifier to use for the assistant."
},
"name": {
"type": "string",
"description": "The name of the assistant."
},
"description": {
"type": "string",
"description": "The description of the assistant."
},
"instructions": {
"type": "string",
"description": "Instructions for the assistant's behavior."
},
"tools": {
"type": "array",
"description": "A list of tools enabled on the assistant. Maximum of 128 tools.",
"items": {
"type": "object",
"properties": {
"type": {
"type": "string",
"enum": [
"code_interpreter",
"file_search",
"function"
]
}
}
}
},
"tool_resources": {
"type": "object",
"description": "Resources used by the assistant's tools.",
"properties": {
"code_interpreter": {
"type": "object"
},
"file_search": {
"type": "object"
}
}
},
"metadata": {
"type": "object",
"description": "Set of key-value pairs for the assistant.",
"additionalProperties": true
},
"temperature": {
"type": "number",
"format": "float",
"description": "Temperature parameter for response generation."
},
"top_p": {
"type": "number",
"format": "float",
"description": "Top p parameter for response generation."
},
"response_format": {
"oneOf": [
{
"type": "string",
"enum": ["auto"]
},
{
"type": "object"
}
]
}
},
"minProperties": 1
}
}
}
},
"responses": {
"200": {
"description": "Ok",
"content": {
"application/json": {
"schema": {
"type": "object",
"properties": {
"id": {
"type": "string",
"description": "The unique identifier of the assistant."
},
"object": {
"type": "string",
"enum": ["assistant"],
"description": "The object type, which is always 'assistant'."
},
"created_at": {
"type": "integer",
"description": "Unix timestamp (in seconds) of when the assistant was created."
},
"model": {
"type": "string",
"description": "The model identifier used by the assistant."
},
"name": {
"type": "string",
"description": "The name of the assistant."
},
"description": {
"type": "string",
"description": "The description of the assistant."
},
"instructions": {
"type": "string",
"description": "Instructions for the assistant's behavior."
},
"tools": {
"type": "array",
"description": "A list of tools enabled on the assistant.",
"items": {
"type": "object",
"properties": {
"type": {
"type": "string",
"enum": [
"code_interpreter",
"file_search",
"function"
]
}
}
}
},
"tool_resources": {
"type": "object",
"description": "Resources used by the assistant's tools.",
"properties": {
"code_interpreter": {
"type": "object"
},
"file_search": {
"type": "object"
}
}
},
"metadata": {
"type": "object",
"description": "Set of key-value pairs that can be attached to the assistant.",
"additionalProperties": true
},
"temperature": {
"type": "number",
"format": "float",
"description": "Temperature parameter for response generation."
},
"top_p": {
"type": "number",
"format": "float",
"description": "Top p parameter for response generation."
},
"response_format": {
"oneOf": [
{
"type": "string",
"enum": ["auto"]
},
{
"type": "object"
}
]
}
},
"required": [
"id",
"object",
"created_at",
"model",
"metadata"
]
}
}
}
}
},
"tags": ["Assistants"]
},
"get": {
"operationId": "AssistantsController_list",
"summary": "List assistants",
"description": "Returns a list of assistants.",
"responses": {
"200": {
"description": "Ok",
"content": {
"application/json": {
"schema": {
"type": "object",
"properties": {
"object": {
"type": "string",
"enum": ["list"],
"description": "The object type, which is always 'list' for a list response."
},
"data": {
"type": "array", "type": "array",
"items": { "items": {
"$ref": "#/components/schemas/AssistantEntity" "type": "object",
"properties": {
"id": {
"type": "string",
"description": "The unique identifier of the assistant."
},
"object": {
"type": "string",
"enum": ["assistant"],
"description": "The object type, which is always 'assistant'."
},
"created_at": {
"type": "integer",
"description": "Unix timestamp (in seconds) of when the assistant was created."
},
"model": {
"type": "string",
"description": "The model identifier used by the assistant."
},
"metadata": {
"type": "object",
"description": "Set of key-value pairs that can be attached to the assistant.",
"additionalProperties": true
} }
},
"required": [
"id",
"object",
"created_at",
"model",
"metadata"
]
}
}
},
"required": ["object", "data"]
} }
} }
} }
@ -88,7 +481,77 @@
"get": { "get": {
"operationId": "AssistantsController_findOne", "operationId": "AssistantsController_findOne",
"summary": "Get assistant", "summary": "Get assistant",
"description": "Retrieves a specific assistant defined by an assistant's `id`.", "description": "Retrieves a specific assistant by ID.",
"parameters": [
{
"name": "id",
"required": true,
"in": "path",
"description": "The unique identifier of the assistant.",
"schema": {
"type": "string"
}
},
{
"name": "OpenAI-Beta",
"required": true,
"in": "header",
"description": "Beta feature header.",
"schema": {
"type": "string",
"enum": ["assistants=v2"]
}
}
],
"responses": {
"200": {
"description": "Ok",
"content": {
"application/json": {
"schema": {
"type": "object",
"properties": {
"id": {
"type": "string",
"description": "The unique identifier of the assistant."
},
"object": {
"type": "string",
"enum": ["assistant"],
"description": "The object type, which is always 'assistant'."
},
"created_at": {
"type": "integer",
"description": "Unix timestamp (in seconds) of when the assistant was created."
},
"model": {
"type": "string",
"description": "The model identifier used by the assistant."
},
"metadata": {
"type": "object",
"description": "Set of key-value pairs attached to the assistant.",
"additionalProperties": true
}
},
"required": [
"id",
"object",
"created_at",
"model",
"metadata"
]
}
}
}
}
},
"tags": ["Assistants"]
},
"delete": {
"operationId": "AssistantsController_remove",
"summary": "Delete assistant",
"description": "Deletes a specific assistant by ID.",
"parameters": [ "parameters": [
{ {
"name": "id", "name": "id",
@ -106,36 +569,24 @@
"content": { "content": {
"application/json": { "application/json": {
"schema": { "schema": {
"$ref": "#/components/schemas/AssistantEntity" "type": "object",
} "properties": {
} "id": {
} "type": "string",
"description": "The unique identifier of the deleted assistant."
},
"object": {
"type": "string",
"enum": ["assistant.deleted"],
"description": "The object type for a deleted assistant."
},
"deleted": {
"type": "boolean",
"enum": [true],
"description": "Indicates the assistant was successfully deleted."
} }
}, },
"tags": ["Assistants"] "required": ["id", "object", "deleted"]
},
"delete": {
"operationId": "AssistantsController_remove",
"summary": "Delete assistant",
"description": "Deletes a specific assistant defined by an assistant's `id`.",
"parameters": [
{
"name": "id",
"required": true,
"in": "path",
"description": "The unique identifier of the assistant.",
"schema": {
"type": "string"
}
}
],
"responses": {
"200": {
"description": "The assistant has been successfully deleted.",
"content": {
"application/json": {
"schema": {
"$ref": "#/components/schemas/DeleteAssistantResponseDto"
} }
} }
} }
@ -2199,6 +2650,84 @@
"tags": ["Engines"] "tags": ["Engines"]
} }
}, },
"/engines/{name}/releases/{version}": {
"get": {
"summary": "List variants for a specific engine version",
"description": "Lists all available variants (builds) for a specific version of an engine. Variants can include different CPU architectures (AVX, AVX2, AVX512), GPU support (CUDA, Vulkan), and operating systems (Windows, Linux, macOS).",
"parameters": [
{
"name": "name",
"in": "path",
"required": true,
"schema": {
"type": "string",
"enum": ["llama-cpp", "onnxruntime", "tensorrt-llm"],
"default": "llama-cpp"
},
"description": "The type of engine"
},
{
"name": "version",
"in": "path",
"required": true,
"schema": {
"type": "string"
},
"description": "The version of the engine"
},
{
"name": "show",
"in": "query",
"required": false,
"schema": {
"type": "string",
"enum": ["all", "compatible"],
"default": "all"
},
"description": "Filter the variants list. Use 'compatible' to show only variants compatible with the current system, or 'all' to show all available variants."
}
],
"responses": {
"200": {
"description": "Successfully retrieved variants list",
"content": {
"application/json": {
"schema": {
"type": "array",
"items": {
"type": "object",
"properties": {
"name": {
"type": "string",
"description": "The name of the variant, including OS, architecture, and capabilities",
"example": "linux-amd64-avx-cuda-11-7"
},
"created_at": {
"type": "string",
"format": "date-time",
"description": "Creation timestamp of the variant",
"example": "2024-11-13T04:51:16Z"
},
"size": {
"type": "integer",
"description": "Size of the variant in bytes",
"example": 151224604
},
"download_count": {
"type": "integer",
"description": "Number of times this variant has been downloaded",
"example": 0
}
}
}
}
}
}
}
},
"tags": ["Engines"]
}
},
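The `/engines/{name}/releases/{version}` route documented above is a plain GET, so it can be exercised with any HTTP client. The sketch below is hypothetical: the base URL, `/v1` prefix, and version string are assumptions, not values taken from this spec (it assumes an ES module context for top-level await).

```typescript
// Hypothetical request against a locally running server (base URL assumed).
const baseUrl = 'http://127.0.0.1:39281/v1'

const res = await fetch(
  `${baseUrl}/engines/llama-cpp/releases/v0.1.40?show=compatible`
)
const variants: Array<{
  name: string
  created_at: string
  size: number
  download_count: number
}> = await res.json()

console.log(variants.map((v) => v.name)) // e.g. ['linux-amd64-avx2-cuda-12-0']
```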
"/engines/{name}/releases/latest": { "/engines/{name}/releases/latest": {
"get": { "get": {
"summary": "Get latest release", "summary": "Get latest release",
@ -2314,7 +2843,7 @@
"get_models_url": { "get_models_url": {
"type": "string", "type": "string",
"description": "The URL to get models", "description": "The URL to get models",
"example": "https://api.openai.com/v1/models" "example": "https://api.openai.com/models"
} }
} }
} }
@ -3378,6 +3907,7 @@
"Files", "Files",
"Hardware", "Hardware",
"Events", "Events",
"Assistants",
"Threads", "Threads",
"Messages", "Messages",
"Pulling Models", "Pulling Models",
@ -4858,8 +5388,8 @@
"engine", "engine",
"version", "version",
"inference_params", "inference_params",
"TransformReq", "transform_req",
"TransformResp", "transform_resp",
"metadata" "metadata"
], ],
"properties": { "properties": {
@ -4867,9 +5397,9 @@
"type": "string", "type": "string",
"description": "The identifier of the model." "description": "The identifier of the model."
}, },
"api_key_template": { "header_template": {
"type": "string", "type": "string",
"description": "Template for the API key header." "description": "Template for the header."
}, },
"engine": { "engine": {
"type": "string", "type": "string",
@ -4902,7 +5432,7 @@
} }
} }
}, },
"TransformReq": { "transform_req": {
"type": "object", "type": "object",
"properties": { "properties": {
"get_models": { "get_models": {
@ -4924,7 +5454,7 @@
} }
} }
}, },
"TransformResp": { "transform_resp": {
"type": "object", "type": "object",
"properties": { "properties": {
"chat_completions": { "chat_completions": {
@ -5632,9 +6162,9 @@
"description": "Number of GPU layers.", "description": "Number of GPU layers.",
"example": 33 "example": 33
}, },
"api_key_template": { "header_template": {
"type": "string", "type": "string",
"description": "Template for the API key header." "description": "Template for the header."
}, },
"version": { "version": {
"type": "string", "type": "string",

View File

@ -10,7 +10,9 @@ const AutoLink = ({ text }: Props) => {
return ( return (
<> <>
{text.split(delimiter).map((word) => { {text &&
typeof text === 'string' &&
text.split(delimiter).map((word) => {
const match = word.match(delimiter) const match = word.match(delimiter)
if (match) { if (match) {
const url = match[0] const url = match[0]

View File

@ -23,7 +23,13 @@ import { mainViewStateAtom } from '@/helpers/atoms/App.atom'
import { activeAssistantAtom } from '@/helpers/atoms/Assistant.atom' import { activeAssistantAtom } from '@/helpers/atoms/Assistant.atom'
import { selectedSettingAtom } from '@/helpers/atoms/Setting.atom' import { selectedSettingAtom } from '@/helpers/atoms/Setting.atom'
const ErrorMessage = ({ message }: { message: ThreadMessage }) => { const ErrorMessage = ({
message,
errorComponent,
}: {
message?: ThreadMessage
errorComponent?: React.ReactNode
}) => {
const setModalTroubleShooting = useSetAtom(modalTroubleShootingAtom) const setModalTroubleShooting = useSetAtom(modalTroubleShootingAtom)
const setMainState = useSetAtom(mainViewStateAtom) const setMainState = useSetAtom(mainViewStateAtom)
const setSelectedSettingScreen = useSetAtom(selectedSettingAtom) const setSelectedSettingScreen = useSetAtom(selectedSettingAtom)
@ -50,7 +56,7 @@ const ErrorMessage = ({ message }: { message: ThreadMessage }) => {
const getErrorTitle = () => { const getErrorTitle = () => {
const engine = getEngine() const engine = getEngine()
switch (message.metadata?.error_code) { switch (message?.metadata?.error_code) {
case ErrorCode.InvalidApiKey: case ErrorCode.InvalidApiKey:
case ErrorCode.AuthenticationError: case ErrorCode.AuthenticationError:
return ( return (
@ -61,7 +67,7 @@ const ErrorMessage = ({ message }: { message: ThreadMessage }) => {
className="font-medium text-[hsla(var(--app-link))] underline" className="font-medium text-[hsla(var(--app-link))] underline"
onClick={() => { onClick={() => {
setMainState(MainViewState.Settings) setMainState(MainViewState.Settings)
engine?.name && setSelectedSettingScreen(engine.name) setSelectedSettingScreen(activeAssistant?.model?.engine ?? '')
}} }}
> >
Settings Settings
@ -77,7 +83,7 @@ const ErrorMessage = ({ message }: { message: ThreadMessage }) => {
data-testid="passthrough-error-message" data-testid="passthrough-error-message"
className="first-letter:uppercase" className="first-letter:uppercase"
> >
{message.content[0]?.text?.value === 'Failed to fetch' && {message?.content[0]?.text?.value === 'Failed to fetch' &&
engine && engine &&
engine?.name !== InferenceEngine.cortex_llamacpp ? ( engine?.name !== InferenceEngine.cortex_llamacpp ? (
<span> <span>
@ -89,6 +95,9 @@ const ErrorMessage = ({ message }: { message: ThreadMessage }) => {
{message?.content[0]?.text?.value && ( {message?.content[0]?.text?.value && (
<AutoLink text={message?.content[0]?.text?.value} /> <AutoLink text={message?.content[0]?.text?.value} />
)} )}
{!message?.content[0]?.text?.value && (
<span>Something went wrong. Please try again.</span>
)}
</> </>
)} )}
</p> </p>
@ -100,12 +109,15 @@ const ErrorMessage = ({ message }: { message: ThreadMessage }) => {
<div className="mx-auto my-6 max-w-[700px] px-4"> <div className="mx-auto my-6 max-w-[700px] px-4">
<div <div
className="mx-auto max-w-[400px] rounded-lg border border-[hsla(var(--app-border))]" className="mx-auto max-w-[400px] rounded-lg border border-[hsla(var(--app-border))]"
key={message.id} key={message?.id}
> >
<div className="flex justify-between border-b border-inherit px-4 py-2"> <div className="flex justify-between border-b border-inherit px-4 py-2">
<h6 className="text-[hsla(var(--destructive-bg))]">Error</h6> <h6 className="flex items-center gap-x-1 font-semibold text-[hsla(var(--destructive-bg))]">
<div className="flex gap-x-4 text-xs"> <span className="h-2 w-2 rounded-full bg-[hsla(var(--destructive-bg))]" />
<div> <span>Error</span>
</h6>
<div className="flex items-center gap-x-4 text-xs">
<div className="font-semibold">
<span <span
className="flex cursor-pointer items-center gap-x-1 text-[hsla(var(--app-link))]" className="flex cursor-pointer items-center gap-x-1 text-[hsla(var(--app-link))]"
onClick={() => setModalTroubleShooting(true)} onClick={() => setModalTroubleShooting(true)}
@ -116,7 +128,7 @@ const ErrorMessage = ({ message }: { message: ThreadMessage }) => {
<ModalTroubleShooting /> <ModalTroubleShooting />
</div> </div>
<div <div
className="flex cursor-pointer items-center gap-x-1 text-[hsla(var(--text-secondary))]" className="flex cursor-pointer items-center gap-x-1 font-semibold text-[hsla(var(--text-secondary))]"
onClick={handleCopy} onClick={handleCopy}
> >
{copied ? ( {copied ? (
@ -138,10 +150,10 @@ const ErrorMessage = ({ message }: { message: ThreadMessage }) => {
</div> </div>
<div className="max-h-[80px] w-full overflow-x-auto p-4 py-2"> <div className="max-h-[80px] w-full overflow-x-auto p-4 py-2">
<div <div
className="text-xs leading-relaxed text-[hsla(var(--text-secondary))]" className="font-serif text-xs leading-relaxed text-[hsla(var(--text-secondary))]"
ref={errorDivRef} ref={errorDivRef}
> >
{getErrorTitle()} {errorComponent ? errorComponent : getErrorTitle()}
</div> </div>
</div> </div>
</div> </div>

View File

@ -87,7 +87,7 @@ describe('SystemMonitor', () => {
expect(screen.getByText('Running Models')).toBeInTheDocument() expect(screen.getByText('Running Models')).toBeInTheDocument()
expect(screen.getByText('App Log')).toBeInTheDocument() expect(screen.getByText('App Log')).toBeInTheDocument()
expect(screen.getByText('7.45/14.90 GB')).toBeInTheDocument() expect(screen.getByText('7.45GB / 14.90GB')).toBeInTheDocument()
expect(screen.getByText('30%')).toBeInTheDocument() expect(screen.getByText('30%')).toBeInTheDocument()
}) })

View File

@ -134,8 +134,8 @@ const SystemMonitor = () => {
<div className="flex items-center justify-between gap-2"> <div className="flex items-center justify-between gap-2">
<h6 className="font-bold">Memory</h6> <h6 className="font-bold">Memory</h6>
<span> <span>
{toGigabytes(usedRam, { hideUnit: true })}/ {toGigabytes(usedRam, { hideUnit: true })}GB /{' '}
{toGigabytes(totalRam, { hideUnit: true })} GB {toGigabytes(totalRam, { hideUnit: true })}GB
</span> </span>
</div> </div>
<div className="flex items-center gap-x-4"> <div className="flex items-center gap-x-4">
@ -149,10 +149,12 @@ const SystemMonitor = () => {
</div> </div>
{gpus.length > 0 && ( {gpus.length > 0 && (
<div className="mb-4 border-b border-[hsla(var(--app-border))] pb-4 last:border-none"> <div className="mb-4 border-b border-[hsla(var(--app-border))] pb-4 last:border-none">
{gpus.map((gpu, index) => { {gpus
.filter((gpu) => gpu.activated === true)
.map((gpu, index) => {
const gpuUtilization = utilizedMemory( const gpuUtilization = utilizedMemory(
gpu.memoryFree, gpu.free_vram,
gpu.memoryTotal gpu.total_vram
) )
return ( return (
<div key={index} className="mt-4 flex flex-col gap-x-2"> <div key={index} className="mt-4 flex flex-col gap-x-2">
@ -163,8 +165,8 @@ const SystemMonitor = () => {
<div className="flex gap-x-2"> <div className="flex gap-x-2">
<div className=""> <div className="">
<span> <span>
{gpu.memoryTotal - gpu.memoryFree}/ {gpu.total_vram - gpu.free_vram}/
{gpu.memoryTotal} {gpu.total_vram}
</span> </span>
<span> MB</span> <span> MB</span>
</div> </div>

View File

@ -25,6 +25,8 @@ import ImportModelOptionModal from '@/screens/Settings/ImportModelOptionModal'
import ImportingModelModal from '@/screens/Settings/ImportingModelModal' import ImportingModelModal from '@/screens/Settings/ImportingModelModal'
import SelectingModelModal from '@/screens/Settings/SelectingModelModal' import SelectingModelModal from '@/screens/Settings/SelectingModelModal'
import { getAppDistinctId, updateDistinctId } from '@/utils/settings'
import LoadingModal from '../LoadingModal' import LoadingModal from '../LoadingModal'
import MainViewContainer from '../MainViewContainer' import MainViewContainer from '../MainViewContainer'
@ -96,8 +98,16 @@ const BaseLayout = () => {
return properties return properties
}, },
}) })
// Attempt to restore distinct Id from app global settings
getAppDistinctId()
.then((id) => {
if (id) posthog.identify(id)
})
.finally(() => {
posthog.opt_in_capturing() posthog.opt_in_capturing()
posthog.register({ app_version: VERSION }) posthog.register({ app_version: VERSION })
updateDistinctId(posthog.get_distinct_id())
})
} else { } else {
posthog.opt_out_capturing() posthog.opt_out_capturing()
} }

View File

@ -28,6 +28,8 @@ import ModelLabel from '@/containers/ModelLabel'
import SetupRemoteModel from '@/containers/SetupRemoteModel' import SetupRemoteModel from '@/containers/SetupRemoteModel'
import { useActiveModel } from '@/hooks/useActiveModel'
import { useCreateNewThread } from '@/hooks/useCreateNewThread' import { useCreateNewThread } from '@/hooks/useCreateNewThread'
import useDownloadModel from '@/hooks/useDownloadModel' import useDownloadModel from '@/hooks/useDownloadModel'
import { modelDownloadStateAtom } from '@/hooks/useDownloadState' import { modelDownloadStateAtom } from '@/hooks/useDownloadState'
@ -40,7 +42,7 @@ import useUpdateModelParameters from '@/hooks/useUpdateModelParameters'
import { formatDownloadPercentage, toGigabytes } from '@/utils/converter' import { formatDownloadPercentage, toGigabytes } from '@/utils/converter'
import { manualRecommendationModel } from '@/utils/model' import { manualRecommendationModel } from '@/utils/model'
import { getLogoEngine } from '@/utils/modelEngine' import { getLogoEngine, getTitleByEngine } from '@/utils/modelEngine'
import { activeAssistantAtom } from '@/helpers/atoms/Assistant.atom' import { activeAssistantAtom } from '@/helpers/atoms/Assistant.atom'
import { import {
@ -93,6 +95,7 @@ const ModelDropdown = ({
const { updateModelParameter } = useUpdateModelParameters() const { updateModelParameter } = useUpdateModelParameters()
const searchInputRef = useRef<HTMLInputElement>(null) const searchInputRef = useRef<HTMLInputElement>(null)
const configuredModels = useAtomValue(configuredModelsAtom) const configuredModels = useAtomValue(configuredModelsAtom)
const { stopModel } = useActiveModel()
const featuredModels = configuredModels.filter( const featuredModels = configuredModels.filter(
(x) => (x) =>
@ -226,6 +229,7 @@ const ModelDropdown = ({
const model = downloadedModels.find((m) => m.id === modelId) const model = downloadedModels.find((m) => m.id === modelId)
setSelectedModel(model) setSelectedModel(model)
setOpen(false) setOpen(false)
stopModel()
if (activeThread) { if (activeThread) {
// Change assistant tools based on whether the model supports RAG // Change assistant tools based on whether the model supports RAG
@ -248,18 +252,13 @@ const ModelDropdown = ({
], ],
}) })
const defaultContextLength = Math.min( const contextLength = model?.settings.ctx_len
8192, ? Math.min(8192, model?.settings.ctx_len ?? 8192)
model?.settings.ctx_len ?? 8192 : undefined
)
const overriddenParameters = { const overriddenParameters = {
ctx_len: model?.settings.ctx_len ? defaultContextLength : undefined, ctx_len: contextLength,
max_tokens: defaultContextLength max_tokens: contextLength
? Math.min( ? Math.min(model?.parameters.max_tokens ?? 8192, contextLength)
model?.parameters.max_tokens ?? 8192,
defaultContextLength
)
: model?.parameters.max_tokens, : model?.parameters.max_tokens,
} }
@ -289,6 +288,7 @@ const ModelDropdown = ({
updateThreadMetadata, updateThreadMetadata,
setThreadModelParams, setThreadModelParams,
updateModelParameter, updateModelParameter,
stopModel,
] ]
) )
@ -429,7 +429,7 @@ const ModelDropdown = ({
/> />
)} )}
<h6 className="font-medium capitalize text-[hsla(var(--text-secondary))]"> <h6 className="font-medium capitalize text-[hsla(var(--text-secondary))]">
{engine.name} {getTitleByEngine(engine.name)}
</h6> </h6>
</div> </div>
<div className="-mr-2 flex gap-1"> <div className="-mr-2 flex gap-1">
@ -475,7 +475,7 @@ const ModelDropdown = ({
> >
<div className="flex items-center gap-2"> <div className="flex items-center gap-2">
<p <p
className="line-clamp-1 text-[hsla(var(--text-secondary))]" className="max-w-[200px] overflow-hidden truncate whitespace-nowrap text-[hsla(var(--text-secondary))]"
title={model.name} title={model.name}
> >
{model.name} {model.name}
@ -549,6 +549,8 @@ const ModelDropdown = ({
(c) => c.id === model.id (c) => c.id === model.id
) )
return ( return (
<>
{isDownloaded && (
<li <li
key={model.id} key={model.id}
className={twMerge( className={twMerge(
@ -558,7 +560,10 @@ const ModelDropdown = ({
: 'text-[hsla(var(--text-primary))]' : 'text-[hsla(var(--text-primary))]'
)} )}
onClick={() => { onClick={() => {
if (!isConfigured && engine.type === 'remote') if (
!isConfigured &&
engine.type === 'remote'
)
return null return null
if (isDownloaded) { if (isDownloaded) {
onClickModelItem(model.id) onClickModelItem(model.id)
@ -568,7 +573,7 @@ const ModelDropdown = ({
<div className="flex gap-x-2"> <div className="flex gap-x-2">
<p <p
className={twMerge( className={twMerge(
'line-clamp-1', 'max-w-[200px] overflow-hidden truncate whitespace-nowrap',
!isDownloaded && !isDownloaded &&
'text-[hsla(var(--text-secondary))]' 'text-[hsla(var(--text-secondary))]'
)} )}
@ -618,6 +623,8 @@ const ModelDropdown = ({
)} )}
</div> </div>
</li> </li>
)}
</>
) )
})} })}
</ul> </ul>
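
The hunk at -248,+252 above replaces defaultContextLength with contextLength, which is only defined when the model actually declares a ctx_len setting. A minimal standalone sketch of that clamping behaviour, using simplified stand-in types rather than the app's real model interfaces:

// Sketch only: ModelSettings/ModelParameters are simplified stand-ins,
// not the real types from the Jan codebase.
interface ModelSettings {
  ctx_len?: number
}
interface ModelParameters {
  max_tokens?: number
}

function overrideParameters(
  settings: ModelSettings,
  parameters: ModelParameters
) {
  // Clamp the context window to 8192 only when the model declares one;
  // otherwise leave it undefined so the engine default applies.
  const contextLength = settings.ctx_len
    ? Math.min(8192, settings.ctx_len)
    : undefined

  return {
    ctx_len: contextLength,
    // max_tokens must never exceed the effective context length.
    max_tokens: contextLength
      ? Math.min(parameters.max_tokens ?? 8192, contextLength)
      : parameters.max_tokens,
  }
}

// overrideParameters({ ctx_len: 32768 }, { max_tokens: 16384 })
// -> { ctx_len: 8192, max_tokens: 8192 }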

View File

@@ -29,15 +29,20 @@ const ModelLabel = ({ size, compact }: Props) => {
const { settings } = useSettings()
const getLabel = (size: number) => {
-const minimumRamModel = size * 1.25
-const availableRam =
-  settings?.run_mode === 'gpu'
-    ? availableVram * 1000000 // MB to bytes
-    : totalRam - usedRam + (activeModel?.metadata?.size ?? 0)
+const minimumRamModel = (size * 1.25) / (1024 * 1024)
+const availableRam = settings?.gpus?.some((gpu) => gpu.activated)
+  ? availableVram * 1000000 // MB to bytes
+  : totalRam -
+    (usedRam +
+      (activeModel?.metadata?.size
+        ? (activeModel.metadata.size * 1.25) / (1024 * 1024)
+        : 0))
if (minimumRamModel > totalRam) {
return (
<NotEnoughMemoryLabel
-unit={settings?.run_mode === 'gpu' ? 'VRAM' : 'RAM'}
+unit={settings?.gpus?.some((gpu) => gpu.activated) ? 'VRAM' : 'RAM'}
compact={compact}
/>
)
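
To make the unit handling in the new minimumRamModel line concrete: the division by 1024 * 1024 implies that size arrives as a byte count, and the 1.25 factor is the loading-overhead margin this component already uses. A tiny sketch (the helper name is hypothetical, not part of the codebase):

// Hypothetical helper mirroring the formula from the hunk above.
const toMinimumRamMiB = (sizeBytes: number): number =>
  (sizeBytes * 1.25) / (1024 * 1024)

// A 4 GiB model file needs roughly 5120 MiB once the 1.25x margin is applied:
console.log(toMinimumRamMiB(4 * 1024 * 1024 * 1024)) // 5120

Expressing the requirement at a MiB-like scale presumably matches the scale of the totalRam and usedRam counters it is compared against; the GPU branch is unchanged apart from the gpus.some((gpu) => gpu.activated) check replacing run_mode === 'gpu'.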

View File

@@ -143,8 +143,7 @@ export default function ModelHandler() {
return
}
-// The thread title should not be updated if the message is less than 10 words
-// And no new line character is present
+// No new line character is presented in the title
// And non-alphanumeric characters should be removed
if (messageContent.includes('\n')) {
messageContent = messageContent.replace(/\n/g, ' ')
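
The updated comments describe the thread-title clean-up in two steps: flatten newlines to spaces, then drop non-alphanumeric characters. A minimal sketch of that behaviour as a standalone helper (the name sanitizeTitle is illustrative; the real logic stays inline in ModelHandler and may differ in detail):

// Illustrative helper; not the actual implementation.
function sanitizeTitle(messageContent: string): string {
  // Flatten new line characters into spaces, as in the hunk above.
  let title = messageContent.replace(/\n/g, ' ')
  // Drop characters that are neither alphanumeric nor whitespace,
  // per the "non-alphanumeric characters should be removed" comment.
  title = title.replace(/[^a-zA-Z0-9\s]/g, '')
  return title.trim()
}

// sanitizeTitle('Hello,\nworld!') -> 'Hello world'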

View File

@@ -93,14 +93,8 @@ const ServerLogs = (props: ServerLogsProps) => {
}, [listRef.current?.scrollHeight, isUserManuallyScrollingUp, logs])
return (
-<ScrollArea
-  ref={listRef}
-  className={twMerge(
-    'h-[calc(100%-49px)] w-full p-4 py-0',
-    logs.length === 0 && 'mx-auto'
-  )}
-  onScroll={handleScroll}
->
+<>
+<div>
{withCopy && (
<div className="absolute right-2 top-7">
<div className="flex w-full flex-row gap-2">

@@ -140,6 +134,15 @@ const ServerLogs = (props: ServerLogsProps) => {
</div>
</div>
)}
+</div>
+<ScrollArea
+  ref={listRef}
+  className={twMerge(
+    'h-[calc(100%-49px)] w-full p-4 py-0',
+    logs.length === 0 && 'mx-auto'
+  )}
+  onScroll={handleScroll}
+>
<div className="flex h-full w-full flex-col">
{logs.length > 0 ? (
<code className="inline-block max-w-[38vw] whitespace-break-spaces text-[13px] lg:max-w-[40vw] xl:max-w-[50vw]">

@@ -155,7 +158,7 @@ const ServerLogs = (props: ServerLogsProps) => {
<div
className={twMerge(
'mt-24 flex w-full flex-col items-center justify-center',
-withCopy && 'mt-0 py-2'
+withCopy && 'mt-4 py-2'
)}
>
<svg

@@ -287,11 +290,14 @@ const ServerLogs = (props: ServerLogsProps) => {
</linearGradient>
</defs>
</svg>
-<p className="text-[hsla(var(--text-secondary)] mt-4">Empty logs</p>
+<p className="text-[hsla(var(--text-secondary)] mt-4">
+  Empty logs
+</p>
</div>
)}
</div>
</ScrollArea>
+</>
)
}

View File

@@ -73,7 +73,7 @@ const SliderRightPanel = ({
trigger={
<Input
type="text"
-className="-mt-4 h-8 w-[60px]"
+className="-mt-4 h-8 w-[68px]"
min={min}
max={max}
value={val}

View File

@@ -8,6 +8,8 @@ export const mainViewStateAtom = atom<MainViewState>(MainViewState.Thread)
export const defaultJanDataFolderAtom = atom<string>('')
+export const LocalEngineDefaultVariantAtom = atom<string>('')
const SHOW_RIGHT_PANEL = 'showRightPanel'
// Store panel atom
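
For context on the one-line addition above: atom<string>('') creates a writable Jotai atom, which components read and update through the standard Jotai hooks. A minimal usage sketch (the component, option values, and import path are illustrative only):

import { useAtom } from 'jotai'

// Import path assumed for illustration; use the actual atoms module.
import { LocalEngineDefaultVariantAtom } from '@/helpers/atoms/App.atom'

const VariantPicker = () => {
  const [variant, setVariant] = useAtom(LocalEngineDefaultVariantAtom)

  return (
    <select value={variant} onChange={(e) => setVariant(e.target.value)}>
      <option value="">Auto</option>
      <option value="cuda">cuda</option>
      <option value="cpu">cpu</option>
    </select>
  )
}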

Some files were not shown because too many files have changed in this diff.