Merge pull request #4683 from janhq/chore/sync-release-to-dev
chore: sync release v0.5.15 branch into dev branch
commit c4d7a143eb
.github/workflows/jan-electron-build-beta.yml

@@ -9,31 +9,6 @@ jobs:
  get-update-version:
    uses: ./.github/workflows/template-get-update-version.yml

-  create-draft-release:
-    runs-on: ubuntu-latest
-    if: github.event_name == 'push' && startsWith(github.ref, 'refs/tags/')
-    outputs:
-      upload_url: ${{ steps.create_release.outputs.upload_url }}
-      version: ${{ steps.get_version.outputs.version }}
-    permissions:
-      contents: write
-    steps:
-      - name: Extract tag name without v prefix
-        id: get_version
-        run: echo "VERSION=${GITHUB_REF#refs/tags/v}" >> $GITHUB_ENV && echo "::set-output name=version::${GITHUB_REF#refs/tags/v}"
-        env:
-          GITHUB_REF: ${{ github.ref }}
-      - name: Create Draft Release
-        id: create_release
-        uses: softprops/action-gh-release@v2
-        with:
-          tag_name: ${{ github.ref_name }}
-          token: ${{ secrets.GITHUB_TOKEN }}
-          name: "${{ env.VERSION }}"
-          draft: true
-          prerelease: false
-          generate_release_notes: true
-
  build-macos:
    uses: ./.github/workflows/template-build-macos.yml
    secrets: inherit

@@ -65,7 +40,7 @@ jobs:
      beta: true

  sync-temp-to-latest:
-    needs: [build-macos, create-draft-release, build-windows-x64, build-linux-x64]
+    needs: [build-macos, build-windows-x64, build-linux-x64]
    runs-on: ubuntu-latest
    permissions:
      contents: write

@@ -82,19 +57,15 @@ jobs:
          AWS_DEFAULT_REGION: ${{ secrets.DELTA_AWS_REGION }}
          AWS_EC2_METADATA_DISABLED: "true"

-      - name: set release to prerelease
-        run: |
-          gh release edit v${{ needs.create-draft-release.outputs.version }} --draft=false --prerelease
-        env:
-          GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }}

  noti-discord-and-update-url-readme:
-    needs: [build-macos, create-draft-release, build-windows-x64, build-linux-x64, sync-temp-to-latest]
+    needs: [build-macos, get-update-version, build-windows-x64, build-linux-x64, sync-temp-to-latest]
    runs-on: ubuntu-latest
    steps:
      - name: Set version to environment variable
        run: |
-          echo "VERSION=${{ needs.create-draft-release.outputs.version }}" >> $GITHUB_ENV
+          VERSION=${{ needs.get-update-version.outputs.new_version }}
+          VERSION="${VERSION#v}"
+          echo "VERSION=$VERSION" >> $GITHUB_ENV

      - name: Notify Discord
        uses: Ilshidur/action-discord@master

@@ -105,6 +76,5 @@ jobs:
          - macOS Universal: https://delta.jan.ai/beta/jan-beta-mac-universal-{{ VERSION }}.dmg
          - Linux Deb: https://delta.jan.ai/beta/jan-beta-linux-amd64-{{ VERSION }}.deb
          - Linux AppImage: https://delta.jan.ai/beta/jan-beta-linux-x86_64-{{ VERSION }}.AppImage
-          - Github Release URL: https://github.com/janhq/jan/releases/tag/v{{ VERSION }}
        env:
          DISCORD_WEBHOOK: ${{ secrets.DISCORD_WEBHOOK_JAN_BETA }}
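With create-draft-release gone, the beta version now flows from the get-update-version template job, and the leading v is stripped in shell with VERSION="${VERSION#v}". A minimal TypeScript sketch of that same normalization (the sample tag value is an assumption for illustration):

// Mirrors VERSION="${VERSION#v}" from the workflow step above.
const newVersion = 'v0.5.15' // e.g. needs.get-update-version.outputs.new_version
const version = newVersion.replace(/^v/, '') // -> '0.5.15'
console.log(version)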
.github/workflows/template-build-jan-server.yml (deleted)

@@ -1,39 +0,0 @@
-name: build-jan-server
-on:
-  workflow_call:
-    inputs:
-      dockerfile_path:
-        required: false
-        type: string
-        default: './Dockerfile'
-      docker_image_tag:
-        required: true
-        type: string
-        default: 'ghcr.io/janhq/jan-server:dev-latest'
-
-jobs:
-  build:
-    runs-on: ubuntu-latest
-    env:
-      REGISTRY: ghcr.io
-      IMAGE_NAME: janhq/jan-server
-    permissions:
-      packages: write
-    steps:
-      - name: Checkout
-        uses: actions/checkout@v4
-
-      - name: Log in to the Container registry
-        uses: docker/login-action@v3
-        with:
-          registry: ${{ env.REGISTRY }}
-          username: ${{ github.actor }}
-          password: ${{ secrets.GITHUB_TOKEN }}
-
-      - name: Build and push Docker image
-        uses: docker/build-push-action@v3
-        with:
-          context: .
-          file: ${{ inputs.dockerfile_path }}
-          push: true
-          tags: ${{ inputs.docker_image_tag }}
@@ -83,7 +83,7 @@ jobs:
      cat ./electron/package.json
      echo "------------------------"
      cat ./package.json
-      jq '.build.publish = [{"provider": "generic", "url": "https://delta.jan.ai/beta", "channel": "beta"}, {"provider": "github", "owner": "janhq", "repo": "jan", "channel": "beta"}, {"provider": "s3", "acl": null, "bucket": "${{ secrets.DELTA_AWS_S3_BUCKET_NAME }}", "region": "${{ secrets.DELTA_AWS_REGION}}", "path": "temp-beta", "channel": "beta"}]' electron/package.json > /tmp/package.json
+      jq '.build.publish = [{"provider": "generic", "url": "https://delta.jan.ai/beta", "channel": "beta"}, {"provider": "s3", "acl": null, "bucket": "${{ secrets.DELTA_AWS_S3_BUCKET_NAME }}", "region": "${{ secrets.DELTA_AWS_REGION}}", "path": "temp-beta", "channel": "beta"}]' electron/package.json > /tmp/package.json
      mv /tmp/package.json electron/package.json
      cat electron/package.json
.github/workflows/template-build-macos.yml

@@ -99,7 +99,7 @@ jobs:
      cat ./electron/package.json
      echo "------------------------"
      cat ./package.json
-      jq '.build.publish = [{"provider": "generic", "url": "https://delta.jan.ai/beta", "channel": "beta"}, {"provider": "github", "owner": "janhq", "repo": "jan", "channel": "beta"}, {"provider": "s3", "acl": null, "bucket": "${{ secrets.DELTA_AWS_S3_BUCKET_NAME }}", "region": "${{ secrets.DELTA_AWS_REGION}}", "path": "temp-beta", "channel": "beta"}]' electron/package.json > /tmp/package.json
+      jq '.build.publish = [{"provider": "generic", "url": "https://delta.jan.ai/beta", "channel": "beta"}, {"provider": "s3", "acl": null, "bucket": "${{ secrets.DELTA_AWS_S3_BUCKET_NAME }}", "region": "${{ secrets.DELTA_AWS_REGION}}", "path": "temp-beta", "channel": "beta"}]' electron/package.json > /tmp/package.json
      mv /tmp/package.json electron/package.json
      cat electron/package.json

@@ -108,7 +108,7 @@ jobs:
      cat ./package.json
      echo "------------------------"
      cat ./electron/scripts/uninstaller.nsh
-      jq '.build.publish = [{"provider": "generic", "url": "https://delta.jan.ai/beta", "channel": "beta"}, {"provider": "github", "owner": "janhq", "repo": "jan", "channel": "beta"}, {"provider": "s3", "acl": null, "bucket": "${{ secrets.DELTA_AWS_S3_BUCKET_NAME }}", "region": "${{ secrets.DELTA_AWS_REGION}}", "path": "temp-beta", "channel": "beta"}]' electron/package.json > /tmp/package.json
+      jq '.build.publish = [{"provider": "generic", "url": "https://delta.jan.ai/beta", "channel": "beta"}, {"provider": "s3", "acl": null, "bucket": "${{ secrets.DELTA_AWS_S3_BUCKET_NAME }}", "region": "${{ secrets.DELTA_AWS_REGION}}", "path": "temp-beta", "channel": "beta"}]' electron/package.json > /tmp/package.json
      mv /tmp/package.json electron/package.json
      cat electron/package.json
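Each jq edit above drops the github provider from electron-builder's build.publish array, so beta artifacts publish only to the generic delta.jan.ai endpoint and the S3 temp-beta path. A sketch of the resulting config as a TypeScript literal (the bucket and region placeholders stand in for repository secrets injected at build time):

// Assumed shape of electron/package.json's build.publish after the jq edit above.
const publish = [
  { provider: 'generic', url: 'https://delta.jan.ai/beta', channel: 'beta' },
  {
    provider: 's3',
    acl: null,
    bucket: '<DELTA_AWS_S3_BUCKET_NAME>', // placeholder for the repository secret
    region: '<DELTA_AWS_REGION>',         // placeholder for the repository secret
    path: 'temp-beta',
    channel: 'beta',
  },
]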
@@ -12,6 +12,7 @@ export enum ExtensionTypeEnum {
  SystemMonitoring = 'systemMonitoring',
  HuggingFace = 'huggingFace',
  Engine = 'engine',
+  Hardware = 'hardware',
}

export interface ExtensionType {
@@ -38,8 +38,14 @@ describe('OAIEngine', () => {

  it('should subscribe to events on load', () => {
    engine.onLoad()
-    expect(events.on).toHaveBeenCalledWith(MessageEvent.OnMessageSent, expect.any(Function))
-    expect(events.on).toHaveBeenCalledWith(InferenceEvent.OnInferenceStopped, expect.any(Function))
+    expect(events.on).toHaveBeenCalledWith(
+      MessageEvent.OnMessageSent,
+      expect.any(Function)
+    )
+    expect(events.on).toHaveBeenCalledWith(
+      InferenceEvent.OnInferenceStopped,
+      expect.any(Function)
+    )
  })

  it('should handle inference request', async () => {

@@ -77,7 +83,12 @@ describe('OAIEngine', () => {
    expect(events.emit).toHaveBeenCalledWith(
      MessageEvent.OnMessageUpdate,
      expect.objectContaining({
-        content: [{ type: ContentType.Text, text: { value: 'test response', annotations: [] } }],
+        content: [
+          {
+            type: ContentType.Text,
+            text: { value: 'test response', annotations: [] },
+          },
+        ],
        status: MessageStatus.Ready,
      })
    )

@@ -101,11 +112,10 @@ describe('OAIEngine', () => {

    await engine.inference(data)

-    expect(events.emit).toHaveBeenCalledWith(
+    expect(events.emit).toHaveBeenLastCalledWith(
      MessageEvent.OnMessageUpdate,
      expect.objectContaining({
        content: [{ type: ContentType.Text, text: { value: 'test error', annotations: [] } }],
-        status: MessageStatus.Error,
+        status: 'error',
        error_code: 500,
      })
    )
@@ -42,7 +42,9 @@ export abstract class OAIEngine extends AIEngine {
   */
  override onLoad() {
    super.onLoad()
-    events.on(MessageEvent.OnMessageSent, (data: MessageRequest) => this.inference(data))
+    events.on(MessageEvent.OnMessageSent, (data: MessageRequest) =>
+      this.inference(data)
+    )
    events.on(InferenceEvent.OnInferenceStopped, () => this.stopInference())
  }

@@ -128,7 +130,9 @@ export abstract class OAIEngine extends AIEngine {
        events.emit(MessageEvent.OnMessageUpdate, message)
      },
      complete: async () => {
-        message.status = message.content.length ? MessageStatus.Ready : MessageStatus.Error
+        message.status = message.content.length
+          ? MessageStatus.Ready
+          : MessageStatus.Error
        events.emit(MessageEvent.OnMessageUpdate, message)
      },
      error: async (err: any) => {

@@ -141,7 +145,10 @@ export abstract class OAIEngine extends AIEngine {
        message.content[0] = {
          type: ContentType.Text,
          text: {
-            value: err.message,
+            value:
+              typeof message === 'string'
+                ? err.message
+                : (JSON.stringify(err.message) ?? err.detail),
            annotations: [],
          },
        }
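The error branch above stops interpolating err.message directly: a non-string payload is serialized, with err.detail as a fallback. A standalone sketch of that split (the sample error object is invented; note the committed code tests typeof message, the enclosing message variable, rather than err.message):

// Sketch, assuming the intent is to distinguish string from structured payloads.
const err: any = { message: { error: 'quota exceeded' }, detail: 'fallback detail' }
const value =
  typeof err.message === 'string'
    ? err.message
    : (JSON.stringify(err.message) ?? err.detail)
console.log(value) // '{"error":"quota exceeded"}'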
@@ -1,14 +1,17 @@
import { lastValueFrom, Observable } from 'rxjs'
import { requestInference } from './sse'

-import { ReadableStream } from 'stream/web';
+import { ReadableStream } from 'stream/web'
describe('requestInference', () => {
  it('should send a request to the inference server and return an Observable', () => {
    // Mock the fetch function
    const mockFetch: any = jest.fn(() =>
      Promise.resolve({
        ok: true,
-        json: () => Promise.resolve({ choices: [{ message: { content: 'Generated response' } }] }),
+        json: () =>
+          Promise.resolve({
+            choices: [{ message: { content: 'Generated response' } }],
+          }),
        headers: new Headers(),
        redirected: false,
        status: 200,

@@ -36,7 +39,10 @@ describe('requestInference', () => {
    const mockFetch: any = jest.fn(() =>
      Promise.resolve({
        ok: false,
-        json: () => Promise.resolve({ error: { message: 'Wrong API Key', code: 'invalid_api_key' } }),
+        json: () =>
+          Promise.resolve({
+            error: { message: 'Invalid API Key.', code: 'invalid_api_key' },
+          }),
        headers: new Headers(),
        redirected: false,
        status: 401,

@@ -56,7 +62,10 @@ describe('requestInference', () => {

    // Assert the expected behavior
    expect(result).toBeInstanceOf(Observable)
-    expect(lastValueFrom(result)).rejects.toEqual({ message: 'Wrong API Key', code: 'invalid_api_key' })
+    expect(lastValueFrom(result)).rejects.toEqual({
+      message: 'Invalid API Key.',
+      code: 'invalid_api_key',
+    })
  })
})

@@ -65,7 +74,10 @@ describe('requestInference', () => {
    const mockFetch: any = jest.fn(() =>
      Promise.resolve({
        ok: true,
-        json: () => Promise.resolve({ choices: [{ message: { content: 'Generated response' } }] }),
+        json: () =>
+          Promise.resolve({
+            choices: [{ message: { content: 'Generated response' } }],
+          }),
        headers: new Headers(),
        redirected: false,
        status: 200,

@@ -78,17 +90,24 @@ describe('requestInference', () => {
    const inferenceUrl = 'https://inference-server.com'
    const requestBody = { message: 'Hello' }
    const model = { id: 'model-id', parameters: { stream: false } }
-    const transformResponse = (data: any) => data.choices[0].message.content.toUpperCase()
+    const transformResponse = (data: any) =>
+      data.choices[0].message.content.toUpperCase()

    // Call the function
-    const result = requestInference(inferenceUrl, requestBody, model, undefined, undefined, transformResponse)
+    const result = requestInference(
+      inferenceUrl,
+      requestBody,
+      model,
+      undefined,
+      undefined,
+      transformResponse
+    )

    // Assert the expected behavior
    expect(result).toBeInstanceOf(Observable)
    expect(lastValueFrom(result)).resolves.toEqual('GENERATED RESPONSE')
  })

  it('should handle a successful response with streaming enabled', () => {
    // Mock the fetch function
    const mockFetch: any = jest.fn(() =>

@@ -96,29 +115,32 @@ describe('requestInference', () => {
        ok: true,
        body: new ReadableStream({
          start(controller) {
-            controller.enqueue(new TextEncoder().encode('data: {"choices": [{"delta": {"content": "Streamed"}}]}'));
-            controller.enqueue(new TextEncoder().encode('data: [DONE]'));
-            controller.close();
-          }
+            controller.enqueue(
+              new TextEncoder().encode(
+                'data: {"choices": [{"delta": {"content": "Streamed"}}]}'
+              )
+            )
+            controller.enqueue(new TextEncoder().encode('data: [DONE]'))
+            controller.close()
+          },
        }),
        headers: new Headers(),
        redirected: false,
        status: 200,
        statusText: 'OK',
      })
-    );
-    jest.spyOn(global, 'fetch').mockImplementation(mockFetch);
+    )
+    jest.spyOn(global, 'fetch').mockImplementation(mockFetch)

    // Define the test inputs
-    const inferenceUrl = 'https://inference-server.com';
-    const requestBody = { message: 'Hello' };
-    const model = { id: 'model-id', parameters: { stream: true } };
+    const inferenceUrl = 'https://inference-server.com'
+    const requestBody = { message: 'Hello' }
+    const model = { id: 'model-id', parameters: { stream: true } }

    // Call the function
-    const result = requestInference(inferenceUrl, requestBody, model);
+    const result = requestInference(inferenceUrl, requestBody, model)

    // Assert the expected behavior
-    expect(result).toBeInstanceOf(Observable);
-    expect(lastValueFrom(result)).resolves.toEqual('Streamed');
-  });
+    expect(result).toBeInstanceOf(Observable)
+    expect(lastValueFrom(result)).resolves.toEqual('Streamed')
+  })
@@ -32,21 +32,20 @@ export function requestInference(
    })
      .then(async (response) => {
        if (!response.ok) {
-          const data = await response.json()
-          let errorCode = ErrorCode.Unknown
-          if (data.error) {
-            errorCode = data.error.code ?? data.error.type ?? ErrorCode.Unknown
-          } else if (response.status === 401) {
-            errorCode = ErrorCode.InvalidApiKey
+          if (response.status === 401) {
+            throw {
+              code: ErrorCode.InvalidApiKey,
+              message: 'Invalid API Key.',
+            }
          }
-          const error = {
-            message: data.error?.message ?? data.message ?? 'Error occurred.',
-            code: errorCode,
-          }
-          subscriber.error(error)
-          subscriber.complete()
+          let data = await response.json()
+          try {
+            handleError(data)
+          } catch (err) {
+            subscriber.error(err)
+            return
+          }
        }
        // There could be overriden stream parameter in the model
        // that is set in request body (transformed payload)
        if (

@@ -54,9 +53,10 @@ export function requestInference(
          model.parameters?.stream === false
        ) {
          const data = await response.json()
-          if (data.error || data.message) {
-            subscriber.error(data.error ?? data)
-            subscriber.complete()
+          try {
+            handleError(data)
+          } catch (err) {
+            subscriber.error(err)
+            return
          }
          if (transformResponse) {

@@ -91,13 +91,10 @@ export function requestInference(
              const toParse = cachedLines + line
              if (!line.includes('data: [DONE]')) {
                const data = JSON.parse(toParse.replace('data: ', ''))
-                if (
-                  'error' in data ||
-                  'message' in data ||
-                  'detail' in data
-                ) {
-                  subscriber.error(data.error ?? data)
-                  subscriber.complete()
+                try {
+                  handleError(data)
+                } catch (err) {
+                  subscriber.error(err)
+                  return
                }
                content += data.choices[0]?.delta?.content ?? ''

@@ -118,3 +115,18 @@ export function requestInference(
      .catch((err) => subscriber.error(err))
  })
}

+/**
+ * Handle error and normalize it to a common format.
+ * @param data
+ */
+const handleError = (data: any) => {
+  if (
+    data.error ||
+    data.message ||
+    data.detail ||
+    (Array.isArray(data) && data.length && data[0].error)
+  ) {
+    throw data.error ?? data[0]?.error ?? data
+  }
+}
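handleError replaces the three ad-hoc error checks removed in the hunks above with one normalizer used on non-OK responses, non-stream bodies, and streamed chunks alike. A usage sketch, with sample payloads that are assumptions for illustration:

try {
  handleError({ error: { message: 'Invalid API Key.', code: 'invalid_api_key' } })
} catch (err) {
  // err is { message: 'Invalid API Key.', code: 'invalid_api_key' }
}
try {
  handleError([{ error: { message: 'batch item failed' } }]) // array form
} catch (err) {
  // err is { message: 'batch item failed' }
}
handleError({ choices: [] }) // no error/message/detail key: returns without throwing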
core/src/browser/extensions/hardwareManagement.ts (new file)

@@ -0,0 +1,26 @@
+import { HardwareInformation } from '../../types'
+import { BaseExtension, ExtensionTypeEnum } from '../extension'
+
+/**
+ * Engine management extension. Persists and retrieves engine management.
+ * @abstract
+ * @extends BaseExtension
+ */
+export abstract class HardwareManagementExtension extends BaseExtension {
+  type(): ExtensionTypeEnum | undefined {
+    return ExtensionTypeEnum.Hardware
+  }
+
+  /**
+   * @returns A Promise that resolves to an object of list hardware.
+   */
+  abstract getHardware(): Promise<HardwareInformation>
+
+  /**
+   * @returns A Promise that resolves to an object of set active gpus.
+   */
+  abstract setAvtiveGpu(data: { gpus: number[] }): Promise<{
+    message: string
+    activated_gpus: number[]
+  }>
+}
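A hypothetical concrete subclass, only to illustrate the new abstract API (the class name and return values are invented; the lifecycle no-ops assume BaseExtension's usual onLoad/onUnload hooks, and setAvtiveGpu is spelled exactly as declared above):

class ExampleHardwareExtension extends HardwareManagementExtension {
  onLoad(): void {} // assumed BaseExtension lifecycle hook
  onUnload(): void {} // assumed BaseExtension lifecycle hook
  async getHardware(): Promise<HardwareInformation> {
    throw new Error('Method not implemented.') // real extensions query the hardware backend
  }
  async setAvtiveGpu(data: { gpus: number[] }) {
    return { message: 'activated', activated_gpus: data.gpus } // invented response
  }
}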
@@ -1,6 +1,5 @@
import { ConversationalExtension } from './index';
import { InferenceExtension } from './index';
-import { MonitoringExtension } from './index';
import { AssistantExtension } from './index';
import { ModelExtension } from './index';
import * as Engines from './index';

@@ -14,10 +13,6 @@ describe('index.ts exports', () => {
    expect(InferenceExtension).toBeDefined();
  });

-  test('should export MonitoringExtension', () => {
-    expect(MonitoringExtension).toBeDefined();
-  });
-
  test('should export AssistantExtension', () => {
    expect(AssistantExtension).toBeDefined();
  });
@@ -9,10 +9,7 @@ export { ConversationalExtension } from './conversational'
 */
export { InferenceExtension } from './inference'

-/**
- * Monitoring extension for system monitoring.
- */
-export { MonitoringExtension } from './monitoring'

/**
 * Assistant extension for managing assistants.

@@ -33,3 +30,8 @@ export * from './engines'
 * Engines Management
 */
export * from './enginesManagement'
+
+/**
+ * Hardware Management
+ */
+export * from './hardwareManagement'
@@ -1,42 +0,0 @@
-
-import { ExtensionTypeEnum } from '../extension';
-import { MonitoringExtension } from './monitoring';
-
-it('should have the correct type', () => {
-  class TestMonitoringExtension extends MonitoringExtension {
-    getGpuSetting(): Promise<GpuSetting | undefined> {
-      throw new Error('Method not implemented.');
-    }
-    getResourcesInfo(): Promise<any> {
-      throw new Error('Method not implemented.');
-    }
-    getCurrentLoad(): Promise<any> {
-      throw new Error('Method not implemented.');
-    }
-    getOsInfo(): Promise<OperatingSystemInfo> {
-      throw new Error('Method not implemented.');
-    }
-  }
-  const monitoringExtension = new TestMonitoringExtension();
-  expect(monitoringExtension.type()).toBe(ExtensionTypeEnum.SystemMonitoring);
-});
-
-
-it('should create an instance of MonitoringExtension', () => {
-  class TestMonitoringExtension extends MonitoringExtension {
-    getGpuSetting(): Promise<GpuSetting | undefined> {
-      throw new Error('Method not implemented.');
-    }
-    getResourcesInfo(): Promise<any> {
-      throw new Error('Method not implemented.');
-    }
-    getCurrentLoad(): Promise<any> {
-      throw new Error('Method not implemented.');
-    }
-    getOsInfo(): Promise<OperatingSystemInfo> {
-      throw new Error('Method not implemented.');
-    }
-  }
-  const monitoringExtension = new TestMonitoringExtension();
-  expect(monitoringExtension).toBeInstanceOf(MonitoringExtension);
-});
@@ -1,20 +0,0 @@
-import { BaseExtension, ExtensionTypeEnum } from '../extension'
-import { GpuSetting, MonitoringInterface, OperatingSystemInfo } from '../../types'
-
-/**
- * Monitoring extension for system monitoring.
- * @extends BaseExtension
- */
-export abstract class MonitoringExtension extends BaseExtension implements MonitoringInterface {
-  /**
-   * Monitoring extension type.
-   */
-  type(): ExtensionTypeEnum | undefined {
-    return ExtensionTypeEnum.SystemMonitoring
-  }
-
-  abstract getGpuSetting(): Promise<GpuSetting | undefined>
-  abstract getResourcesInfo(): Promise<any>
-  abstract getCurrentLoad(): Promise<any>
-  abstract getOsInfo(): Promise<OperatingSystemInfo>
-}
@@ -1,4 +1,5 @@
export type AppConfiguration = {
  data_folder: string
  quick_ask: boolean
+  distinct_id?: string
}
@@ -18,6 +18,7 @@ export type EngineMetadata = {
      template?: string
    }
  }
+  explore_models_url?: string
}

export type EngineVariant = {
core/src/types/hardware/index.ts (new file)

@@ -0,0 +1,55 @@
+export type Cpu = {
+  arch: string
+  cores: number
+  instructions: string[]
+  model: string
+  usage: number
+}
+
+export type GpuAdditionalInformation = {
+  compute_cap: string
+  driver_version: string
+}
+
+export type Gpu = {
+  activated: boolean
+  additional_information?: GpuAdditionalInformation
+  free_vram: number
+  id: string
+  name: string
+  total_vram: number
+  uuid: string
+  version: string
+}
+
+export type Os = {
+  name: string
+  version: string
+}
+
+export type Power = {
+  battery_life: number
+  charging_status: string
+  is_power_saving: boolean
+}
+
+export type Ram = {
+  available: number
+  total: number
+  type: string
+}
+
+export type Storage = {
+  available: number
+  total: number
+  type: string
+}
+
+export type HardwareInformation = {
+  cpu: Cpu
+  gpus: Gpu[]
+  os: Os
+  power: Power
+  ram: Ram
+  storage: Storage
+}
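An illustrative value conforming to the new HardwareInformation type (all numbers and names are made-up sample data; the types above do not specify units for VRAM, RAM, or storage):

const sample: HardwareInformation = {
  cpu: { arch: 'x86_64', cores: 16, instructions: ['avx2'], model: 'Example CPU', usage: 12.5 },
  gpus: [
    {
      activated: true,
      additional_information: { compute_cap: '8.9', driver_version: '560.35' },
      free_vram: 10240,
      id: '0',
      name: 'Example GPU',
      total_vram: 12288,
      uuid: 'GPU-00000000',
      version: '12.4',
    },
  ],
  os: { name: 'Ubuntu', version: '22.04' },
  power: { battery_life: 100, charging_status: 'charging', is_power_saving: false },
  ram: { available: 20480, total: 32768, type: 'DDR5' },
  storage: { available: 512000, total: 1024000, type: 'SSD' },
}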
@@ -4,7 +4,6 @@ import * as model from './model';
import * as thread from './thread';
import * as message from './message';
import * as inference from './inference';
-import * as monitoring from './monitoring';
import * as file from './file';
import * as config from './config';
import * as huggingface from './huggingface';

@@ -18,7 +17,6 @@ import * as setting from './setting';
  expect(thread).toBeDefined();
  expect(message).toBeDefined();
  expect(inference).toBeDefined();
-  expect(monitoring).toBeDefined();
  expect(file).toBeDefined();
  expect(config).toBeDefined();
  expect(huggingface).toBeDefined();
@@ -3,7 +3,6 @@ export * from './model'
export * from './thread'
export * from './message'
export * from './inference'
-export * from './monitoring'
export * from './file'
export * from './config'
export * from './huggingface'

@@ -11,3 +10,4 @@ export * from './miscellaneous'
export * from './api'
export * from './setting'
export * from './engine'
+export * from './hardware'
@@ -1,33 +1,25 @@
+import { GpuAdditionalInformation } from '../hardware'

export type SystemResourceInfo = {
  memAvailable: number
}

export type RunMode = 'cpu' | 'gpu'

export type GpuSetting = {
  notify: boolean
  run_mode: RunMode
  nvidia_driver: {
    exist: boolean
    version: string
  }
  cuda: {
    exist: boolean
    version: string
  }
  gpus: GpuSettingInfo[]
  gpu_highest_vram: string
  gpus_in_use: string[]
  is_initial: boolean
  // TODO: This needs to be set based on user toggle in settings
  vulkan: boolean
  cpu?: any
}

export type GpuSettingInfo = {
  activated: boolean
  free_vram: number
  id: string
  vram: string
  name: string
  arch?: string
  total_vram: number
  uuid: string
  version: string
  additional_information?: GpuAdditionalInformation
}

export type SystemInformation = {

@@ -42,9 +34,6 @@ export type SupportedPlatform = SupportedPlatformTuple[number]
export type OperatingSystemInfo = {
  platform: SupportedPlatform | 'unknown'
  arch: string
  release: string
  machine: string
  version: string
  totalMem: number
  freeMem: number
}
@@ -71,7 +71,7 @@ export type Model = {
  /**
   * The model identifier, modern version of id.
   */
-  mode?: string
+  model?: string

  /**
   * Human-readable name that is used for UI.

@@ -150,6 +150,7 @@ export type ModelSettingParams = {
 */
export type ModelRuntimeParams = {
  temperature?: number
+  max_temperature?: number
  token_limit?: number
  top_k?: number
  top_p?: number
@@ -1,13 +0,0 @@
-import * as monitoringInterface from './monitoringInterface'
-import * as resourceInfo from './resourceInfo'
-
-import * as index from './index'
-
-it('should re-export all symbols from monitoringInterface and resourceInfo', () => {
-  for (const key in monitoringInterface) {
-    expect(index[key]).toBe(monitoringInterface[key])
-  }
-  for (const key in resourceInfo) {
-    expect(index[key]).toBe(resourceInfo[key])
-  }
-})

@@ -1,2 +0,0 @@
-export * from './monitoringInterface'
-export * from './resourceInfo'

@@ -1,29 +0,0 @@
-import { GpuSetting, OperatingSystemInfo } from '../miscellaneous'
-
-/**
- * Monitoring extension for system monitoring.
- * @extends BaseExtension
- */
-export interface MonitoringInterface {
-  /**
-   * Returns information about the system resources.
-   * @returns {Promise<any>} A promise that resolves with the system resources information.
-   */
-  getResourcesInfo(): Promise<any>
-
-  /**
-   * Returns the current system load.
-   * @returns {Promise<any>} A promise that resolves with the current system load.
-   */
-  getCurrentLoad(): Promise<any>
-
-  /**
-   * Returns the GPU configuration.
-   */
-  getGpuSetting(): Promise<GpuSetting | undefined>
-
-  /**
-   * Returns information about the operating system.
-   */
-  getOsInfo(): Promise<OperatingSystemInfo>
-}

@@ -1,6 +0,0 @@
-export type ResourceInfo = {
-  mem: {
-    totalMemory: number
-    usedMemory: number
-  }
-}
@@ -55,7 +55,7 @@ Extensions in Jan are configured through a JSON file that defines their behavior
    "productName": "Conversational",
    "version": "1.0.0",
    "main": "dist/index.js",
-    "description": "This extension enables conversations and state persistence via your filesystem",
+    "description": "This extension enables conversations and state persistence via your filesystem.",
    "url": "extension://@janhq/conversational-extension/dist/index.js"
  },
  "@janhq/inference-anthropic-extension": {

@@ -70,7 +70,7 @@ Extensions in Jan are configured through a JSON file that defines their behavior
    "productName": "Anthropic Inference Engine",
    "version": "1.0.2",
    "main": "dist/index.js",
-    "description": "This extension enables Anthropic chat completion API calls",
+    "description": "This extension enables Anthropic chat completion API calls.",
    "url": "extension://@janhq/inference-anthropic-extension/dist/index.js"
  },
  "@janhq/inference-triton-trt-llm-extension": {

@@ -85,7 +85,7 @@ Extensions in Jan are configured through a JSON file that defines their behavior
    "productName": "Triton-TRT-LLM Inference Engine",
    "version": "1.0.0",
    "main": "dist/index.js",
-    "description": "This extension enables Nvidia's TensorRT-LLM as an inference engine option",
+    "description": "This extension enables Nvidia's TensorRT-LLM as an inference engine option.",
    "url": "extension://@janhq/inference-triton-trt-llm-extension/dist/index.js"
  },
  "@janhq/inference-mistral-extension": {

@@ -100,7 +100,7 @@ Extensions in Jan are configured through a JSON file that defines their behavior
    "productName": "MistralAI Inference Engine",
    "version": "1.0.1",
    "main": "dist/index.js",
-    "description": "This extension enables Mistral chat completion API calls",
+    "description": "This extension enables Mistral chat completion API calls.",
    "url": "extension://@janhq/inference-mistral-extension/dist/index.js"
  },
  "@janhq/inference-martian-extension": {

@@ -115,7 +115,7 @@ Extensions in Jan are configured through a JSON file that defines their behavior
    "productName": "Martian Inference Engine",
    "version": "1.0.1",
    "main": "dist/index.js",
-    "description": "This extension enables Martian chat completion API calls",
+    "description": "This extension enables Martian chat completion API calls.",
    "url": "extension://@janhq/inference-martian-extension/dist/index.js"
  },
  "@janhq/inference-openrouter-extension": {

@@ -130,7 +130,7 @@ Extensions in Jan are configured through a JSON file that defines their behavior
    "productName": "OpenRouter Inference Engine",
    "version": "1.0.0",
    "main": "dist/index.js",
-    "description": "This extension enables Open Router chat completion API calls",
+    "description": "This extension enables Open Router chat completion API calls.",
    "url": "extension://@janhq/inference-openrouter-extension/dist/index.js"
  },
  "@janhq/inference-nvidia-extension": {

@@ -145,7 +145,7 @@ Extensions in Jan are configured through a JSON file that defines their behavior
    "productName": "NVIDIA NIM Inference Engine",
    "version": "1.0.1",
    "main": "dist/index.js",
-    "description": "This extension enables NVIDIA chat completion API calls",
+    "description": "This extension enables NVIDIA chat completion API calls.",
    "url": "extension://@janhq/inference-nvidia-extension/dist/index.js"
  },
  "@janhq/inference-groq-extension": {

@@ -160,7 +160,7 @@ Extensions in Jan are configured through a JSON file that defines their behavior
    "productName": "Groq Inference Engine",
    "version": "1.0.1",
    "main": "dist/index.js",
-    "description": "This extension enables fast Groq chat completion API calls",
+    "description": "This extension enables fast Groq chat completion API calls.",
    "url": "extension://@janhq/inference-groq-extension/dist/index.js"
  },
  "@janhq/inference-openai-extension": {

@@ -175,7 +175,7 @@ Extensions in Jan are configured through a JSON file that defines their behavior
    "productName": "OpenAI Inference Engine",
    "version": "1.0.2",
    "main": "dist/index.js",
-    "description": "This extension enables OpenAI chat completion API calls",
+    "description": "This extension enables OpenAI chat completion API calls.",
    "url": "extension://@janhq/inference-openai-extension/dist/index.js"
  },
  "@janhq/inference-cohere-extension": {

@@ -190,7 +190,7 @@ Extensions in Jan are configured through a JSON file that defines their behavior
    "productName": "Cohere Inference Engine",
    "version": "1.0.0",
    "main": "dist/index.js",
-    "description": "This extension enables Cohere chat completion API calls",
+    "description": "This extension enables Cohere chat completion API calls.",
    "url": "extension://@janhq/inference-cohere-extension/dist/index.js"
  },
  "@janhq/model-extension": {

@@ -205,7 +205,7 @@ Extensions in Jan are configured through a JSON file that defines their behavior
    "productName": "Model Management",
    "version": "1.0.33",
    "main": "dist/index.js",
-    "description": "Model Management Extension provides model exploration and seamless downloads",
+    "description": "Model Management Extension provides model exploration and seamless downloads.",
    "url": "extension://@janhq/model-extension/dist/index.js"
  },
  "@janhq/monitoring-extension": {

@@ -220,7 +220,7 @@ Extensions in Jan are configured through a JSON file that defines their behavior
    "productName": "System Monitoring",
    "version": "1.0.10",
    "main": "dist/index.js",
-    "description": "This extension provides system health and OS level data",
+    "description": "This extension provides system health and OS level data.",
    "url": "extension://@janhq/monitoring-extension/dist/index.js"
  },
  "@janhq/assistant-extension": {

@@ -235,7 +235,7 @@ Extensions in Jan are configured through a JSON file that defines their behavior
    "productName": "Jan Assistant",
    "version": "1.0.1",
    "main": "dist/index.js",
-    "description": "This extension enables assistants, including Jan, a default assistant that can call all downloaded models",
+    "description": "This extension enables assistants, including Jan, a default assistant that can call all downloaded models.",
    "url": "extension://@janhq/assistant-extension/dist/index.js"
  },
  "@janhq/tensorrt-llm-extension": {
@@ -47,8 +47,8 @@ To add a new remote engine:
|-------|-------------|----------|
| Engine Name | Name for your engine (e.g., "OpenAI", "Claude") | ✓ |
| API URL | The base URL of the provider's API | ✓ |
-| API Key | Your authentication key from the provider | ✓ |
-| Model List URL | URL for fetching available models | |
+| API Key | Your authentication key to activate this engine | ✓ |
+| Model List URL | The endpoint URL to fetch available models |
| API Key Template | Custom authorization header format | |
| Request Format Conversion | Function to convert Jan's request format to provider's format | |
| Response Format Conversion | Function to convert provider's response format to Jan's format | |
@@ -28,6 +28,7 @@ import { setupReactDevTool } from './utils/dev'
import { trayManager } from './managers/tray'
import { logSystemInfo } from './utils/system'
import { registerGlobalShortcuts } from './utils/shortcut'
+import { registerLogger } from './utils/logger'

const preloadPath = join(__dirname, 'preload.js')
const rendererPath = join(__dirname, '..', 'renderer')

@@ -79,6 +80,7 @@ app
  })
  .then(setupCore)
  .then(createUserSpace)
+  .then(registerLogger)
  .then(migrate)
  .then(setupExtensions)
  .then(setupMenu)
@@ -1,6 +1,6 @@
{
  "name": "jan",
-  "version": "0.1.4",
+  "version": "0.1.1737985524",
  "main": "./build/main.js",
  "author": "Jan <service@jan.ai>",
  "license": "MIT",
@@ -1,16 +1,28 @@
-import fs from 'fs'
+import {
+  createWriteStream,
+  existsSync,
+  mkdirSync,
+  readdir,
+  stat,
+  unlink,
+  writeFileSync,
+} from 'fs'
import util from 'util'
import {
  getAppConfigurations,
  getJanDataFolderPath,
  Logger,
  LoggerManager,
} from '@janhq/core/node'
import path, { join } from 'path'

-export class FileLogger extends Logger {
+/**
+ * File Logger
+ */
+export class FileLogger implements Logger {
  name = 'file'
  logCleaningInterval: number = 120000
-  timeout: NodeJS.Timeout | null = null
+  timeout: NodeJS.Timeout | undefined
  appLogPath: string = './'
  logEnabled: boolean = true

@@ -18,14 +30,13 @@ export class FileLogger extends Logger {
    logEnabled: boolean = true,
    logCleaningInterval: number = 120000
  ) {
-    super()
    this.logEnabled = logEnabled
    if (logCleaningInterval) this.logCleaningInterval = logCleaningInterval

    const appConfigurations = getAppConfigurations()
    const logFolderPath = join(appConfigurations.data_folder, 'logs')
-    if (!fs.existsSync(logFolderPath)) {
-      fs.mkdirSync(logFolderPath, { recursive: true })
+    if (!existsSync(logFolderPath)) {
+      mkdirSync(logFolderPath, { recursive: true })
    }

    this.appLogPath = join(logFolderPath, 'app.log')

@@ -69,8 +80,8 @@ export class FileLogger extends Logger {
    const logDirectory = path.join(getJanDataFolderPath(), 'logs')
    // Perform log cleaning
    const currentDate = new Date()
-    if (fs.existsSync(logDirectory))
-      fs.readdir(logDirectory, (err, files) => {
+    if (existsSync(logDirectory))
+      readdir(logDirectory, (err, files) => {
        if (err) {
          console.error('Error reading log directory:', err)
          return

@@ -78,7 +89,7 @@ export class FileLogger extends Logger {

        files.forEach((file) => {
          const filePath = path.join(logDirectory, file)
-          fs.stat(filePath, (err, stats) => {
+          stat(filePath, (err, stats) => {
            if (err) {
              console.error('Error getting file stats:', err)
              return

@@ -86,7 +97,7 @@ export class FileLogger extends Logger {

            // Check size
            if (stats.size > size) {
-              fs.unlink(filePath, (err) => {
+              unlink(filePath, (err) => {
                if (err) {
                  console.error('Error deleting log file:', err)
                  return

@@ -103,7 +114,7 @@ export class FileLogger extends Logger {
              (1000 * 3600 * 24)
            )
            if (daysDifference > days) {
-              fs.unlink(filePath, (err) => {
+              unlink(filePath, (err) => {
                if (err) {
                  console.error('Error deleting log file:', err)
                  return

@@ -124,15 +135,20 @@ export class FileLogger extends Logger {
  }
}

+/**
+ * Write log function implementation
+ * @param message
+ * @param logPath
+ */
const writeLog = (message: string, logPath: string) => {
-  if (!fs.existsSync(logPath)) {
+  if (!existsSync(logPath)) {
    const logDirectory = path.join(getJanDataFolderPath(), 'logs')
-    if (!fs.existsSync(logDirectory)) {
-      fs.mkdirSync(logDirectory)
+    if (!existsSync(logDirectory)) {
+      mkdirSync(logDirectory)
    }
-    fs.writeFileSync(logPath, message)
+    writeFileSync(logPath, message)
  } else {
-    const logFile = fs.createWriteStream(logPath, {
+    const logFile = createWriteStream(logPath, {
      flags: 'a',
    })
    logFile.write(util.format(message) + '\n')

@@ -140,3 +156,12 @@ const writeLog = (message: string, logPath: string) => {
    console.debug(message)
  }
}

+/**
+ * Register logger for global application logging
+ */
+export const registerLogger = () => {
+  const logger = new FileLogger()
+  LoggerManager.instance().register(logger)
+  logger.cleanLogs()
+}
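registerLogger gives callers a one-line way to install file logging: it builds a FileLogger, registers it with the LoggerManager singleton, and kicks off log cleaning; the main.ts hunk earlier wires it into the startup promise chain. A sketch of constructing the logger directly with the constructor parameters shown above (the argument values are assumptions):

const quietLogger = new FileLogger(/* logEnabled */ false, /* logCleaningInterval */ 60000)
LoggerManager.instance().register(quietLogger)
quietLogger.cleanLogs()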
@@ -6,6 +6,8 @@ import groq from './resources/groq.json' with { type: 'json' }
import martian from './resources/martian.json' with { type: 'json' }
import mistral from './resources/mistral.json' with { type: 'json' }
import nvidia from './resources/nvidia.json' with { type: 'json' }
+import deepseek from './resources/deepseek.json' with { type: 'json' }
+import googleGemini from './resources/google_gemini.json' with { type: 'json' }

import anthropicModels from './models/anthropic.json' with { type: 'json' }
import cohereModels from './models/cohere.json' with { type: 'json' }

@@ -15,6 +17,8 @@ import groqModels from './models/groq.json' with { type: 'json' }
import martianModels from './models/martian.json' with { type: 'json' }
import mistralModels from './models/mistral.json' with { type: 'json' }
import nvidiaModels from './models/nvidia.json' with { type: 'json' }
+import deepseekModels from './models/deepseek.json' with { type: 'json' }
+import googleGeminiModels from './models/google_gemini.json' with { type: 'json' }

const engines = [
  anthropic,

@@ -25,6 +29,8 @@ const engines = [
  mistral,
  martian,
  nvidia,
+  deepseek,
+  googleGemini,
]
const models = [
  ...anthropicModels,

@@ -35,5 +41,7 @@ const models = [
  ...mistralModels,
  ...martianModels,
  ...nvidiaModels,
+  ...deepseekModels,
+  ...googleGeminiModels,
]
export { engines, models }
@@ -8,6 +8,7 @@
    "inference_params": {
      "max_tokens": 4096,
      "temperature": 0.7,
+      "max_temperature": 1.0,
      "stream": true
    },
    "engine": "anthropic"

@@ -21,6 +22,7 @@
    "inference_params": {
      "max_tokens": 8192,
      "temperature": 0.7,
+      "max_temperature": 1.0,
      "stream": true
    },
    "engine": "anthropic"

@@ -34,6 +36,7 @@
    "inference_params": {
      "max_tokens": 8192,
      "temperature": 0.7,
+      "max_temperature": 1.0,
      "stream": true
    },
    "engine": "anthropic"
@@ -8,6 +8,7 @@
    "inference_params": {
      "max_tokens": 4096,
      "temperature": 0.7,
+      "max_temperature": 1.0,
      "stream": false
    },
    "engine": "cohere"

@@ -21,6 +22,7 @@
    "inference_params": {
      "max_tokens": 4096,
      "temperature": 0.7,
+      "max_temperature": 1.0,
      "stream": false
    },
    "engine": "cohere"
extensions/engine-management-extension/models/deepseek.json (new file)

@@ -0,0 +1,28 @@
+[
+  {
+    "model": "deepseek-chat",
+    "object": "model",
+    "name": "DeepSeek Chat",
+    "version": "1.0",
+    "description": "The deepseek-chat model has been upgraded to DeepSeek-V3. deepseek-reasoner points to the new model DeepSeek-R1",
+    "inference_params": {
+      "max_tokens": 8192,
+      "temperature": 0.6,
+      "stream": true
+    },
+    "engine": "deepseek"
+  },
+  {
+    "model": "deepseek-reasoner",
+    "object": "model",
+    "name": "DeepSeek R1",
+    "version": "1.0",
+    "description": "CoT (Chain of Thought) is the reasoning content deepseek-reasoner gives before output the final answer. For details, please refer to Reasoning Model.",
+    "inference_params": {
+      "max_tokens": 8192,
+      "temperature": 0.6,
+      "stream": true
+    },
+    "engine": "deepseek"
+  }
+]
@@ -0,0 +1,67 @@
+[
+  {
+    "model": "gemini-2.0-flash",
+    "object": "model",
+    "name": "Gemini 2.0 Flash",
+    "version": "1.0",
+    "description": "Gemini 2.0 Flash delivers next-gen features and improved capabilities, including superior speed, native tool use, multimodal generation, and a 1M token context window.",
+    "inference_params": {
+      "max_tokens": 8192,
+      "temperature": 0.6,
+      "stream": true
+    },
+    "engine": "google_gemini"
+  },
+  {
+    "model": "gemini-2.0-flash-lite-preview",
+    "object": "model",
+    "name": "Gemini 2.0 Flash-Lite Preview",
+    "version": "1.0",
+    "description": "A Gemini 2.0 Flash model optimized for cost efficiency and low latency.",
+    "inference_params": {
+      "max_tokens": 8192,
+      "temperature": 0.6,
+      "stream": true
+    },
+    "engine": "google_gemini"
+  },
+  {
+    "model": "gemini-1.5-flash",
+    "object": "model",
+    "name": "Gemini 1.5 Flash",
+    "version": "1.0",
+    "description": "Gemini 1.5 Flash is a fast and versatile multimodal model for scaling across diverse tasks.",
+    "inference_params": {
+      "max_tokens": 8192,
+      "temperature": 0.6,
+      "stream": true
+    },
+    "engine": "google_gemini"
+  },
+  {
+    "model": "gemini-1.5-flash-8b",
+    "object": "model",
+    "name": "Gemini 1.5 Flash-8B",
+    "version": "1.0",
+    "description": "Gemini 1.5 Flash-8B is a small model designed for lower intelligence tasks.",
+    "inference_params": {
+      "max_tokens": 8192,
+      "temperature": 0.6,
+      "stream": true
+    },
+    "engine": "google_gemini"
+  },
+  {
+    "model": "gemini-1.5-pro",
+    "object": "model",
+    "name": "Gemini 1.5 Pro",
+    "version": "1.0",
+    "description": "Gemini 1.5 Pro is a mid-size multimodal model that is optimized for a wide-range of reasoning tasks. 1.5 Pro can process large amounts of data at once, including 2 hours of video, 19 hours of audio, codebases with 60,000 lines of code, or 2,000 pages of text. ",
+    "inference_params": {
+      "max_tokens": 8192,
+      "temperature": 0.6,
+      "stream": true
+    },
+    "engine": "google_gemini"
+  }
+]
@@ -8,6 +8,7 @@
    "inference_params": {
      "max_tokens": 32000,
      "temperature": 0.7,
+      "max_temperature": 1.0,
      "top_p": 0.95,
      "stream": true
    },

@@ -22,6 +23,7 @@
    "inference_params": {
      "max_tokens": 32000,
      "temperature": 0.7,
+      "max_temperature": 1.0,
      "top_p": 0.95,
      "stream": true
    },

@@ -36,6 +38,7 @@
    "inference_params": {
      "max_tokens": 32000,
      "temperature": 0.7,
+      "max_temperature": 1.0,
      "top_p": 0.95,
      "stream": true
    },
@@ -8,6 +8,7 @@
    "inference_params": {
      "max_tokens": 1024,
      "temperature": 0.3,
+      "max_temperature": 1.0,
      "top_p": 1,
      "stream": false,
      "frequency_penalty": 0,
@@ -79,12 +79,7 @@
    "description": "OpenAI o1 is a new model with complex reasoning",
    "format": "api",
    "inference_params": {
-      "max_tokens": 100000,
-      "temperature": 1,
-      "top_p": 1,
-      "stream": true,
-      "frequency_penalty": 0,
-      "presence_penalty": 0
+      "max_tokens": 100000
    },
    "engine": "openai"
  },

@@ -97,11 +92,7 @@
    "format": "api",
    "inference_params": {
      "max_tokens": 32768,
-      "temperature": 1,
-      "top_p": 1,
-      "stream": true,
-      "frequency_penalty": 0,
-      "presence_penalty": 0
+      "stream": true
    },
    "engine": "openai"
  },

@@ -114,11 +105,20 @@
    "format": "api",
    "inference_params": {
      "max_tokens": 65536,
-      "temperature": 1,
-      "top_p": 1,
-      "stream": true,
-      "frequency_penalty": 0,
-      "presence_penalty": 0
+      "stream": true
    },
    "engine": "openai"
  },
+  {
+    "model": "o3-mini",
+    "object": "model",
+    "name": "OpenAI o3-mini",
+    "version": "1.0",
+    "description": "OpenAI most recent reasoning model, providing high intelligence at the same cost and latency targets of o1-mini.",
+    "format": "api",
+    "inference_params": {
+      "max_tokens": 100000,
+      "stream": true
+    },
+    "engine": "openai"
+  }
@@ -1,16 +1,91 @@
[
  {
-    "model": "open-router-auto",
+    "model": "deepseek/deepseek-r1:free",
    "object": "model",
-    "name": "OpenRouter",
+    "name": "DeepSeek: R1",
    "version": "1.0",
    "description": "OpenRouter scouts for the lowest prices and best latencies/throughputs across dozens of providers, and lets you choose how to prioritize them.",
    "inference_params": {
      "max_tokens": 128000,
      "temperature": 0.7,
      "top_p": 0.95,
      "frequency_penalty": 0,
-      "presence_penalty": 0
+      "presence_penalty": 0,
+      "stream": true
    },
    "engine": "openrouter"
  },
+  {
+    "model": "deepseek/deepseek-r1-distill-llama-70b:free",
+    "object": "model",
+    "name": "DeepSeek: R1 Distill Llama 70B",
+    "version": "1.0",
+    "description": " OpenRouter scouts for the lowest prices and best latencies/throughputs across dozens of providers, and lets you choose how to prioritize them.",
+    "inference_params": {
+      "temperature": 0.7,
+      "top_p": 0.95,
+      "frequency_penalty": 0,
+      "presence_penalty": 0,
+      "stream": true
+    },
+    "engine": "openrouter"
+  },
+  {
+    "model": "deepseek/deepseek-r1-distill-llama-70b:free",
+    "object": "model",
+    "name": "DeepSeek: R1 Distill Llama 70B",
+    "version": "1.0",
+    "description": "OpenRouter scouts for the lowest prices and best latencies/throughputs across dozens of providers, and lets you choose how to prioritize them.",
+    "inference_params": {
+      "temperature": 0.7,
+      "top_p": 0.95,
+      "frequency_penalty": 0,
+      "presence_penalty": 0,
+      "stream": true
+    },
+    "engine": "openrouter"
+  },
+  {
+    "model": "meta-llama/llama-3.1-405b-instruct:free",
+    "object": "model",
+    "name": "Meta: Llama 3.1 405B Instruct",
+    "version": "1.0",
+    "description": "OpenRouter scouts for the lowest prices and best latencies/throughputs across dozens of providers, and lets you choose how to prioritize them.",
+    "inference_params": {
+      "temperature": 0.7,
+      "top_p": 0.95,
+      "frequency_penalty": 0,
+      "presence_penalty": 0,
+      "stream": true
+    },
+    "engine": "openrouter"
+  },
+  {
+    "model": "qwen/qwen-vl-plus:free",
+    "object": "model",
+    "name": "Qwen: Qwen VL Plus",
+    "version": "1.0",
+    "description": "OpenRouter scouts for the lowest prices and best latencies/throughputs across dozens of providers, and lets you choose how to prioritize them.",
+    "inference_params": {
+      "temperature": 0.7,
+      "top_p": 0.95,
+      "frequency_penalty": 0,
+      "presence_penalty": 0,
+      "stream": true
+    },
+    "engine": "openrouter"
+  },
+  {
+    "model": "qwen/qwen2.5-vl-72b-instruct:free",
+    "object": "model",
+    "name": "Qwen: Qwen2.5 VL 72B Instruct",
+    "version": "1.0",
+    "description": "OpenRouter scouts for the lowest prices and best latencies/throughputs across dozens of providers, and lets you choose how to prioritize them.",
+    "inference_params": {
+      "temperature": 0.7,
+      "top_p": 0.95,
+      "frequency_penalty": 0,
+      "presence_penalty": 0,
+      "stream": true
+    },
+    "engine": "openrouter"
+  }
@@ -29,12 +29,10 @@
  },
  "dependencies": {
    "@janhq/core": "../../core/package.tgz",
-    "cpu-instructions": "^0.0.13",
    "ky": "^1.7.2",
    "p-queue": "^8.0.1"
  },
  "bundledDependencies": [
-    "cpu-instructions",
    "@janhq/core"
  ],
  "engines": {
@ -1,5 +1,5 @@
|
||||
{
|
||||
"id": "@janhq/inference-anthropic-extension",
|
||||
"id": "anthropic",
|
||||
"type": "remote",
|
||||
"engine": "anthropic",
|
||||
"url": "https://console.anthropic.com/settings/keys",
|
||||
@ -10,13 +10,14 @@
|
||||
"transform_req": {
|
||||
"chat_completions": {
|
||||
"url": "https://api.anthropic.com/v1/messages",
|
||||
"template": "{ {% for key, value in input_request %} {% if key == \"messages\" %} {% if input_request.messages.0.role == \"system\" %} \"system\": \"{{ input_request.messages.0.content }}\", \"messages\": [{% for message in input_request.messages %} {% if not loop.is_first %} {\"role\": \"{{ message.role }}\", \"content\": \"{{ message.content }}\" } {% if not loop.is_last %},{% endif %} {% endif %} {% endfor %}] {% else %} \"messages\": [{% for message in input_request.messages %} {\"role\": \"{{ message.role}}\", \"content\": \"{{ message.content }}\" } {% if not loop.is_last %},{% endif %} {% endfor %}] {% endif %} {% if not loop.is_last %},{% endif %} {% else if key == \"system\" or key == \"model\" or key == \"temperature\" or key == \"store\" or key == \"max_tokens\" or key == \"stream\" or key == \"presence_penalty\" or key == \"metadata\" or key == \"frequency_penalty\" or key == \"tools\" or key == \"tool_choice\" or key == \"logprobs\" or key == \"top_logprobs\" or key == \"logit_bias\" or key == \"n\" or key == \"modalities\" or key == \"prediction\" or key == \"response_format\" or key == \"service_tier\" or key == \"seed\" or key == \"stop\" or key == \"stream_options\" or key == \"top_p\" or key == \"parallel_tool_calls\" or key == \"user\" %}\"{{ key }}\": {{ tojson(value) }} {% if not loop.is_last %},{% endif %} {% endif %} {% endfor %} }"
|
||||
"template": "{ {% for key, value in input_request %} {% if key == \"messages\" %} {% if input_request.messages.0.role == \"system\" %} \"system\": {{ tojson(input_request.messages.0.content) }}, \"messages\": [{% for message in input_request.messages %} {% if not loop.is_first %} {\"role\": {{ tojson(message.role) }}, \"content\": {% if not message.content or message.content == \"\" %} \".\" {% else %} {{ tojson(message.content) }} {% endif %} } {% if not loop.is_last %},{% endif %} {% endif %} {% endfor %}] {% else %} \"messages\": [{% for message in input_request.messages %} {\"role\": {{ tojson(message.role) }}, \"content\": {% if not message.content or message.content == \"\" %} \".\" {% else %} {{ tojson(message.content) }} {% endif %} } {% if not loop.is_last %},{% endif %} {% endfor %}] {% endif %} {% if not loop.is_last %},{% endif %} {% else if key == \"system\" or key == \"model\" or key == \"temperature\" or key == \"store\" or key == \"max_tokens\" or key == \"stream\" or key == \"metadata\" or key == \"tools\" or key == \"tool_choice\" or key == \"logprobs\" or key == \"top_logprobs\" or key == \"logit_bias\" or key == \"n\" or key == \"modalities\" or key == \"prediction\" or key == \"response_format\" or key == \"service_tier\" or key == \"seed\" or key == \"stop\" or key == \"stream_options\" or key == \"top_p\" or key == \"parallel_tool_calls\" or key == \"user\" %}\"{{ key }}\": {{ tojson(value) }} {% if not loop.is_last %},{% endif %} {% endif %} {% endfor %} }"
|
||||
}
|
||||
},
|
||||
"transform_resp": {
|
||||
"chat_completions": {
|
||||
"template": "{% if input_request.stream %} {\"object\": \"chat.completion.chunk\", \"model\": \"{{ input_request.model }}\", \"choices\": [{\"index\": 0, \"delta\": { {% if input_request.type == \"message_start\" %} \"role\": \"assistant\", \"content\": null {% else if input_request.type == \"ping\" %} \"role\": \"assistant\", \"content\": null {% else if input_request.type == \"content_block_delta\" %} \"role\": \"assistant\", \"content\": \"{{ input_request.delta.text }}\" {% else if input_request.type == \"content_block_stop\" %} \"role\": \"assistant\", \"content\": null {% else if input_request.type == \"content_block_stop\" %} \"role\": \"assistant\", \"content\": null {% endif %} }, {% if input_request.type == \"content_block_stop\" %} \"finish_reason\": \"stop\" {% else %} \"finish_reason\": null {% endif %} }]} {% else %} {\"id\": \"{{ input_request.id }}\", \"created\": null, \"object\": \"chat.completion\", \"model\": \"{{ input_request.model }}\", \"choices\": [{ \"index\": 0, \"message\": { \"role\": \"{{ input_request.role }}\", \"content\": {% if input_request.content and input_request.content.0.type == \"text\" %} \"{{input_request.content.0.text}}\" {% else %} null {% endif %}, \"refusal\": null }, \"logprobs\": null, \"finish_reason\": \"{{ input_request.stop_reason }}\" } ], \"usage\": { \"prompt_tokens\": {{ input_request.usage.input_tokens }}, \"completion_tokens\": {{ input_request.usage.output_tokens }}, \"total_tokens\": {{ input_request.usage.input_tokens + input_request.usage.output_tokens }}, \"prompt_tokens_details\": { \"cached_tokens\": 0 }, \"completion_tokens_details\": { \"reasoning_tokens\": 0, \"accepted_prediction_tokens\": 0, \"rejected_prediction_tokens\": 0 } }, \"system_fingerprint\": \"fp_6b68a8204b\"} {% endif %}"
}
}
"template": "{% if input_request.stream %} {\"object\": \"chat.completion.chunk\", \"model\": \"{{ input_request.model }}\", \"choices\": [{\"index\": 0, \"delta\": { {% if input_request.type == \"message_start\" %} \"role\": \"assistant\", \"content\": null {% else if input_request.type == \"ping\" %} \"role\": \"assistant\", \"content\": null {% else if input_request.type == \"content_block_delta\" %} \"role\": \"assistant\", \"content\": \"{{ input_request.delta.text }}\" {% else if input_request.type == \"content_block_stop\" %} \"role\": \"assistant\", \"content\": null {% else if input_request.type == \"content_block_stop\" %} \"role\": \"assistant\", \"content\": null {% endif %} }, {% if input_request.type == \"content_block_stop\" %} \"finish_reason\": \"stop\" {% else %} \"finish_reason\": null {% endif %} }]} {% else %} {{tojson(input_request)}} {% endif %}"
}
},
"explore_models_url": "https://docs.anthropic.com/en/docs/about-claude/models"
}
}
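The substantive change in both templates above is swapping hand-quoted interpolation ("{{ message.content }}" wrapped in literal quotes) for tojson(...), which JSON-encodes the value and escapes embedded quotes and newlines. A minimal TypeScript sketch of why that matters, assuming tojson behaves like JSON.stringify:

// Hand-quoting breaks as soon as the content contains a quote or newline:
const content = 'He said "hi"\nbye'
const broken = `"content": "${content}"`             // invalid JSON fragment
const safe = `"content": ${JSON.stringify(content)}` // properly escaped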
@ -1,5 +1,5 @@
{
"id": "@janhq/inference-cohere-extension",
"id": "cohere",
"type": "remote",
"engine": "cohere",
"url": "https://dashboard.cohere.com/api-keys",
@ -10,13 +10,14 @@
"transform_req": {
"chat_completions": {
"url": "https://api.cohere.ai/v1/chat",
"template": "{ {% for key, value in input_request %} {% if key == \"messages\" %} {% if input_request.messages.0.role == \"system\" %} \"preamble\": \"{{ input_request.messages.0.content }}\", {% if length(input_request.messages) > 2 %} \"chatHistory\": [{% for message in input_request.messages %} {% if not loop.is_first and not loop.is_last %} {\"role\": {% if message.role == \"user\" %} \"USER\" {% else %} \"CHATBOT\" {% endif %}, \"content\": \"{{ message.content }}\" } {% if loop.index < length(input_request.messages) - 2 %},{% endif %} {% endif %} {% endfor %}], {% endif %} \"message\": \"{{ last(input_request.messages).content }}\" {% else %} {% if length(input_request.messages) > 2 %} \"chatHistory\": [{% for message in input_request.messages %} {% if not loop.is_last %} { \"role\": {% if message.role == \"user\" %} \"USER\" {% else %} \"CHATBOT\" {% endif %}, \"content\": \"{{ message.content }}\" } {% if loop.index < length(input_request.messages) - 2 %},{% endif %} {% endif %} {% endfor %}],{% endif %}\"message\": \"{{ last(input_request.messages).content }}\" {% endif %}{% if not loop.is_last %},{% endif %} {% else if key == \"system\" or key == \"model\" or key == \"temperature\" or key == \"store\" or key == \"max_tokens\" or key == \"stream\" or key == \"presence_penalty\" or key == \"metadata\" or key == \"frequency_penalty\" or key == \"tools\" or key == \"tool_choice\" or key == \"logprobs\" or key == \"top_logprobs\" or key == \"logit_bias\" or key == \"n\" or key == \"modalities\" or key == \"prediction\" or key == \"response_format\" or key == \"service_tier\" or key == \"seed\" or key == \"stop\" or key == \"stream_options\" or key == \"top_p\" or key == \"parallel_tool_calls\" or key == \"user\" %} \"{{ key }}\": {{ tojson(value) }} {% if not loop.is_last %},{% endif %} {% endif %} {% endfor %} }"
"template": "{ {% for key, value in input_request %} {% if key == \"messages\" %} {% if input_request.messages.0.role == \"system\" %} \"preamble\": {{ tojson(input_request.messages.0.content) }}, {% if length(input_request.messages) > 2 %} \"chatHistory\": [{% for message in input_request.messages %} {% if not loop.is_first and not loop.is_last %} {\"role\": {% if message.role == \"user\" %} \"USER\" {% else %} \"CHATBOT\" {% endif %}, \"content\": \"{{ message.content }}\" } {% if loop.index < length(input_request.messages) - 2 %},{% endif %} {% endif %} {% endfor %}], {% endif %} \"message\": \"{{ last(input_request.messages).content }}\" {% else %} {% if length(input_request.messages) > 2 %} \"chatHistory\": [{% for message in input_request.messages %} {% if not loop.is_last %} { \"role\": {% if message.role == \"user\" %} \"USER\" {% else %} \"CHATBOT\" {% endif %}, \"content\": \"{{ message.content }}\" } {% if loop.index < length(input_request.messages) - 2 %},{% endif %} {% endif %} {% endfor %}],{% endif %}\"message\": \"{{ last(input_request.messages).content }}\" {% endif %}{% if not loop.is_last %},{% endif %} {% else if key == \"system\" or key == \"model\" or key == \"temperature\" or key == \"store\" or key == \"max_tokens\" or key == \"stream\" or key == \"presence_penalty\" or key == \"metadata\" or key == \"frequency_penalty\" or key == \"tools\" or key == \"tool_choice\" or key == \"logprobs\" or key == \"top_logprobs\" or key == \"logit_bias\" or key == \"n\" or key == \"modalities\" or key == \"prediction\" or key == \"response_format\" or key == \"service_tier\" or key == \"seed\" or key == \"stop\" or key == \"stream_options\" or key == \"top_p\" or key == \"parallel_tool_calls\" or key == \"user\" %} \"{{ key }}\": {{ tojson(value) }} {% if not loop.is_last %},{% endif %} {% endif %} {% endfor %} }"
}
},
"transform_resp": {
"chat_completions": {
"template": "{% if input_request.stream %} {\"object\": \"chat.completion.chunk\", \"model\": \"{{ input_request.model }}\", \"choices\": [{\"index\": 0, \"delta\": { {% if input_request.event_type == \"text-generation\" %} \"role\": \"assistant\", \"content\": \"{{ input_request.text }}\" {% else %} \"role\": \"assistant\", \"content\": null {% endif %} }, {% if input_request.event_type == \"stream-end\" %} \"finish_reason\": \"{{ input_request.finish_reason }}\" {% else %} \"finish_reason\": null {% endif %} }]} {% else %} {\"id\": \"{{ input_request.generation_id }}\", \"created\": null, \"object\": \"chat.completion\", \"model\": {% if input_request.model %} \"{{ input_request.model }}\" {% else %} \"command-r-plus-08-2024\" {% endif %}, \"choices\": [{ \"index\": 0, \"message\": { \"role\": \"assistant\", \"content\": {% if not input_request.text %} null {% else %} \"{{ input_request.text }}\" {% endif %}, \"refusal\": null }, \"logprobs\": null, \"finish_reason\": \"{{ input_request.finish_reason }}\" } ], \"usage\": { \"prompt_tokens\": {{ input_request.meta.tokens.input_tokens }}, \"completion_tokens\": {{ input_request.meta.tokens.output_tokens }},\"total_tokens\": {{ input_request.meta.tokens.input_tokens + input_request.meta.tokens.output_tokens }}, \"prompt_tokens_details\": { \"cached_tokens\": 0 },\"completion_tokens_details\": { \"reasoning_tokens\": 0, \"accepted_prediction_tokens\": 0, \"rejected_prediction_tokens\": 0 } }, \"system_fingerprint\": \"fp_6b68a8204b\"} {% endif %}"
}
}
},
"explore_models_url": "https://docs.cohere.com/v2/docs/models"
}
}
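For reference, the Cohere template above reshapes an OpenAI-style messages array into Cohere's preamble / chatHistory / message fields. A rough TypeScript sketch of that mapping (message shape assumed; the real renderer also skips chatHistory when fewer than three messages are present):

type Msg = { role: string; content: string }

function toCohereChat(messages: Msg[]) {
  const hasSystem = messages[0]?.role === 'system'
  // History is everything between the optional system prompt and the last turn
  const history = messages.slice(hasSystem ? 1 : 0, -1).map((m) => ({
    role: m.role === 'user' ? 'USER' : 'CHATBOT',
    content: m.content,
  }))
  return {
    ...(hasSystem && { preamble: messages[0].content }),
    ...(history.length > 0 && { chatHistory: history }),
    message: messages[messages.length - 1].content, // the current user turn
  }
}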
@ -0,0 +1,23 @@
{
"id": "deepseek",
"type": "remote",
"engine": "deepseek",
"url": "https://platform.deepseek.com/api_keys",
"api_key": "",
"metadata": {
"get_models_url": "https://api.deepseek.com/models",
"header_template": "Authorization: Bearer {{api_key}}",
"transform_req": {
"chat_completions": {
"url": "https://api.deepseek.com/chat/completions",
"template": "{ {% set first = true %} {% for key, value in input_request %} {% if key == \"messages\" or key == \"temperature\" or key == \"store\" or key == \"max_tokens\" or key == \"stream\" or key == \"presence_penalty\" or key == \"metadata\" or key == \"frequency_penalty\" or key == \"tools\" or key == \"tool_choice\" or key == \"logprobs\" or key == \"top_logprobs\" or key == \"logit_bias\" or key == \"n\" or key == \"modalities\" or key == \"prediction\" or key == \"response_format\" or key == \"service_tier\" or key == \"model\" or key == \"seed\" or key == \"stop\" or key == \"stream_options\" or key == \"top_p\" or key == \"parallel_tool_calls\" or key == \"user\" %} {% if not first %},{% endif %} \"{{ key }}\": {{ tojson(value) }} {% set first = false %} {% endif %} {% endfor %} }"
}
},
"transform_resp": {
"chat_completions": {
"template": "{{tojson(input_request)}}"
}
},
"explore_models_url": "https://api-docs.deepseek.com/quick_start/pricing"
}
}
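The request template above is effectively a key whitelist: it walks the incoming request and copies only OpenAI-compatible fields, JSON-encoding each value. A condensed TypeScript equivalent (field list abridged; tojson is assumed to behave like JSON.stringify):

const ALLOWED = new Set([
  'messages', 'model', 'temperature', 'max_tokens', 'stream',
  'top_p', 'stop', 'seed', 'tools', 'tool_choice', // ...abridged
])

function transformRequest(input: Record<string, unknown>): string {
  // Keep only whitelisted keys, dropping anything the provider would reject
  const body = Object.fromEntries(
    Object.entries(input).filter(([key]) => ALLOWED.has(key))
  )
  return JSON.stringify(body)
}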
@ -0,0 +1,23 @@
{
"id": "google_gemini",
"type": "remote",
"engine": "google_gemini",
"url": "https://aistudio.google.com/apikey",
"api_key": "",
"metadata": {
"get_models_url": "https://generativelanguage.googleapis.com/v1beta/models",
"header_template": "Authorization: Bearer {{api_key}}",
"transform_req": {
"chat_completions": {
"url": "https://generativelanguage.googleapis.com/v1beta/openai/chat/completions",
"template": "{ {% set first = true %} {% for key, value in input_request %} {% if key == \"messages\" or key == \"temperature\" or key == \"store\" or key == \"max_tokens\" or key == \"stream\" or key == \"presence_penalty\" or key == \"metadata\" or key == \"frequency_penalty\" or key == \"tools\" or key == \"tool_choice\" or key == \"logprobs\" or key == \"top_logprobs\" or key == \"logit_bias\" or key == \"n\" or key == \"modalities\" or key == \"prediction\" or key == \"response_format\" or key == \"service_tier\" or key == \"model\" or key == \"seed\" or key == \"stop\" or key == \"stream_options\" or key == \"top_p\" or key == \"parallel_tool_calls\" or key == \"user\" %} {% if not first %},{% endif %} \"{{ key }}\": {{ tojson(value) }} {% set first = false %} {% endif %} {% endfor %} }"
}
},
"transform_resp": {
"chat_completions": {
"template": "{{tojson(input_request)}}"
}
},
"explore_models_url": "https://ai.google.dev/gemini-api/docs/models/gemini"
}
}
@ -1,5 +1,5 @@
{
"id": "@janhq/inference-groq-extension",
"id": "groq",
"type": "remote",
"engine": "groq",
"url": "https://console.groq.com/keys",
@ -15,8 +15,9 @@
},
"transform_resp": {
"chat_completions": {
"template": "{ {% set first = true %} {% for key, value in input_request %} {% if key == \"choices\" or key == \"created\" or key == \"model\" or key == \"service_tier\" or key == \"system_fingerprint\" or key == \"stream\" or key == \"object\" or key == \"usage\" %} {% if not first %},{% endif %} \"{{ key }}\": {{ tojson(value) }} {% set first = false %} {% endif %} {% endfor %} }"
}
}
"template": "{{tojson(input_request)}}"
}
},
"explore_models_url": "https://console.groq.com/docs/models"
}
}
@ -1,5 +1,5 @@
{
"id": "@janhq/inference-martian-extension",
"id": "martian",
"type": "remote",
"engine": "martian",
"url": "https://withmartian.com/dashboard",
@ -15,8 +15,9 @@
},
"transform_resp": {
"chat_completions": {
"template": "{ {% set first = true %} {% for key, value in input_request %} {% if key == \"choices\" or key == \"created\" or key == \"model\" or key == \"service_tier\" or key == \"system_fingerprint\" or key == \"stream\" or key == \"object\" or key == \"usage\" %} {% if not first %},{% endif %} \"{{ key }}\": {{ tojson(value) }} {% set first = false %} {% endif %} {% endfor %} }"
}
}
"template": "{{tojson(input_request)}}"
}
},
"explore_models_url": "https://withmartian.github.io/llm-adapters/"
}
}
@ -1,5 +1,5 @@
{
"id": "@janhq/inference-mistral-extension",
"id": "mistral",
"type": "remote",
"engine": "mistral",
"url": "https://console.mistral.ai/api-keys/",
@ -17,6 +17,7 @@
"chat_completions": {
"template": "{{tojson(input_request)}}"
}
}
},
"explore_models_url": "https://docs.mistral.ai/getting-started/models/models_overview/"
}
}
@ -1,5 +1,5 @@
{
"id": "@janhq/inference-nvidia-extension",
"id": "nvidia",
"type": "remote",
"engine": "nvidia",
"url": "https://org.ngc.nvidia.com/setup/personal-keys",
@ -15,8 +15,9 @@
},
"transform_resp": {
"chat_completions": {
"template": "{ {% set first = true %} {% for key, value in input_request %} {% if key == \"choices\" or key == \"created\" or key == \"model\" or key == \"service_tier\" or key == \"system_fingerprint\" or key == \"stream\" or key == \"object\" or key == \"usage\" %} {% if not first %},{% endif %} \"{{ key }}\": {{ tojson(value) }} {% set first = false %} {% endif %} {% endfor %} }"
}
}
"template": "{{tojson(input_request)}}"
}
},
"explore_models_url": "https://build.nvidia.com/models"
}
}
@ -1,5 +1,5 @@
{
"id": "@janhq/inference-openai-extension",
"id": "openai",
"type": "remote",
"engine": "openai",
"url": "https://platform.openai.com/account/api-keys",
@ -10,13 +10,14 @@
"transform_req": {
"chat_completions": {
"url": "https://api.openai.com/v1/chat/completions",
"template": "{ {% set first = true %} {% for key, value in input_request %} {% if key == \"model\" or key == \"temperature\" or key == \"store\" or key == \"messages\" or key == \"stream\" or key == \"presence_penalty\" or key == \"metadata\" or key == \"frequency_penalty\" or key == \"tools\" or key == \"tool_choice\" or key == \"logprobs\" or key == \"top_logprobs\" or key == \"logit_bias\" or key == \"n\" or key == \"modalities\" or key == \"prediction\" or key == \"response_format\" or key == \"service_tier\" or key == \"seed\" or key == \"stream_options\" or key == \"top_p\" or key == \"parallel_tool_calls\" or key == \"user\" or key == \"max_tokens\" or ((input_request.model == \"o1\" or input_request.model == \"o1-preview\" or input_request.model == \"o1-mini\") and (key == \"stop\")) %} {% if not first %} , {% endif %} {% if key == \"messages\" and (input_request.model == \"o1\" or input_request.model == \"o1-preview\" or input_request.model == \"o1-mini\") and input_request.messages.0.role == \"system\" %} \"messages\": [{% for message in input_request.messages %} {% if not loop.is_first %} { \"role\": \"{{ message.role }}\", \"content\": \"{{ message.content }}\" } {% if not loop.is_last %} , {% endif %} {% endif %} {% endfor %}] {% else if key == \"max_tokens\" and (input_request.model == \"o1\" or input_request.model == \"o1-preview\" or input_request.model == \"o1-mini\") %} \"max_completion_tokens\": {{ tojson(value) }} {% else %} \"{{ key }}\": {{ tojson(value) }} {% set first = false %} {% endif %} {% endif %} {% endfor %} }"
"template": "{ {% set first = true %} {% for key, value in input_request %} {% if key == \"model\" or key == \"temperature\" or key == \"store\" or key == \"messages\" or key == \"stream\" or key == \"presence_penalty\" or key == \"metadata\" or key == \"frequency_penalty\" or key == \"tools\" or key == \"tool_choice\" or key == \"logprobs\" or key == \"top_logprobs\" or key == \"logit_bias\" or key == \"n\" or key == \"modalities\" or key == \"prediction\" or key == \"response_format\" or key == \"service_tier\" or key == \"seed\" or key == \"stream_options\" or key == \"top_p\" or key == \"parallel_tool_calls\" or key == \"user\" or key == \"max_tokens\" or ((input_request.model == \"o1\" or input_request.model == \"o1-preview\" or input_request.model == \"o1-mini\" or input_request.model == \"o3\" or input_request.model == \"o3-mini\") and (key == \"stop\")) %} {% if not first %} , {% endif %} {% if key == \"messages\" and (input_request.model == \"o1\" or input_request.model == \"o1-preview\" or input_request.model == \"o1-mini\") and input_request.messages.0.role == \"system\" %} \"messages\": [{% for message in input_request.messages %} {% if not loop.is_first %} { \"role\": \"{{ message.role }}\", \"content\": \"{{ message.content }}\" } {% if not loop.is_last %} , {% endif %} {% endif %} {% endfor %}] {% else if key == \"max_tokens\" and (input_request.model == \"o1\" or input_request.model == \"o1-preview\" or input_request.model == \"o1-mini\" or input_request.model == \"o3\" or input_request.model == \"o3-mini\") %} \"max_completion_tokens\": {{ tojson(value) }} {% set first = false %} {% else %} \"{{ key }}\": {{ tojson(value) }} {% set first = false %} {% endif %} {% endif %} {% endfor %} }"
}
},
"transform_resp": {
"chat_completions": {
"template": "{ {% set first = true %} {% for key, value in input_request %} {% if key == \"choices\" or key == \"created\" or key == \"model\" or key == \"service_tier\" or key == \"stream\" or key == \"object\" or key == \"usage\" %} {% if not first %},{% endif %} \"{{ key }}\": {{ tojson(value) }} {% set first = false %} {% endif %} {% endfor %} }"
}
}
"template": "{{tojson(input_request)}}"
}
},
"explore_models_url": "https://platform.openai.com/docs/models"
}
}
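The updated OpenAI template adds o3 and o3-mini to the reasoning-model special cases. Condensed into TypeScript, the rewrite it performs looks roughly like this (simplified: in the actual template the system-message drop is restricted to the o1 family, while max_completion_tokens applies to o3 as well):

const REASONING_MODELS = new Set(['o1', 'o1-preview', 'o1-mini', 'o3', 'o3-mini'])

type ChatRequest = {
  model: string
  max_tokens?: number
  messages: { role: string; content: string }[]
  [key: string]: unknown
}

function rewriteForReasoningModels(req: ChatRequest) {
  if (!REASONING_MODELS.has(req.model)) return req
  const { max_tokens, messages, ...rest } = req
  return {
    ...rest,
    // these endpoints take max_completion_tokens instead of max_tokens
    ...(max_tokens !== undefined && { max_completion_tokens: max_tokens }),
    // a leading system message is stripped rather than forwarded
    messages: messages[0]?.role === 'system' ? messages.slice(1) : messages,
  }
}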
@ -1,5 +1,5 @@
{
"id": "@janhq/inference-openrouter-extension",
"id": "openrouter",
"type": "remote",
"engine": "openrouter",
"url": "https://openrouter.ai/keys",
@ -10,13 +10,14 @@
"transform_req": {
"chat_completions": {
"url": "https://openrouter.ai/api/v1/chat/completions",
"template": "{ {% set first = true %} {% for key, value in input_request %} {% if key == \"messages\" or key == \"temperature\" or key == \"store\" or key == \"max_tokens\" or key == \"stream\" or key == \"presence_penalty\" or key == \"metadata\" or key == \"frequency_penalty\" or key == \"tools\" or key == \"tool_choice\" or key == \"logprobs\" or key == \"top_logprobs\" or key == \"logit_bias\" or key == \"n\" or key == \"modalities\" or key == \"prediction\" or key == \"response_format\" or key == \"service_tier\" or key == \"seed\" or key == \"stop\" or key == \"stream_options\" or key == \"top_p\" or key == \"parallel_tool_calls\" or key == \"user\" %} {% if not first %},{% endif %} \"{{ key }}\": {{ tojson(value) }} {% set first = false %} {% endif %} {% endfor %} }"
"template": "{ {% set first = true %} {% for key, value in input_request %} {% if key == \"messages\" or key == \"temperature\" or key == \"store\" or key == \"max_tokens\" or key == \"stream\" or key == \"presence_penalty\" or key == \"metadata\" or key == \"frequency_penalty\" or key == \"tools\" or key == \"tool_choice\" or key == \"logprobs\" or key == \"top_logprobs\" or key == \"logit_bias\" or key == \"n\" or key == \"modalities\" or key == \"prediction\" or key == \"response_format\" or key == \"service_tier\" or key == \"model\" or key == \"seed\" or key == \"stop\" or key == \"stream_options\" or key == \"top_p\" or key == \"parallel_tool_calls\" or key == \"user\" %} {% if not first %},{% endif %} \"{{ key }}\": {{ tojson(value) }} {% set first = false %} {% endif %} {% endfor %} }"
}
},
"transform_resp": {
"chat_completions": {
"template": "{ {% set first = true %} {% for key, value in input_request %} {% if key == \"choices\" or key == \"created\" or key == \"model\" or key == \"service_tier\" or key == \"system_fingerprint\" or key == \"stream\" or key == \"object\" or key == \"usage\" %} {% if not first %},{% endif %} \"{{ key }}\": {{ tojson(value) }} {% set first = false %} {% endif %} {% endfor %} }"
}
}
"template": "{{tojson(input_request)}}"
}
},
"explore_models_url": "https://openrouter.ai/models"
}
}
@ -13,9 +13,19 @@ export default defineConfig([
NODE: JSON.stringify(`${pkgJson.name}/${pkgJson.node}`),
API_URL: JSON.stringify('http://127.0.0.1:39291'),
SOCKET_URL: JSON.stringify('ws://127.0.0.1:39291'),
PLATFORM: JSON.stringify(process.platform),
CORTEX_ENGINE_VERSION: JSON.stringify('v0.1.49'),
DEFAULT_REMOTE_ENGINES: JSON.stringify(engines),
DEFAULT_REMOTE_MODELS: JSON.stringify(models),
DEFAULT_REQUEST_PAYLOAD_TRANSFORM: JSON.stringify(
`{ {% set first = true %} {% for key, value in input_request %} {% if key == "messages" or key == "model" or key == "temperature" or key == "store" or key == "max_tokens" or key == "stream" or key == "presence_penalty" or key == "metadata" or key == "frequency_penalty" or key == "tools" or key == "tool_choice" or key == "logprobs" or key == "top_logprobs" or key == "logit_bias" or key == "n" or key == "modalities" or key == "prediction" or key == "response_format" or key == "service_tier" or key == "seed" or key == "stop" or key == "stream_options" or key == "top_p" or key == "parallel_tool_calls" or key == "user" %} {% if not first %},{% endif %} "{{ key }}": {{ tojson(value) }} {% set first = false %} {% endif %} {% endfor %} }`
),
DEFAULT_RESPONSE_BODY_TRANSFORM: JSON.stringify(
'{{tojson(input_request)}}'
),
DEFAULT_REQUEST_HEADERS_TRANSFORM: JSON.stringify(
'Authorization: Bearer {{api_key}}'
),
},
},
{
@ -29,15 +39,4 @@ export default defineConfig([
CORTEX_ENGINE_VERSION: JSON.stringify('v0.1.49'),
},
},
{
input: 'src/node/cpuInfo.ts',
output: {
format: 'cjs',
file: 'dist/node/cpuInfo.js',
},
external: ['cpu-instructions'],
resolve: {
extensions: ['.ts', '.js', '.svg'],
},
},
])
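These define entries are compile-time constant injection: rolldown textually replaces each identifier with the JSON literal when bundling, and the ambient declarations in the next file keep TypeScript happy. Illustrative consumption:

// After bundling, this compiles down to a plain string literal,
// e.g. 'Authorization: Bearer {{api_key}}'
const headerTemplate: string = DEFAULT_REQUEST_HEADERS_TRANSFORM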
@ -1,7 +1,11 @@
declare const API_URL: string
declare const CORTEX_ENGINE_VERSION: string
declare const PLATFORM: string
declare const SOCKET_URL: string
declare const NODE: string
declare const DEFAULT_REQUEST_PAYLOAD_TRANSFORM: string
declare const DEFAULT_RESPONSE_BODY_TRANSFORM: string
declare const DEFAULT_REQUEST_HEADERS_TRANSFORM: string

declare const DEFAULT_REMOTE_ENGINES: ({
id: string
@ -19,12 +19,16 @@ import ky, { HTTPError } from 'ky'
import PQueue from 'p-queue'
import { EngineError } from './error'
import { getJanDataFolderPath } from '@janhq/core'
import { engineVariant } from './utils'

interface ModelList {
data: Model[]
}
/**
 * JSONEngineManagementExtension is a EngineManagementExtension implementation that provides
 * JanEngineManagementExtension is an EngineManagementExtension implementation that provides
 * functionality for managing engines.
 */
export default class JSONEngineManagementExtension extends EngineManagementExtension {
export default class JanEngineManagementExtension extends EngineManagementExtension {
queue = new PQueue({ concurrency: 1 })

/**
@ -63,13 +67,12 @@ export default class JSONEngineManagementExtension
 * @returns A Promise that resolves to the list of remote models.
 */
async getRemoteModels(name: string): Promise<any> {
return this.queue.add(() =>
ky
return ky
.get(`${API_URL}/v1/models/remote/${name}`)
.json<Model[]>()
.then((e) => e)
.catch(() => [])
) as Promise<Model[]>
.json<ModelList>()
.catch(() => ({
data: [],
})) as Promise<ModelList>
}
/**
@ -138,9 +141,38 @@ export default class JSONEngineManagementExtension
 * Add a new remote engine
 * @returns A Promise that resolves when the engine has been installed.
 */
async addRemoteEngine(engineConfig: EngineConfig) {
async addRemoteEngine(
engineConfig: EngineConfig,
persistModels: boolean = true
) {
// Populate default settings
if (
engineConfig.metadata?.transform_req?.chat_completions &&
!engineConfig.metadata.transform_req.chat_completions.template
)
engineConfig.metadata.transform_req.chat_completions.template =
DEFAULT_REQUEST_PAYLOAD_TRANSFORM

if (
engineConfig.metadata?.transform_resp?.chat_completions &&
!engineConfig.metadata.transform_resp.chat_completions?.template
)
engineConfig.metadata.transform_resp.chat_completions.template =
DEFAULT_RESPONSE_BODY_TRANSFORM

if (engineConfig.metadata && !engineConfig.metadata?.header_template)
engineConfig.metadata.header_template = DEFAULT_REQUEST_HEADERS_TRANSFORM

return this.queue.add(() =>
ky.post(`${API_URL}/v1/engines`, { json: engineConfig }).then((e) => e)
ky.post(`${API_URL}/v1/engines`, { json: engineConfig }).then((e) => {
if (persistModels && engineConfig.metadata?.get_models_url) {
// Pull /models from remote models endpoint
return this.populateRemoteModels(engineConfig)
.then(() => e)
.catch(() => e)
}
return e
})
) as Promise<{ messages: string }>
}
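A hypothetical call against the new signature, showing the fallback behavior (engine name and URLs below are illustrative, not real providers):

// Both templates and header_template are omitted here, so they are
// filled from the DEFAULT_* transforms before the POST to /v1/engines.
await extension.addRemoteEngine({
  engine: 'my-provider',
  metadata: {
    get_models_url: 'https://api.example.com/models',
    transform_req: { chat_completions: { url: 'https://api.example.com/chat' } },
    transform_resp: { chat_completions: {} },
  },
}) // persistModels defaults to true, so populateRemoteModels runs afterwards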
@ -161,9 +193,25 @@ export default class JSONEngineManagementExtension
 * @param model - Remote model object.
 */
async addRemoteModel(model: Model) {
return this.queue.add(() =>
ky.post(`${API_URL}/v1/models/add`, { json: model }).then((e) => e)
return this.queue
.add(() =>
ky
.post(`${API_URL}/v1/models/add`, {
json: {
inference_params: {
max_tokens: 4096,
temperature: 0.7,
top_p: 0.95,
stream: true,
frequency_penalty: 0,
presence_penalty: 0,
},
...model,
},
})
.then((e) => e)
)
.then(() => {})
}
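Note the spread order: because ...model comes after the inference_params default, a model that ships its own inference_params replaces the whole default object rather than deep-merging with it. A short illustration (values hypothetical):

const defaults = { inference_params: { max_tokens: 4096, stream: true } }
const model = { model: 'some-remote-model', inference_params: { max_tokens: 128000 } }
const payload = { ...defaults, ...model }
// payload.inference_params is { max_tokens: 128000 } - stream is gone,
// since object spread overwrites the key wholesale; it does not deep-merge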
/**
@ -245,11 +293,7 @@ export default class JSONEngineManagementExtension
error instanceof EngineError
) {
const systemInfo = await systemInformation()
const variant = await executeOnMain(
NODE,
'engineVariant',
systemInfo.gpuSetting
)
const variant = await engineVariant(systemInfo.gpuSetting)
await this.setDefaultEngineVariant(InferenceEngine.cortex_llamacpp, {
variant: variant,
version: `${CORTEX_ENGINE_VERSION}`,
@ -293,14 +337,40 @@ export default class JSONEngineManagementExtension
data.api_key = api_key
/// END - Migrate legacy api key settings

await this.addRemoteEngine(data).catch(console.error)
await this.addRemoteEngine(data, false).catch(console.error)
})
)
events.emit(EngineEvent.OnEngineUpdate, {})
DEFAULT_REMOTE_MODELS.forEach(async (data: Model) => {
await this.addRemoteModel(data).catch(() => {})
})
await Promise.all(
DEFAULT_REMOTE_MODELS.map((data: Model) =>
this.addRemoteModel(data).catch(() => {})
)
)
events.emit(ModelEvent.OnModelsUpdate, { fetch: true })
}
}

/**
 * Pulls the models list from the remote provider and persists it
 * @param engineConfig
 * @returns
 */
private populateRemoteModels = async (engineConfig: EngineConfig) => {
return this.getRemoteModels(engineConfig.engine)
.then((models: ModelList) => {
if (models?.data)
Promise.all(
models.data.map((model) =>
this.addRemoteModel({
...model,
engine: engineConfig.engine as InferenceEngine,
model: model.model ?? model.id,
}).catch(console.info)
)
).then(() => {
events.emit(ModelEvent.OnModelsUpdate, { fetch: true })
})
})
.catch(console.info)
}
}
@ -1,27 +0,0 @@
import { cpuInfo } from 'cpu-instructions'

// Check the CPU info and determine the supported instruction set
const info = cpuInfo.cpuInfo().some((e) => e.toUpperCase() === 'AVX512')
? 'avx512'
: cpuInfo.cpuInfo().some((e) => e.toUpperCase() === 'AVX2')
? 'avx2'
: cpuInfo.cpuInfo().some((e) => e.toUpperCase() === 'AVX')
? 'avx'
: 'noavx'

// Send the result and wait for confirmation before exiting
new Promise<void>((resolve, reject) => {
// @ts-ignore
process.send(info, (error: Error | null) => {
if (error) {
reject(error)
} else {
resolve()
}
})
})
.then(() => process.exit(0))
.catch((error) => {
console.error('Failed to send info:', error)
process.exit(1)
})
@ -1,7 +1,6 @@
import { describe, expect, it } from '@jest/globals'
import engine from './index'
import { GpuSetting } from '@janhq/core/node'
import { cpuInfo } from 'cpu-instructions'
import { GpuSetting } from '@janhq/core'
import { fork } from 'child_process'

let testSettings: GpuSetting = {
@ -23,22 +22,12 @@ let testSettings: GpuSetting = {
}
const originalPlatform = process.platform

jest.mock('cpu-instructions', () => ({
cpuInfo: {
cpuInfo: jest.fn(),
},
}))
let mockCpuInfo = cpuInfo.cpuInfo as jest.Mock
mockCpuInfo.mockReturnValue([])

jest.mock('@janhq/core/node', () => ({

jest.mock('@janhq/core', () => ({
appResourcePath: () => '.',
log: jest.fn(),
}))
jest.mock('child_process', () => ({
fork: jest.fn(),
}))
const mockFork = fork as jest.Mock

describe('test executable cortex file', () => {
afterAll(function () {
@ -48,14 +37,7 @@ describe('test executable cortex file', () => {
})

it('executes on MacOS', () => {
const mockProcess = {
on: jest.fn((event, callback) => {
if (event === 'message') {
callback('noavx')
}
}),
send: jest.fn(),
}

Object.defineProperty(process, 'platform', {
value: 'darwin',
})
@ -63,7 +45,7 @@
value: 'arm64',
})

mockFork.mockReturnValue(mockProcess)

expect(engine.engineVariant(testSettings)).resolves.toEqual('mac-arm64')
})

@ -83,7 +65,7 @@
}),
send: jest.fn(),
}
mockFork.mockReturnValue(mockProcess)

Object.defineProperty(process, 'arch', {
value: 'x64',
})
@ -107,7 +89,6 @@
}),
send: jest.fn(),
}
mockFork.mockReturnValue(mockProcess)

expect(engine.engineVariant()).resolves.toEqual('windows-amd64-avx')
})
@ -145,7 +126,6 @@
}),
send: jest.fn(),
}
mockFork.mockReturnValue(mockProcess)

expect(engine.engineVariant(settings)).resolves.toEqual(
'windows-amd64-avx2-cuda-11-7'
@ -176,26 +156,11 @@
},
],
}
mockFork.mockReturnValue({
on: jest.fn((event, callback) => {
if (event === 'message') {
callback('noavx')
}
}),
send: jest.fn(),
})

expect(engine.engineVariant(settings)).resolves.toEqual(
'windows-amd64-noavx-cuda-12-0'
)
mockFork.mockReturnValue({
on: jest.fn((event, callback) => {
if (event === 'message') {
callback('avx512')
}
}),
send: jest.fn(),
})

expect(engine.engineVariant(settings)).resolves.toEqual(
'windows-amd64-avx2-cuda-12-0'
)
@ -209,14 +174,6 @@
...testSettings,
run_mode: 'cpu',
}
mockFork.mockReturnValue({
on: jest.fn((event, callback) => {
if (event === 'message') {
callback('noavx')
}
}),
send: jest.fn(),
})

expect(engine.engineVariant()).resolves.toEqual('linux-amd64-noavx')
})
@ -245,16 +202,6 @@
},
],
}

mockFork.mockReturnValue({
on: jest.fn((event, callback) => {
if (event === 'message') {
callback('avx512')
}
}),
send: jest.fn(),
})

expect(engine.engineVariant(settings)).resolves.toBe(
'linux-amd64-avx2-cuda-11-7'
)
@ -284,14 +231,7 @@
},
],
}
mockFork.mockReturnValue({
on: jest.fn((event, callback) => {
if (event === 'message') {
callback('avx2')
}
}),
send: jest.fn(),
})

expect(engine.engineVariant(settings)).resolves.toEqual(
'linux-amd64-avx2-cuda-12-0'
@ -310,15 +250,6 @@

const cpuInstructions = ['avx512', 'avx2', 'avx', 'noavx']
cpuInstructions.forEach((instruction) => {
mockFork.mockReturnValue({
on: jest.fn((event, callback) => {
if (event === 'message') {
callback(instruction)
}
}),
send: jest.fn(),
})

expect(engine.engineVariant(settings)).resolves.toEqual(
`linux-amd64-${instruction}`
)
@ -335,14 +266,7 @@
}
const cpuInstructions = ['avx512', 'avx2', 'avx', 'noavx']
cpuInstructions.forEach((instruction) => {
mockFork.mockReturnValue({
on: jest.fn((event, callback) => {
if (event === 'message') {
callback(instruction)
}
}),
send: jest.fn(),
})

expect(engine.engineVariant(settings)).resolves.toEqual(
`windows-amd64-${instruction}`
)
@ -376,14 +300,7 @@
}
const cpuInstructions = ['avx512', 'avx2', 'avx', 'noavx']
cpuInstructions.forEach((instruction) => {
mockFork.mockReturnValue({
on: jest.fn((event, callback) => {
if (event === 'message') {
callback(instruction)
}
}),
send: jest.fn(),
})

expect(engine.engineVariant(settings)).resolves.toEqual(
`windows-amd64-${instruction === 'avx512' || instruction === 'avx2' ? 'avx2' : 'noavx'}-cuda-12-0`
)
@ -417,14 +334,7 @@
],
}
cpuInstructions.forEach((instruction) => {
mockFork.mockReturnValue({
on: jest.fn((event, callback) => {
if (event === 'message') {
callback(instruction)
}
}),
send: jest.fn(),
})

expect(engine.engineVariant(settings)).resolves.toEqual(
`linux-amd64-${instruction === 'avx512' || instruction === 'avx2' ? 'avx2' : 'noavx'}-cuda-12-0`
)
@ -459,14 +369,7 @@
],
}
cpuInstructions.forEach((instruction) => {
mockFork.mockReturnValue({
on: jest.fn((event, callback) => {
if (event === 'message') {
callback(instruction)
}
}),
send: jest.fn(),
})

expect(engine.engineVariant(settings)).resolves.toEqual(
`linux-amd64-vulkan`
)
@ -2,111 +2,10 @@ import * as path from 'path'
import {
appResourcePath,
getJanDataFolderPath,
GpuSetting,
log,
} from '@janhq/core/node'
import { fork } from 'child_process'
import { mkdir, readdir, symlink } from 'fs/promises'

/**
 * The GPU runMode that will be set - either 'vulkan', 'cuda', or empty for cpu.
 * @param settings
 * @returns
 */
const gpuRunMode = (settings?: GpuSetting): string => {
if (process.platform === 'darwin')
// MacOS now has universal binaries
return ''

if (!settings) return ''

return settings.vulkan === true || settings.run_mode === 'cpu' ? '' : 'cuda'
}

/**
 * The OS & architecture that the current process is running on.
 * @returns win, mac-x64, mac-arm64, or linux
 */
const os = (): string => {
return process.platform === 'win32'
? 'windows-amd64'
: process.platform === 'darwin'
? process.arch === 'arm64'
? 'mac-arm64'
: 'mac-amd64'
: 'linux-amd64'
}

/**
 * The CUDA version that will be set - either '11-7' or '12-0'.
 * @param settings
 * @returns
 */
const cudaVersion = (settings?: GpuSetting): '11-7' | '12-0' | undefined => {
const isUsingCuda =
settings?.vulkan !== true &&
settings?.run_mode === 'gpu' &&
!os().includes('mac')

if (!isUsingCuda) return undefined
return settings?.cuda?.version === '11' ? '11-7' : '12-0'
}

/**
 * The CPU instructions that will be set - either 'avx512', 'avx2', 'avx', or 'noavx'.
 * @returns
 */
const cpuInstructions = async (): Promise<string> => {
if (process.platform === 'darwin') return ''

const child = fork(path.join(__dirname, './cpuInfo.js')) // Path to the child process file

return new Promise((resolve, reject) => {
child.on('message', (cpuInfo?: string) => {
resolve(cpuInfo ?? 'noavx')
child.kill() // Kill the child process after receiving the result
})

child.on('error', (err) => {
resolve('noavx')
child.kill()
})

child.on('exit', (code) => {
if (code !== 0) {
resolve('noavx')
child.kill()
}
})
})
}

/**
 * Find which variant to run based on the current platform.
 */
const engineVariant = async (gpuSetting?: GpuSetting): Promise<string> => {
const cpuInstruction = await cpuInstructions()
log(`[CORTEX]: CPU instruction: ${cpuInstruction}`)
let engineVariant = [
os(),
gpuSetting?.vulkan
? 'vulkan'
: gpuRunMode(gpuSetting) !== 'cuda'
? // CPU mode - support all variants
cpuInstruction
: // GPU mode - packaged CUDA variants of avx2 and noavx
cpuInstruction === 'avx2' || cpuInstruction === 'avx512'
? 'avx2'
: 'noavx',
gpuRunMode(gpuSetting),
cudaVersion(gpuSetting),
]
.filter((e) => !!e)
.join('-')

log(`[CORTEX]: Engine variant: ${engineVariant}`)
return engineVariant
}

/**
 * Create symlink to each variant for the default bundled version
@ -148,6 +47,5 @@ const symlinkEngines = async () => {
}

export default {
engineVariant,
symlinkEngines,
}
86
extensions/engine-management-extension/src/utils.ts
Normal file
@ -0,0 +1,86 @@
import { GpuSetting, log } from '@janhq/core'

/**
 * The GPU runMode that will be set - either 'vulkan', 'cuda', or empty for cpu.
 * @param settings
 * @returns
 */

const gpuRunMode = (settings?: GpuSetting): string => {
return settings.gpus?.some(
(gpu) =>
gpu.activated === true &&
gpu.additional_information &&
gpu.additional_information.driver_version
)
? 'cuda'
: ''
}

/**
 * The OS & architecture that the current process is running on.
 * @returns win, mac-x64, mac-arm64, or linux
 */
const os = (settings?: GpuSetting): string => {
return PLATFORM === 'win32'
? 'windows-amd64'
: PLATFORM === 'darwin'
? settings?.cpu?.arch === 'arm64'
? 'mac-arm64'
: 'mac-amd64'
: 'linux-amd64'
}

/**
 * The CUDA version that will be set - either '11-7' or '12-0'.
 * @param settings
 * @returns
 */
const cudaVersion = (settings?: GpuSetting): '12-0' | '11-7' | undefined => {
const isUsingCuda =
settings?.vulkan !== true &&
settings?.gpus?.some((gpu) => (gpu.activated === true ? 'gpu' : 'cpu')) &&
!os().includes('mac')

if (!isUsingCuda) return undefined
// return settings?.cuda?.version === '11' ? '11-7' : '12-0'
return settings.gpus?.some((gpu) => gpu.version.includes('12'))
? '12-0'
: '11-7'
}

/**
 * The CPU instructions that will be set - either 'avx512', 'avx2', 'avx', or 'noavx'.
 * @returns
 */

/**
 * Find which variant to run based on the current platform.
 */
export const engineVariant = async (
gpuSetting?: GpuSetting
): Promise<string> => {
const platform = os(gpuSetting)

// There is no need to append the variant extension for mac
if (platform.startsWith('mac')) return platform

let engineVariant =
gpuSetting?.vulkan || gpuSetting.gpus.some((e) => !e.additional_information)
? [platform, 'vulkan']
: [
platform,
gpuRunMode(gpuSetting) === 'cuda' &&
(gpuSetting.cpu.instructions.includes('avx2') ||
gpuSetting.cpu.instructions.includes('avx512'))
? 'avx2'
: 'noavx',
gpuRunMode(gpuSetting),
cudaVersion(gpuSetting),
].filter(Boolean) // Remove any falsy values

let engineVariantString = engineVariant.join('-')

log(`[CORTEX]: Engine variant: ${engineVariantString}`)
return engineVariantString
}
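Illustrative inputs and outputs for the new engineVariant, consistent with the tests earlier in this diff (GpuSetting literals abridged; PLATFORM is the injected build constant):

// On a darwin build: the platform alone is the variant
await engineVariant({ cpu: { arch: 'arm64', instructions: [] }, gpus: [] })
// -> 'mac-arm64'

// On a win32 build with an activated NVIDIA GPU and an AVX2-capable CPU:
await engineVariant({
  vulkan: false,
  cpu: { arch: 'x64', instructions: ['avx2'] },
  gpus: [
    {
      activated: true,
      version: '12.0',
      additional_information: { driver_version: '535.0' },
    },
  ],
})
// -> 'windows-amd64-avx2-cuda-12-0'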
5
extensions/hardware-management-extension/jest.config.js
Normal file
@ -0,0 +1,5 @@
/** @type {import('ts-jest').JestConfigWithTsJest} */
module.exports = {
preset: 'ts-jest',
testEnvironment: 'node',
}
48
extensions/hardware-management-extension/package.json
Normal file
@ -0,0 +1,48 @@
{
"name": "@janhq/hardware-management-extension",
"productName": "Hardware Management",
"version": "1.0.0",
"description": "Manages Better Hardware settings.",
"main": "dist/index.js",
"node": "dist/node/index.cjs.js",
"author": "Jan <service@jan.ai>",
"license": "MIT",
"scripts": {
"test": "jest",
"build": "rolldown -c rolldown.config.mjs",
"codesign:darwin": "../../.github/scripts/auto-sign.sh",
"codesign:win32:linux": "echo 'No codesigning required'",
"codesign": "run-script-os",
"build:publish": "rimraf *.tgz --glob || true && yarn build && yarn codesign && npm pack && cpx *.tgz ../../pre-install"
},
"exports": {
".": "./dist/index.js",
"./main": "./dist/module.js"
},
"devDependencies": {
"cpx": "^1.5.0",
"rimraf": "^3.0.2",
"rolldown": "^1.0.0-beta.1",
"run-script-os": "^1.1.6",
"ts-loader": "^9.5.0",
"typescript": "^5.3.3"
},
"dependencies": {
"@janhq/core": "../../core/package.tgz",
"cpu-instructions": "^0.0.13",
"ky": "^1.7.2",
"p-queue": "^8.0.1"
},
"bundledDependencies": [
"cpu-instructions",
"@janhq/core"
],
"hardwares": {
"node": ">=18.0.0"
},
"files": [
"dist/*",
"package.json",
"README.md"
]
}
17
extensions/hardware-management-extension/rolldown.config.mjs
Normal file
@ -0,0 +1,17 @@
import { defineConfig } from 'rolldown'
import pkgJson from './package.json' with { type: 'json' }

export default defineConfig([
{
input: 'src/index.ts',
output: {
format: 'esm',
file: 'dist/index.js',
},
define: {
NODE: JSON.stringify(`${pkgJson.name}/${pkgJson.node}`),
API_URL: JSON.stringify('http://127.0.0.1:39291'),
SOCKET_URL: JSON.stringify('ws://127.0.0.1:39291'),
},
},
])
12
extensions/hardware-management-extension/src/@types/global.d.ts
vendored
Normal file
@ -0,0 +1,12 @@
declare const API_URL: string
declare const SOCKET_URL: string
declare const NODE: string

interface Core {
api: APIFunctions
events: EventEmitter
}
interface Window {
core?: Core | undefined
electronAPI?: any | undefined
}
67
extensions/hardware-management-extension/src/index.ts
Normal file
@ -0,0 +1,67 @@
import {
executeOnMain,
HardwareManagementExtension,
HardwareInformation,
} from '@janhq/core'
import ky from 'ky'
import PQueue from 'p-queue'

/**
 * JSONHardwareManagementExtension is a HardwareManagementExtension implementation that provides
 * functionality for managing hardware.
 */
export default class JSONHardwareManagementExtension extends HardwareManagementExtension {
queue = new PQueue({ concurrency: 1 })

/**
 * Called when the extension is loaded.
 */
async onLoad() {
// Run Healthcheck
this.queue.add(() => this.healthz())
}

/**
 * Called when the extension is unloaded.
 */
onUnload() {}

/**
 * Do health check on cortex.cpp
 * @returns
 */
async healthz(): Promise<void> {
return ky
.get(`${API_URL}/healthz`, {
retry: { limit: 20, delay: () => 500, methods: ['get'] },
})
.then(() => {})
}

/**
 * @returns A Promise that resolves to an object of hardware.
 */
async getHardware(): Promise<HardwareInformation> {
return this.queue.add(() =>
ky
.get(`${API_URL}/v1/hardware`)
.json<HardwareInformation>()
.then((e) => e)
) as Promise<HardwareInformation>
}

/**
 * @returns A Promise that resolves with the GPU activation result.
 */
async setAvtiveGpu(data: { gpus: number[] }): Promise<{
message: string
activated_gpus: number[]
}> {
return this.queue.add(() =>
ky.post(`${API_URL}/v1/hardware/activate`, { json: data }).then((e) => e)
) as Promise<{
message: string
activated_gpus: number[]
}>
}
}
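Illustrative usage of the new extension's surface (caller and values hypothetical; note that setAvtiveGpu keeps its spelling from the source):

const hw = await hardwareExtension.getHardware()
// hw describes the CPU, RAM, and GPUs reported by cortex.cpp

await hardwareExtension.setAvtiveGpu({ gpus: [0] })
// -> { message: '...', activated_gpus: [0] }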
@ -8,7 +8,9 @@
"forceConsistentCasingInFileNames": true,
"strict": false,
"skipLibCheck": true,
"rootDir": "./src"
"rootDir": "./src",
"resolveJsonModule": true
},
"include": ["./src"]
"include": ["./src"],
"exclude": ["src/**/*.test.ts", "rolldown.config.mjs"]
}

@ -1 +1 @@
1.0.9-rc7
1.0.10

@ -1,7 +1,7 @@
{
"name": "@janhq/inference-cortex-extension",
"productName": "Cortex Inference Engine",
"version": "1.0.24",
"version": "1.0.25",
"description": "This extension embeds cortex.cpp, a lightweight inference engine written in C++. See https://jan.ai.\nAdditional dependencies could be installed to run without Cuda Toolkit installation.",
"main": "dist/index.js",
"node": "dist/node/index.cjs.js",

@ -76,7 +76,7 @@
},
{
"key": "use_mmap",
"title": "MMAP",
"title": "mmap",
"description": "Loads model files more efficiently by mapping them to memory, reducing RAM usage.",
"controllerType": "checkbox",
"controllerProps": {
@ -0,0 +1,35 @@
{
"sources": [
{
"filename": "DeepSeek-R1-Distill-Llama-70B-Q4_K_M.gguf",
"url": "https://huggingface.co/unsloth/DeepSeek-R1-Distill-Llama-70B-GGUF/resolve/main/DeepSeek-R1-Distill-Llama-70B-Q4_K_M.gguf"
}
],
"id": "deepseek-r1-distill-llama-70b",
"object": "model",
"name": "DeepSeek R1 Distill Llama 70B Q4",
"version": "1.0",
"description": "DeepSeek-R1 is a cheaper and open-source model that excels at agentic reasoning, superior multilingual capabilities, large context windows, and generalization across domains.",
"format": "gguf",
"settings": {
"ctx_len": 131072,
"prompt_template": "<|User|> {prompt} <|Assistant|>",
"llama_model_path": "DeepSeek-R1-Distill-Llama-70B-Q4_K_M.gguf",
"ngl": 81
},
"parameters": {
"temperature": 0.6,
"top_p": 0.95,
"stream": true,
"max_tokens": 131072,
"stop": [],
"frequency_penalty": 0,
"presence_penalty": 0
},
"metadata": {
"author": "DeepSeek",
"tags": ["70B", "Featured"],
"size": 42500000000
},
"engine": "llama-cpp"
}

@ -0,0 +1,35 @@
{
"sources": [
{
"filename": "DeepSeek-R1-Distill-Llama-8B-Q5_K_M.gguf",
"url": "https://huggingface.co/unsloth/DeepSeek-R1-Distill-Llama-8B-GGUF/resolve/main/DeepSeek-R1-Distill-Llama-8B-Q5_K_M.gguf"
}
],
"id": "deepseek-r1-distill-llama-8b",
"object": "model",
"name": "DeepSeek R1 Distill Llama 8B Q5",
"version": "1.0",
"description": "DeepSeek-R1 is a cheaper and open-source model that excels at agentic reasoning, superior multilingual capabilities, large context windows, and generalization across domains.",
"format": "gguf",
"settings": {
"ctx_len": 131072,
"prompt_template": "<|User|> {prompt} <|Assistant|>",
"llama_model_path": "DeepSeek-R1-Distill-Llama-8B-Q5_K_M.gguf",
"ngl": 33
},
"parameters": {
"temperature": 0.6,
"top_p": 0.95,
"stream": true,
"max_tokens": 131072,
"stop": [],
"frequency_penalty": 0,
"presence_penalty": 0
},
"metadata": {
"author": "DeepSeek",
"tags": ["8B", "Featured"],
"size": 5730000000
},
"engine": "llama-cpp"
}

@ -0,0 +1,35 @@
{
"sources": [
{
"filename": "DeepSeek-R1-Distill-Qwen-1.5B-Q5_K_M.gguf",
"url": "https://huggingface.co/unsloth/DeepSeek-R1-Distill-Qwen-1.5B-GGUF/resolve/main/DeepSeek-R1-Distill-Qwen-1.5B-Q5_K_M.gguf"
}
],
"id": "deepseek-r1-distill-qwen-1.5b",
"object": "model",
"name": "DeepSeek R1 Distill Qwen 1.5B Q5",
"version": "1.0",
"description": "DeepSeek-R1 is a cheaper and open-source model that excels at agentic reasoning, superior multilingual capabilities, large context windows, and generalization across domains.",
"format": "gguf",
"settings": {
"ctx_len": 131072,
"prompt_template": "<|User|> {prompt} <|Assistant|>",
"llama_model_path": "DeepSeek-R1-Distill-Qwen-1.5B-Q5_K_M.gguf",
"ngl": 29
},
"parameters": {
"temperature": 0.6,
"top_p": 0.95,
"stream": true,
"max_tokens": 131072,
"stop": [],
"frequency_penalty": 0,
"presence_penalty": 0
},
"metadata": {
"author": "DeepSeek",
"tags": ["1.5B", "Featured"],
"size": 1290000000
},
"engine": "llama-cpp"
}

@ -0,0 +1,35 @@
{
"sources": [
{
"filename": "DeepSeek-R1-Distill-Qwen-14B-Q4_K_M.gguf",
"url": "https://huggingface.co/unsloth/DeepSeek-R1-Distill-Qwen-14B-GGUF/resolve/main/DeepSeek-R1-Distill-Qwen-14B-Q4_K_M.gguf"
}
],
"id": "deepseek-r1-distill-qwen-14b",
"object": "model",
"name": "DeepSeek R1 Distill Qwen 14B Q4",
"version": "1.0",
"description": "DeepSeek-R1 is a cheaper and open-source model that excels at agentic reasoning, superior multilingual capabilities, large context windows, and generalization across domains.",
"format": "gguf",
"settings": {
"ctx_len": 131072,
"prompt_template": "<|User|> {prompt} <|Assistant|>",
"llama_model_path": "DeepSeek-R1-Distill-Qwen-14B-Q4_K_M.gguf",
"ngl": 49
},
"parameters": {
"temperature": 0.6,
"top_p": 0.95,
"stream": true,
"max_tokens": 131072,
"stop": [],
"frequency_penalty": 0,
"presence_penalty": 0
},
"metadata": {
"author": "DeepSeek",
"tags": ["14B", "Featured"],
"size": 8990000000
},
"engine": "llama-cpp"
}

@ -0,0 +1,35 @@
{
"sources": [
{
"filename": "DeepSeek-R1-Distill-Qwen-32B-Q4_K_M.gguf",
"url": "https://huggingface.co/unsloth/DeepSeek-R1-Distill-Qwen-32B-GGUF/resolve/main/DeepSeek-R1-Distill-Qwen-32B-Q4_K_M.gguf"
}
],
"id": "deepseek-r1-distill-qwen-32b",
"object": "model",
"name": "DeepSeek R1 Distill Qwen 32B Q4",
"version": "1.0",
"description": "DeepSeek-R1 is a cheaper and open-source model that excels at agentic reasoning, superior multilingual capabilities, large context windows, and generalization across domains.",
"format": "gguf",
"settings": {
"ctx_len": 131072,
"prompt_template": "<|User|> {prompt} <|Assistant|>",
"llama_model_path": "DeepSeek-R1-Distill-Qwen-32B-Q4_K_M.gguf",
"ngl": 65
},
"parameters": {
"temperature": 0.6,
"top_p": 0.95,
"stream": true,
"max_tokens": 131072,
"stop": [],
"frequency_penalty": 0,
"presence_penalty": 0
},
"metadata": {
"author": "DeepSeek",
"tags": ["32B", "Featured"],
"size": 19900000000
},
"engine": "llama-cpp"
}

@ -0,0 +1,35 @@
{
"sources": [
{
"filename": "DeepSeek-R1-Distill-Qwen-7B-Q5_K_M.gguf",
"url": "https://huggingface.co/unsloth/DeepSeek-R1-Distill-Qwen-7B-GGUF/resolve/main/DeepSeek-R1-Distill-Qwen-7B-Q5_K_M.gguf"
}
],
"id": "deepseek-r1-distill-qwen-7b",
"object": "model",
"name": "DeepSeek R1 Distill Qwen 7B Q5",
"version": "1.0",
"description": "DeepSeek-R1 is a cheaper and open-source model that excels at agentic reasoning, superior multilingual capabilities, large context windows, and generalization across domains.",
"format": "gguf",
"settings": {
"ctx_len": 131072,
"prompt_template": "<|User|> {prompt} <|Assistant|>",
"llama_model_path": "DeepSeek-R1-Distill-Qwen-7B-Q5_K_M.gguf",
"ngl": 29
},
"parameters": {
"temperature": 0.6,
"top_p": 0.95,
"stream": true,
"max_tokens": 131072,
"stop": [],
"frequency_penalty": 0,
"presence_penalty": 0
},
"metadata": {
"author": "DeepSeek",
"tags": ["7B", "Featured"],
"size": 5440000000
},
"engine": "llama-cpp"
}
@ -22,19 +22,13 @@
"top_p": 0.95,
"stream": true,
"max_tokens": 8192,
"stop": [
"<|end_of_text|>",
"<|eot_id|>",
"<|eom_id|>"
],
"stop": ["<|end_of_text|>", "<|eot_id|>", "<|eom_id|>"],
"frequency_penalty": 0,
"presence_penalty": 0
},
"metadata": {
"author": "MetaAI",
"tags": [
"8B", "Featured"
],
"tags": ["8B", "Featured"],
"size": 4920000000
},
"engine": "llama-cpp"
@@ -49,6 +49,13 @@ import qwen2514bJson from './resources/models/qwen2.5-14b-instruct/model.json' w
import qwen2532bJson from './resources/models/qwen2.5-32b-instruct/model.json' with { type: 'json' }
import qwen2572bJson from './resources/models/qwen2.5-72b-instruct/model.json' with { type: 'json' }

import deepseekR1DistillQwen_1_5b from './resources/models/deepseek-r1-distill-qwen-1.5b/model.json' with { type: 'json' }
import deepseekR1DistillQwen_7b from './resources/models/deepseek-r1-distill-qwen-7b/model.json' with { type: 'json' }
import deepseekR1DistillQwen_14b from './resources/models/deepseek-r1-distill-qwen-14b/model.json' with { type: 'json' }
import deepseekR1DistillQwen_32b from './resources/models/deepseek-r1-distill-qwen-32b/model.json' with { type: 'json' }
import deepseekR1DistillLlama_8b from './resources/models/deepseek-r1-distill-llama-8b/model.json' with { type: 'json' }
import deepseekR1DistillLlama_70b from './resources/models/deepseek-r1-distill-llama-70b/model.json' with { type: 'json' }

export default defineConfig([
{
input: 'src/index.ts',
@@ -106,6 +113,12 @@ export default defineConfig([
qwen2514bJson,
qwen2532bJson,
qwen2572bJson,
deepseekR1DistillQwen_1_5b,
deepseekR1DistillQwen_7b,
deepseekR1DistillQwen_14b,
deepseekR1DistillQwen_32b,
deepseekR1DistillLlama_8b,
deepseekR1DistillLlama_70b,
]),
NODE: JSON.stringify(`${packageJson.name}/${packageJson.node}`),
SETTINGS: JSON.stringify(defaultSettingJson),

@@ -112,8 +112,8 @@ export default class JanInferenceCortexExtension extends LocalOAIEngine {
if (!Number.isNaN(threads_number)) this.cpu_threads = threads_number

// Run the process watchdog
const systemInfo = await systemInformation()
this.queue.add(() => executeOnMain(NODE, 'run', systemInfo))
// const systemInfo = await systemInformation()
this.queue.add(() => executeOnMain(NODE, 'run'))
this.queue.add(() => this.healthz())
this.subscribeToEvents()


@@ -16,15 +16,20 @@ let watchdog: ProcessWatchdog | undefined = undefined
* Spawns a Nitro subprocess.
* @returns A promise that resolves when the Nitro subprocess is started.
*/
function run(systemInfo?: SystemInformation): Promise<any> {
function run(): Promise<any> {
log(`[CORTEX]:: Spawning cortex subprocess...`)

return new Promise<void>(async (resolve, reject) => {
let gpuVisibleDevices = systemInfo?.gpuSetting?.gpus_in_use.join(',') ?? ''
let binaryName = `cortex-server${process.platform === 'win32' ? '.exe' : ''}`
// let gpuVisibleDevices = systemInfo?.gpuSetting?.gpus_in_use.join(',') ?? ''
let binaryName = `cortex-server${
process.platform === 'win32' ? '.exe' : ''
}`
const binPath = path.join(__dirname, '..', 'bin')

const executablePath = path.join(binPath, binaryName)

addEnvPaths(binPath)

const sharedPath = path.join(appResourcePath(), 'shared')
// Execute the binary
log(`[CORTEX]:: Spawn cortex at path: ${executablePath}`)
@@ -44,15 +49,17 @@ function run(systemInfo?: SystemInformation): Promise<any> {
`${path.join(dataFolderPath, '.janrc')}`,
'--data_folder_path',
dataFolderPath,
'--loglevel',
'INFO',
],
{
env: {
...process.env,
CUDA_VISIBLE_DEVICES: gpuVisibleDevices,
// Vulkan - Support 1 device at a time for now
...(gpuVisibleDevices?.length > 0 && {
GGML_VK_VISIBLE_DEVICES: gpuVisibleDevices,
}),
// CUDA_VISIBLE_DEVICES: gpuVisibleDevices,
// // Vulkan - Support 1 device at a time for now
// ...(gpuVisibleDevices?.length > 0 && {
//   GGML_VK_VISIBLE_DEVICES: gpuVisibleDevices,
// }),
},
cwd: sharedPath,
}
@@ -71,6 +78,22 @@ function dispose() {
watchdog?.terminate()
}

/**
* Set the environment paths for the cortex subprocess
* @param dest
*/
function addEnvPaths(dest: string) {
// Add engine path to the PATH and LD_LIBRARY_PATH
if (process.platform === 'win32') {
process.env.PATH = (process.env.PATH || '').concat(path.delimiter, dest)
} else {
process.env.LD_LIBRARY_PATH = (process.env.LD_LIBRARY_PATH || '').concat(
path.delimiter,
dest
)
}
}

/**
* Cortex process info
*/

@@ -15,8 +15,6 @@ import {
} from '@janhq/core'
import { CortexAPI } from './cortex'
import { scanModelsFolder } from './legacy/model-json'
import { downloadModel } from './legacy/download'
import { systemInformation } from '@janhq/core'
import { deleteModelFiles } from './legacy/delete'

export enum Settings {
@@ -71,18 +69,6 @@ export default class JanModelExtension extends ModelExtension {
* @returns A Promise that resolves when the model is downloaded.
*/
async pullModel(model: string, id?: string, name?: string): Promise<void> {
if (id) {
const model: Model = ModelManager.instance().get(id)
// Clip vision model - should not be handled by cortex.cpp
// TensorRT model - should not be handled by cortex.cpp
if (
model &&
(model.engine === InferenceEngine.nitro_tensorrt_llm ||
model.settings.vision_model)
) {
return downloadModel(model, (await systemInformation()).gpuSetting)
}
}
/**
* Sending POST to /models/pull/{id} endpoint to pull the model
*/

@@ -2,15 +2,12 @@ import {
downloadFile,
DownloadRequest,
fs,
GpuSetting,
InferenceEngine,
joinPath,
Model,
} from '@janhq/core'

export const downloadModel = async (
model: Model,
gpuSettings?: GpuSetting,
network?: { ignoreSSL?: boolean; proxy?: string }
): Promise<void> => {
const homedir = 'file://models'
@@ -27,41 +24,6 @@ export const downloadModel = async (
JSON.stringify(model, null, 2)
)

if (model.engine === InferenceEngine.nitro_tensorrt_llm) {
if (!gpuSettings || gpuSettings.gpus.length === 0) {
console.error('No GPU found. Please check your GPU setting.')
return
}
const firstGpu = gpuSettings.gpus[0]
if (!firstGpu.name.toLowerCase().includes('nvidia')) {
console.error('No Nvidia GPU found. Please check your GPU setting.')
return
}
const gpuArch = firstGpu.arch
if (gpuArch === undefined) {
console.error('No GPU architecture found. Please check your GPU setting.')
return
}

if (!supportedGpuArch.includes(gpuArch)) {
console.debug(
`Your GPU: ${JSON.stringify(firstGpu)} is not supported. Only 30xx, 40xx series are supported.`
)
return
}

const os = 'windows' // TODO: remove this hard coded value

const newSources = model.sources.map((source) => {
const newSource = { ...source }
newSource.url = newSource.url
.replace(/<os>/g, os)
.replace(/<gpuarch>/g, gpuArch)
return newSource
})
model.sources = newSources
}

console.debug(`Download sources: ${JSON.stringify(model.sources)}`)

if (model.sources.length > 1) {

@@ -1,75 +0,0 @@
# Create a Jan Extension using Typescript

Use this template to bootstrap the creation of a TypeScript Jan extension. 🚀

## Create Your Own Extension

To create your own extension, you can use this repository as a template! Just follow the instructions below:

1. Click the Use this template button at the top of the repository
2. Select Create a new repository
3. Select an owner and name for your new repository
4. Click Create repository
5. Clone your new repository

## Initial Setup

After you've cloned the repository to your local machine or codespace, you'll need to perform some initial setup steps before you can develop your extension.

> [!NOTE]
>
> You'll need to have a reasonably modern version of
> [Node.js](https://nodejs.org) handy. If you are using a version manager like
> [`nodenv`](https://github.com/nodenv/nodenv) or
> [`nvm`](https://github.com/nvm-sh/nvm), you can run `nodenv install` in the
> root of your repository to install the version specified in
> [`package.json`](./package.json). Otherwise, 20.x or later should work!

1. :hammer_and_wrench: Install the dependencies

   ```bash
   npm install
   ```

1. :building_construction: Package the TypeScript for distribution

   ```bash
   npm run bundle
   ```

1. :white_check_mark: Check your artifact

   There will be a `.tgz` file in your extension directory now.

## Update the Extension Metadata

The [`package.json`](package.json) file defines metadata about your extension, such as
extension name, main entry, description and version.

When you copy this repository, update `package.json` with the name and description for your extension.
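For instance, the fields you would typically touch might look like the sketch below — the scope, names, and values here are placeholders, not a real extension:

```json
{
  "name": "@your-scope/your-extension",
  "productName": "Your Extension",
  "version": "0.1.0",
  "description": "A short description of what your extension does.",
  "main": "dist/index.js"
}
```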

## Update the Extension Code

The [`src/`](./src/) directory is the heart of your extension! This contains the
source code that will be run when your extension functions are invoked. You can replace the
contents of this directory with your own code.

There are a few things to keep in mind when writing your extension code:

- Most Jan Extension functions are processed asynchronously.
  In `index.ts`, you will see that the extension function will return a `Promise<any>`.

  ```typescript
  import { events, MessageEvent, MessageRequest } from '@janhq/core'

  function onStart(): Promise<any> {
    return events.on(MessageEvent.OnMessageSent, (data: MessageRequest) =>
      this.inference(data)
    )
  }
  ```

For more information about the Jan Extension Core module, see the
[documentation](https://github.com/janhq/jan/blob/main/core/README.md).

So, what are you waiting for? Go ahead and start customizing your extension!
@@ -1,2 +0,0 @@
@echo off
.\node_modules\.bin\download https://catalog.jan.ai/vulkaninfoSDK.exe -o ./bin
@@ -1,49 +0,0 @@
{
  "name": "@janhq/monitoring-extension",
  "productName": "System Monitoring",
  "version": "1.0.10",
  "description": "Provides system health and OS level data.",
  "main": "dist/index.js",
  "node": "dist/node/index.cjs.js",
  "author": "Jan <service@jan.ai>",
  "license": "AGPL-3.0",
  "scripts": {
    "build": "rolldown -c rolldown.config.mjs && yarn download-artifacts",
    "download-artifacts": "run-script-os && cpx \"bin/**\" \"dist/bin\"",
    "download-artifacts:darwin": "echo 'No artifacts to download for darwin'",
    "download-artifacts:win32": "download.bat",
    "download-artifacts:linux": "download https://catalog.jan.ai/vulkaninfo -o ./bin && chmod +x ./bin/vulkaninfo",
    "build:publish": "rimraf *.tgz --glob || true && yarn build && npm pack && cpx *.tgz ../../pre-install"
  },
  "exports": {
    ".": "./dist/index.js",
    "./main": "./dist/node/index.cjs.js"
  },
  "devDependencies": {
    "@types/node": "^20.11.4",
    "@types/node-os-utils": "^1.3.4",
    "cpx": "^1.5.0",
    "download-cli": "^1.1.1",
    "rimraf": "^3.0.2",
    "rolldown": "1.0.0-beta.1",
    "run-script-os": "^1.1.6",
    "typescript": "^5.3.3"
  },
  "dependencies": {
    "@janhq/core": "../../core/package.tgz",
    "node-os-utils": "^1.3.7"
  },
  "files": [
    "dist/*",
    "package.json",
    "README.md"
  ],
  "bundleDependencies": [
    "node-os-utils",
    "@janhq/core"
  ],
  "installConfig": {
    "hoistingLimits": "workspaces"
  },
  "packageManager": "yarn@4.5.3"
}
@@ -1,22 +0,0 @@
[
  {
    "key": "log-enabled",
    "title": "Enable App Logs",
    "description": "Saves app logs locally on your computer. This enables you to send us crash reports.",
    "controllerType": "checkbox",
    "controllerProps": {
      "value": true
    }
  },
  {
    "key": "log-cleaning-interval",
    "title": "Log Cleaning Interval",
    "description": "Automatically delete local logs after a certain time interval (in milliseconds).",
    "controllerType": "input",
    "controllerProps": {
      "value": "120000",
      "placeholder": "Interval in milliseconds. E.g. 120000",
      "textAlign": "right"
    }
  }
]
@@ -1,32 +0,0 @@
import { defineConfig } from 'rolldown'
import packageJson from './package.json' with { type: 'json' }
import settingJson from './resources/settings.json' with { type: 'json' }

export default defineConfig([
  {
    input: 'src/index.ts',
    output: {
      format: 'esm',
      file: 'dist/index.js',
    },
    platform: 'browser',
    define: {
      NODE: JSON.stringify(`${packageJson.name}/${packageJson.node}`),
      SETTINGS: JSON.stringify(settingJson),
    },
  },
  {
    input: 'src/node/index.ts',
    external: ['@janhq/core/node'],
    output: {
      format: 'cjs',
      file: 'dist/node/index.cjs.js',
      sourcemap: false,
      inlineDynamicImports: true,
    },
    resolve: {
      extensions: ['.js', '.ts', '.json'],
    },
    platform: 'node',
  },
])
@@ -1,19 +0,0 @@
declare const NODE: string
declare const SETTINGS: SettingComponentProps[]

type CpuGpuInfo = {
  cpu: {
    usage: number
  }
  gpu: GpuInfo[]
}

type GpuInfo = {
  id: string
  name: string
  temperature: string
  utilization: string
  memoryTotal: string
  memoryFree: string
  memoryUtilization: string
}
@@ -1,90 +0,0 @@
import {
  AppConfigurationEventName,
  GpuSetting,
  MonitoringExtension,
  OperatingSystemInfo,
  events,
  executeOnMain,
} from '@janhq/core'

enum Settings {
  logEnabled = 'log-enabled',
  logCleaningInterval = 'log-cleaning-interval',
}
/**
 * JanMonitoringExtension is an extension that provides system monitoring functionality.
 * It implements the MonitoringExtension interface from the @janhq/core package.
 */
export default class JanMonitoringExtension extends MonitoringExtension {
  /**
   * Called when the extension is loaded.
   */
  async onLoad() {
    // Register extension settings
    this.registerSettings(SETTINGS)

    const logEnabled = await this.getSetting<boolean>(Settings.logEnabled, true)
    const logCleaningInterval = parseInt(
      await this.getSetting<string>(Settings.logCleaningInterval, '120000')
    )
    // Register File Logger provided by this extension
    await executeOnMain(NODE, 'registerLogger', {
      logEnabled,
      logCleaningInterval: isNaN(logCleaningInterval)
        ? 120000
        : logCleaningInterval,
    })

    // Attempt to fetch nvidia info
    await executeOnMain(NODE, 'updateNvidiaInfo')
    events.emit(AppConfigurationEventName.OnConfigurationUpdate, {})
  }

  onSettingUpdate<T>(key: string, value: T): void {
    if (key === Settings.logEnabled) {
      executeOnMain(NODE, 'updateLogger', { logEnabled: value })
    } else if (key === Settings.logCleaningInterval) {
      executeOnMain(NODE, 'updateLogger', { logCleaningInterval: value })
    }
  }

  /**
   * Called when the extension is unloaded.
   */
  onUnload(): void {
    // Unregister File Logger provided by this extension
    executeOnMain(NODE, 'unregisterLogger')
  }

  /**
   * Returns the GPU configuration.
   * @returns A Promise that resolves to an object containing the GPU configuration.
   */
  async getGpuSetting(): Promise<GpuSetting | undefined> {
    return executeOnMain(NODE, 'getGpuConfig')
  }

  /**
   * Returns information about the system resources.
   * @returns A Promise that resolves to an object containing information about the system resources.
   */
  getResourcesInfo(): Promise<any> {
    return executeOnMain(NODE, 'getResourcesInfo')
  }

  /**
   * Returns information about the current system load.
   * @returns A Promise that resolves to an object containing information about the current system load.
   */
  getCurrentLoad(): Promise<any> {
    return executeOnMain(NODE, 'getCurrentLoad')
  }

  /**
   * Returns information about the OS
   * @returns
   */
  getOsInfo(): Promise<OperatingSystemInfo> {
    return executeOnMain(NODE, 'getOsInfo')
  }
}
@@ -1,389 +0,0 @@
import {
  GpuSetting,
  GpuSettingInfo,
  LoggerManager,
  OperatingSystemInfo,
  ResourceInfo,
  SupportedPlatforms,
  getJanDataFolderPath,
  log,
} from '@janhq/core/node'
import { mem, cpu } from 'node-os-utils'
import { exec } from 'child_process'
import { writeFileSync, existsSync, readFileSync, mkdirSync } from 'fs'
import path from 'path'
import os from 'os'
import { FileLogger } from './logger'

/**
 * Path to the settings directory
 **/
export const SETTINGS_DIR = path.join(getJanDataFolderPath(), 'settings')
/**
 * Path to the settings file
 **/
export const GPU_INFO_FILE = path.join(SETTINGS_DIR, 'settings.json')

/**
 * Default GPU settings
 * TODO: This needs to be refactored to support multiple accelerators
 **/
const DEFAULT_SETTINGS: GpuSetting = {
  notify: true,
  run_mode: 'cpu',
  nvidia_driver: {
    exist: false,
    version: '',
  },
  cuda: {
    exist: false,
    version: '',
  },
  gpus: [],
  gpu_highest_vram: '',
  gpus_in_use: [],
  is_initial: true,
  // TODO: This needs to be set based on user toggle in settings
  vulkan: false,
}

export const getGpuConfig = async (): Promise<GpuSetting | undefined> => {
  if (process.platform === 'darwin') return undefined
  if (existsSync(GPU_INFO_FILE))
    return JSON.parse(readFileSync(GPU_INFO_FILE, 'utf-8'))
  return DEFAULT_SETTINGS
}

export const getResourcesInfo = async (): Promise<ResourceInfo> => {
  const ramUsedInfo = await mem.used()
  const totalMemory = ramUsedInfo.totalMemMb * 1024 * 1024
  const usedMemory = ramUsedInfo.usedMemMb * 1024 * 1024

  const resourceInfo: ResourceInfo = {
    mem: {
      totalMemory,
      usedMemory,
    },
  }

  return resourceInfo
}

export const getCurrentLoad = () =>
  new Promise<CpuGpuInfo>(async (resolve, reject) => {
    const cpuPercentage = await cpu.usage()
    let data = {
      run_mode: 'cpu',
      gpus_in_use: [],
    }

    if (process.platform !== 'darwin') {
      data = JSON.parse(readFileSync(GPU_INFO_FILE, 'utf-8'))
    }

    if (data.run_mode === 'gpu' && data.gpus_in_use.length > 0) {
      const gpuIds = data.gpus_in_use.join(',')
      if (gpuIds !== '' && data['vulkan'] !== true) {
        exec(
          `nvidia-smi --query-gpu=index,name,temperature.gpu,utilization.gpu,memory.total,memory.free,utilization.memory --format=csv,noheader,nounits --id=${gpuIds}`,
          (error, stdout, _) => {
            if (error) {
              console.error(`exec error: ${error}`)
              throw new Error(error.message)
            }
            const gpuInfo: GpuInfo[] = stdout
              .trim()
              .split('\n')
              .map((line) => {
                const [
                  id,
                  name,
                  temperature,
                  utilization,
                  memoryTotal,
                  memoryFree,
                  memoryUtilization,
                ] = line.split(', ').map((item) => item.replace(/\r/g, ''))
                return {
                  id,
                  name,
                  temperature,
                  utilization,
                  memoryTotal,
                  memoryFree,
                  memoryUtilization,
                }
              })

            resolve({
              cpu: { usage: cpuPercentage },
              gpu: gpuInfo,
            })
          }
        )
      } else {
        // Handle the case where gpuIds is empty
        resolve({
          cpu: { usage: cpuPercentage },
          gpu: [],
        })
      }
    } else {
      // Handle the case where run_mode is not 'gpu' or no GPUs are in use
      resolve({
        cpu: { usage: cpuPercentage },
        gpu: [],
      })
    }
  })

/**
 * This will retrieve GPU information and persist settings.json
 * Will be called when the extension is loaded to turn on GPU acceleration if supported
 */
export const updateNvidiaInfo = async () => {
  // ignore if macos
  if (process.platform === 'darwin') return

  try {
    JSON.parse(readFileSync(GPU_INFO_FILE, 'utf-8'))
  } catch (error) {
    if (!existsSync(SETTINGS_DIR)) {
      mkdirSync(SETTINGS_DIR, {
        recursive: true,
      })
    }
    writeFileSync(GPU_INFO_FILE, JSON.stringify(DEFAULT_SETTINGS, null, 2))
  }

  await updateNvidiaDriverInfo()
  await updateGpuInfo()
}

const updateNvidiaDriverInfo = async () =>
  new Promise((resolve, reject) => {
    exec(
      'nvidia-smi --query-gpu=driver_version --format=csv,noheader',
      (error, stdout) => {
        const data: GpuSetting = JSON.parse(
          readFileSync(GPU_INFO_FILE, 'utf-8')
        )

        if (!error) {
          const firstLine = stdout.split('\n')[0].trim()
          data.nvidia_driver.exist = true
          data.nvidia_driver.version = firstLine
        } else {
          data.nvidia_driver.exist = false
        }

        writeFileSync(GPU_INFO_FILE, JSON.stringify(data, null, 2))
        resolve({})
      }
    )
  })

const getGpuArch = (gpuName: string): string => {
  if (!gpuName.toLowerCase().includes('nvidia')) return 'unknown'

  if (gpuName.includes('30')) return 'ampere'
  else if (gpuName.includes('40')) return 'ada'
  else return 'unknown'
}

const updateGpuInfo = async () =>
  new Promise((resolve, reject) => {
    let data: GpuSetting = JSON.parse(readFileSync(GPU_INFO_FILE, 'utf-8'))

    // Cuda
    if (data.vulkan === true) {
      // Vulkan
      exec(
        process.platform === 'win32'
          ? `${__dirname}\\..\\bin\\vulkaninfoSDK.exe --summary`
          : `${__dirname}/../bin/vulkaninfo --summary`,
        async (error, stdout) => {
          if (!error) {
            const output = stdout.toString()

            log(output)
            const gpuRegex = /GPU(\d+):(?:[\s\S]*?)deviceName\s*=\s*(.*)/g

            const gpus: GpuSettingInfo[] = []
            let match
            while ((match = gpuRegex.exec(output)) !== null) {
              const id = match[1]
              const name = match[2]
              const arch = getGpuArch(name)
              gpus.push({ id, vram: '0', name, arch })
            }
            data.gpus = gpus

            if (!data.gpus_in_use || data.gpus_in_use.length === 0) {
              data.gpus_in_use = [data.gpus.length > 1 ? '1' : '0']
            }

            data = await updateCudaExistence(data)
            writeFileSync(GPU_INFO_FILE, JSON.stringify(data, null, 2))
            log(`[APP]::${JSON.stringify(data)}`)
            resolve({})
          } else {
            reject(error)
          }
        }
      )
    } else {
      exec(
        'nvidia-smi --query-gpu=index,memory.total,name --format=csv,noheader,nounits',
        async (error, stdout) => {
          if (!error) {
            log(`[SPECS]::${stdout}`)
            // Get GPU info and track the GPU with the highest memory
            let highestVram = 0
            let highestVramId = '0'
            const gpus: GpuSettingInfo[] = stdout
              .trim()
              .split('\n')
              .map((line) => {
                let [id, vram, name] = line.split(', ')
                const arch = getGpuArch(name)
                vram = vram.replace(/\r/g, '')
                if (parseFloat(vram) > highestVram) {
                  highestVram = parseFloat(vram)
                  highestVramId = id
                }
                return { id, vram, name, arch }
              })

            data.gpus = gpus
            data.gpu_highest_vram = highestVramId
          } else {
            data.gpus = []
            data.gpu_highest_vram = undefined
          }

          if (!data.gpus_in_use || data.gpus_in_use.length === 0) {
            data.gpus_in_use = data.gpu_highest_vram ? [data.gpu_highest_vram].filter(e => !!e) : []
          }

          data = await updateCudaExistence(data)
          console.log('[MONITORING]::Cuda info: ', data)
          writeFileSync(GPU_INFO_FILE, JSON.stringify(data, null, 2))
          log(`[APP]::${JSON.stringify(data)}`)
          resolve({})
        }
      )
    }
  })

/**
 * Check if file exists in paths
 */
const checkFileExistenceInPaths = (file: string, paths: string[]): boolean => {
  return paths.some((p) => existsSync(path.join(p, file)))
}

/**
 * Validate cuda for linux and windows
 */
const updateCudaExistence = async (
  data: GpuSetting = DEFAULT_SETTINGS
): Promise<GpuSetting> => {
  let filesCuda12: string[]
  let filesCuda11: string[]
  let paths: string[]
  let cudaVersion: string = ''

  if (process.platform === 'win32') {
    filesCuda12 = ['cublas64_12.dll', 'cudart64_12.dll', 'cublasLt64_12.dll']
    filesCuda11 = ['cublas64_11.dll', 'cudart64_110.dll', 'cublasLt64_11.dll']
    paths = process.env.PATH ? process.env.PATH.split(path.delimiter) : []
  } else {
    filesCuda12 = ['libcudart.so.12', 'libcublas.so.12', 'libcublasLt.so.12']
    filesCuda11 = ['libcudart.so.11.0', 'libcublas.so.11', 'libcublasLt.so.11']
    paths = process.env.LD_LIBRARY_PATH
      ? process.env.LD_LIBRARY_PATH.split(path.delimiter)
      : []
    paths.push('/usr/lib/x86_64-linux-gnu/')
  }

  let cudaExists = filesCuda12.every(
    (file) => existsSync(file) || checkFileExistenceInPaths(file, paths)
  )

  if (!cudaExists) {
    cudaExists = filesCuda11.every(
      (file) => existsSync(file) || checkFileExistenceInPaths(file, paths)
    )
    if (cudaExists) {
      cudaVersion = '11'
    }
  } else {
    cudaVersion = '12'
  }

  data.cuda.exist = cudaExists
  data.cuda.version = cudaVersion

  console.debug(data.is_initial, data.gpus_in_use)

  if (cudaExists && data.is_initial && data.gpus_in_use.length > 0) {
    data.run_mode = 'gpu'
  }

  data.is_initial = false

  // Attempt to query CUDA using NVIDIA SMI
  if (!cudaExists) {
    await new Promise<void>((resolve) => {
      exec('nvidia-smi', (error, stdout) => {
        if (!error) {
          const regex = /CUDA\s*Version:\s*(\d+\.\d+)/g
          const match = regex.exec(stdout)
          if (match && match[1]) {
            data.cuda.version = match[1]
          }
        }
        console.log('[MONITORING]::Finalized cuda info update: ', data)
        resolve()
      })
    })
  }
  return data
}

export const getOsInfo = (): OperatingSystemInfo => {
  const platform =
    SupportedPlatforms.find((p) => p === process.platform) || 'unknown'

  const osInfo: OperatingSystemInfo = {
    platform: platform,
    arch: process.arch,
    release: os.release(),
    machine: os.machine(),
    version: os.version(),
    totalMem: os.totalmem(),
    freeMem: os.freemem(),
  }

  return osInfo
}

export const registerLogger = ({ logEnabled, logCleaningInterval }) => {
  const logger = new FileLogger(logEnabled, logCleaningInterval)
  LoggerManager.instance().register(logger)
  logger.cleanLogs()
}

export const unregisterLogger = () => {
  LoggerManager.instance().unregister('file')
}

export const updateLogger = ({ logEnabled, logCleaningInterval }) => {
  const logger = LoggerManager.instance().loggers.get('file') as FileLogger
  if (logger && logEnabled !== undefined) logger.logEnabled = logEnabled
  if (logger && logCleaningInterval)
    logger.logCleaningInterval = logCleaningInterval
  // Rerun
  logger && logger.cleanLogs()
}
@ -5,77 +5,470 @@
|
||||
"post": {
|
||||
"operationId": "AssistantsController_create",
|
||||
"summary": "Create assistant",
|
||||
"description": "Creates a new assistant.",
|
||||
"parameters": [],
|
||||
"description": "Creates a new assistant with the specified configuration.",
|
||||
"requestBody": {
|
||||
"required": true,
|
||||
"content": {
|
||||
"application/json": {
|
||||
"schema": {
|
||||
"$ref": "#/components/schemas/CreateAssistantDto"
|
||||
"type": "object",
|
||||
"properties": {
|
||||
"model": {
|
||||
"type": "string",
|
||||
"description": "The model identifier to use for the assistant."
|
||||
},
|
||||
"name": {
|
||||
"type": "string",
|
||||
"description": "The name of the assistant."
|
||||
},
|
||||
"description": {
|
||||
"type": "string",
|
||||
"description": "The description of the assistant."
|
||||
},
|
||||
"instructions": {
|
||||
"type": "string",
|
||||
"description": "Instructions for the assistant's behavior."
|
||||
},
|
||||
"tools": {
|
||||
"type": "array",
|
||||
"description": "A list of tools enabled on the assistant. Maximum of 128 tools.",
|
||||
"items": {
|
||||
"type": "object",
|
||||
"properties": {
|
||||
"type": {
|
||||
"type": "string",
|
||||
"enum": [
|
||||
"code_interpreter",
|
||||
"file_search",
|
||||
"function"
|
||||
]
|
||||
}
|
||||
}
|
||||
}
|
||||
},
|
||||
"responses": {
|
||||
"201": {
|
||||
"description": "The assistant has been successfully created."
|
||||
"tool_resources": {
|
||||
"type": "object",
|
||||
"description": "Resources used by the assistant's tools.",
|
||||
"properties": {
|
||||
"code_interpreter": {
|
||||
"type": "object"
|
||||
},
|
||||
"file_search": {
|
||||
"type": "object"
|
||||
}
|
||||
}
|
||||
},
|
||||
"tags": ["Assistants"]
|
||||
"metadata": {
|
||||
"type": "object",
|
||||
"description": "Set of key-value pairs for the assistant.",
|
||||
"additionalProperties": true
|
||||
},
|
||||
"get": {
|
||||
"operationId": "AssistantsController_findAll",
|
||||
"summary": "List assistants",
|
||||
"description": "Returns a list of assistants.",
|
||||
"parameters": [
|
||||
"temperature": {
|
||||
"type": "number",
|
||||
"format": "float",
|
||||
"description": "Temperature parameter for response generation."
|
||||
},
|
||||
"top_p": {
|
||||
"type": "number",
|
||||
"format": "float",
|
||||
"description": "Top p parameter for response generation."
|
||||
},
|
||||
"response_format": {
|
||||
"oneOf": [
|
||||
{
|
||||
"name": "limit",
|
||||
"required": false,
|
||||
"in": "query",
|
||||
"description": "A limit on the number of objects to be returned. Limit can range between 1 and 100, and the default is 20.",
|
||||
"schema": {
|
||||
"type": "number"
|
||||
}
|
||||
"type": "string",
|
||||
"enum": ["auto"]
|
||||
},
|
||||
{
|
||||
"name": "order",
|
||||
"required": false,
|
||||
"in": "query",
|
||||
"description": "Sort order by the created_at timestamp of the objects. asc for ascending order and desc for descending order.",
|
||||
"schema": {
|
||||
"type": "string"
|
||||
"type": "object"
|
||||
}
|
||||
]
|
||||
}
|
||||
},
|
||||
{
|
||||
"name": "after",
|
||||
"required": false,
|
||||
"in": "query",
|
||||
"description": "A cursor for use in pagination. after is an object ID that defines your place in the list. For instance, if you make a list request and receive 100 objects, ending with obj_foo, your subsequent call can include after=obj_foo in order to fetch the next page of the list.",
|
||||
"schema": {
|
||||
"type": "string"
|
||||
"required": ["model"]
|
||||
}
|
||||
}
|
||||
}
|
||||
},
|
||||
{
|
||||
"name": "before",
|
||||
"required": false,
|
||||
"in": "query",
|
||||
"description": "A cursor for use in pagination. before is an object ID that defines your place in the list. For instance, if you make a list request and receive 100 objects, ending with obj_foo, your subsequent call can include before=obj_foo in order to fetch the previous page of the list.",
|
||||
"schema": {
|
||||
"type": "string"
|
||||
}
|
||||
}
|
||||
],
|
||||
"responses": {
|
||||
"200": {
|
||||
"description": "Ok",
|
||||
"content": {
|
||||
"application/json": {
|
||||
"schema": {
|
||||
"type": "object",
|
||||
"properties": {
|
||||
"id": {
|
||||
"type": "string",
|
||||
"description": "The unique identifier of the assistant."
|
||||
},
|
||||
"object": {
|
||||
"type": "string",
|
||||
"enum": ["assistant"],
|
||||
"description": "The object type, which is always 'assistant'."
|
||||
},
|
||||
"created_at": {
|
||||
"type": "integer",
|
||||
"description": "Unix timestamp (in seconds) of when the assistant was created."
|
||||
},
|
||||
"model": {
|
||||
"type": "string",
|
||||
"description": "The model identifier used by the assistant."
|
||||
},
|
||||
"name": {
|
||||
"type": "string",
|
||||
"description": "The name of the assistant."
|
||||
},
|
||||
"description": {
|
||||
"type": "string",
|
||||
"description": "The description of the assistant."
|
||||
},
|
||||
"instructions": {
|
||||
"type": "string",
|
||||
"description": "Instructions for the assistant's behavior."
|
||||
},
|
||||
"tools": {
|
||||
"type": "array",
|
||||
"description": "A list of tools enabled on the assistant.",
|
||||
"items": {
|
||||
"type": "object",
|
||||
"properties": {
|
||||
"type": {
|
||||
"type": "string",
|
||||
"enum": [
|
||||
"code_interpreter",
|
||||
"file_search",
|
||||
"function"
|
||||
]
|
||||
}
|
||||
}
|
||||
}
|
||||
},
|
||||
"tool_resources": {
|
||||
"type": "object",
|
||||
"description": "Resources used by the assistant's tools.",
|
||||
"properties": {
|
||||
"code_interpreter": {
|
||||
"type": "object"
|
||||
},
|
||||
"file_search": {
|
||||
"type": "object"
|
||||
}
|
||||
}
|
||||
},
|
||||
"metadata": {
|
||||
"type": "object",
|
||||
"description": "Set of key-value pairs that can be attached to the assistant.",
|
||||
"additionalProperties": true
|
||||
},
|
||||
"temperature": {
|
||||
"type": "number",
|
||||
"format": "float",
|
||||
"description": "Temperature parameter for response generation."
|
||||
},
|
||||
"top_p": {
|
||||
"type": "number",
|
||||
"format": "float",
|
||||
"description": "Top p parameter for response generation."
|
||||
},
|
||||
"response_format": {
|
||||
"oneOf": [
|
||||
{
|
||||
"type": "string",
|
||||
"enum": ["auto"]
|
||||
},
|
||||
{
|
||||
"type": "object"
|
||||
}
|
||||
]
|
||||
}
|
||||
},
|
||||
"required": [
|
||||
"id",
|
||||
"object",
|
||||
"created_at",
|
||||
"model",
|
||||
"metadata"
|
||||
]
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
},
|
||||
"tags": ["Assistants"]
|
||||
},
|
||||
"patch": {
|
||||
"operationId": "AssistantsController_update",
|
||||
"summary": "Update assistant",
|
||||
"description": "Updates an assistant. Requires at least one modifiable field.",
|
||||
"parameters": [
|
||||
{
|
||||
"name": "id",
|
||||
"required": true,
|
||||
"in": "path",
|
||||
"description": "The unique identifier of the assistant.",
|
||||
"schema": {
|
||||
"type": "string"
|
||||
}
|
||||
},
|
||||
{
|
||||
"name": "OpenAI-Beta",
|
||||
"required": true,
|
||||
"in": "header",
|
||||
"description": "Beta feature header.",
|
||||
"schema": {
|
||||
"type": "string",
|
||||
"enum": ["assistants=v2"]
|
||||
}
|
||||
}
|
||||
],
|
||||
"requestBody": {
|
||||
"required": true,
|
||||
"content": {
|
||||
"application/json": {
|
||||
"schema": {
|
||||
"type": "object",
|
||||
"properties": {
|
||||
"model": {
|
||||
"type": "string",
|
||||
"description": "The model identifier to use for the assistant."
|
||||
},
|
||||
"name": {
|
||||
"type": "string",
|
||||
"description": "The name of the assistant."
|
||||
},
|
||||
"description": {
|
||||
"type": "string",
|
||||
"description": "The description of the assistant."
|
||||
},
|
||||
"instructions": {
|
||||
"type": "string",
|
||||
"description": "Instructions for the assistant's behavior."
|
||||
},
|
||||
"tools": {
|
||||
"type": "array",
|
||||
"description": "A list of tools enabled on the assistant. Maximum of 128 tools.",
|
||||
"items": {
|
||||
"type": "object",
|
||||
"properties": {
|
||||
"type": {
|
||||
"type": "string",
|
||||
"enum": [
|
||||
"code_interpreter",
|
||||
"file_search",
|
||||
"function"
|
||||
]
|
||||
}
|
||||
}
|
||||
}
|
||||
},
|
||||
"tool_resources": {
|
||||
"type": "object",
|
||||
"description": "Resources used by the assistant's tools.",
|
||||
"properties": {
|
||||
"code_interpreter": {
|
||||
"type": "object"
|
||||
},
|
||||
"file_search": {
|
||||
"type": "object"
|
||||
}
|
||||
}
|
||||
},
|
||||
"metadata": {
|
||||
"type": "object",
|
||||
"description": "Set of key-value pairs for the assistant.",
|
||||
"additionalProperties": true
|
||||
},
|
||||
"temperature": {
|
||||
"type": "number",
|
||||
"format": "float",
|
||||
"description": "Temperature parameter for response generation."
|
||||
},
|
||||
"top_p": {
|
||||
"type": "number",
|
||||
"format": "float",
|
||||
"description": "Top p parameter for response generation."
|
||||
},
|
||||
"response_format": {
|
||||
"oneOf": [
|
||||
{
|
||||
"type": "string",
|
||||
"enum": ["auto"]
|
||||
},
|
||||
{
|
||||
"type": "object"
|
||||
}
|
||||
]
|
||||
}
|
||||
},
|
||||
"minProperties": 1
|
||||
}
|
||||
}
|
||||
}
|
||||
},
|
||||
"responses": {
|
||||
"200": {
|
||||
"description": "Ok",
|
||||
"content": {
|
||||
"application/json": {
|
||||
"schema": {
|
||||
"type": "object",
|
||||
"properties": {
|
||||
"id": {
|
||||
"type": "string",
|
||||
"description": "The unique identifier of the assistant."
|
||||
},
|
||||
"object": {
|
||||
"type": "string",
|
||||
"enum": ["assistant"],
|
||||
"description": "The object type, which is always 'assistant'."
|
||||
},
|
||||
"created_at": {
|
||||
"type": "integer",
|
||||
"description": "Unix timestamp (in seconds) of when the assistant was created."
|
||||
},
|
||||
"model": {
|
||||
"type": "string",
|
||||
"description": "The model identifier used by the assistant."
|
||||
},
|
||||
"name": {
|
||||
"type": "string",
|
||||
"description": "The name of the assistant."
|
||||
},
|
||||
"description": {
|
||||
"type": "string",
|
||||
"description": "The description of the assistant."
|
||||
},
|
||||
"instructions": {
|
||||
"type": "string",
|
||||
"description": "Instructions for the assistant's behavior."
|
||||
},
|
||||
"tools": {
|
||||
"type": "array",
|
||||
"description": "A list of tools enabled on the assistant.",
|
||||
"items": {
|
||||
"type": "object",
|
||||
"properties": {
|
||||
"type": {
|
||||
"type": "string",
|
||||
"enum": [
|
||||
"code_interpreter",
|
||||
"file_search",
|
||||
"function"
|
||||
]
|
||||
}
|
||||
}
|
||||
}
|
||||
},
|
||||
"tool_resources": {
|
||||
"type": "object",
|
||||
"description": "Resources used by the assistant's tools.",
|
||||
"properties": {
|
||||
"code_interpreter": {
|
||||
"type": "object"
|
||||
},
|
||||
"file_search": {
|
||||
"type": "object"
|
||||
}
|
||||
}
|
||||
},
|
||||
"metadata": {
|
||||
"type": "object",
|
||||
"description": "Set of key-value pairs that can be attached to the assistant.",
|
||||
"additionalProperties": true
|
||||
},
|
||||
"temperature": {
|
||||
"type": "number",
|
||||
"format": "float",
|
||||
"description": "Temperature parameter for response generation."
|
||||
},
|
||||
"top_p": {
|
||||
"type": "number",
|
||||
"format": "float",
|
||||
"description": "Top p parameter for response generation."
|
||||
},
|
||||
"response_format": {
|
||||
"oneOf": [
|
||||
{
|
||||
"type": "string",
|
||||
"enum": ["auto"]
|
||||
},
|
||||
{
|
||||
"type": "object"
|
||||
}
|
||||
]
|
||||
}
|
||||
},
|
||||
"required": [
|
||||
"id",
|
||||
"object",
|
||||
"created_at",
|
||||
"model",
|
||||
"metadata"
|
||||
]
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
},
|
||||
"tags": ["Assistants"]
|
||||
},
|
||||
"get": {
|
||||
"operationId": "AssistantsController_list",
|
||||
"summary": "List assistants",
|
||||
"description": "Returns a list of assistants.",
|
||||
"responses": {
|
||||
"200": {
|
||||
"description": "Ok",
|
||||
"content": {
|
||||
"application/json": {
|
||||
"schema": {
|
||||
"type": "object",
|
||||
"properties": {
|
||||
"object": {
|
||||
"type": "string",
|
||||
"enum": ["list"],
|
||||
"description": "The object type, which is always 'list' for a list response."
|
||||
},
|
||||
"data": {
|
||||
"type": "array",
|
||||
"items": {
|
||||
"$ref": "#/components/schemas/AssistantEntity"
|
||||
"type": "object",
|
||||
"properties": {
|
||||
"id": {
|
||||
"type": "string",
|
||||
"description": "The unique identifier of the assistant."
|
||||
},
|
||||
"object": {
|
||||
"type": "string",
|
||||
"enum": ["assistant"],
|
||||
"description": "The object type, which is always 'assistant'."
|
||||
},
|
||||
"created_at": {
|
||||
"type": "integer",
|
||||
"description": "Unix timestamp (in seconds) of when the assistant was created."
|
||||
},
|
||||
"model": {
|
||||
"type": "string",
|
||||
"description": "The model identifier used by the assistant."
|
||||
},
|
||||
"metadata": {
|
||||
"type": "object",
|
||||
"description": "Set of key-value pairs that can be attached to the assistant.",
|
||||
"additionalProperties": true
|
||||
}
|
||||
},
|
||||
"required": [
|
||||
"id",
|
||||
"object",
|
||||
"created_at",
|
||||
"model",
|
||||
"metadata"
|
||||
]
|
||||
}
|
||||
}
|
||||
},
|
||||
"required": ["object", "data"]
|
||||
}
|
||||
}
|
||||
}
|
||||
@ -88,7 +481,77 @@
|
||||
"get": {
|
||||
"operationId": "AssistantsController_findOne",
|
||||
"summary": "Get assistant",
|
||||
"description": "Retrieves a specific assistant defined by an assistant's `id`.",
|
||||
"description": "Retrieves a specific assistant by ID.",
|
||||
"parameters": [
|
||||
{
|
||||
"name": "id",
|
||||
"required": true,
|
||||
"in": "path",
|
||||
"description": "The unique identifier of the assistant.",
|
||||
"schema": {
|
||||
"type": "string"
|
||||
}
|
||||
},
|
||||
{
|
||||
"name": "OpenAI-Beta",
|
||||
"required": true,
|
||||
"in": "header",
|
||||
"description": "Beta feature header.",
|
||||
"schema": {
|
||||
"type": "string",
|
||||
"enum": ["assistants=v2"]
|
||||
}
|
||||
}
|
||||
],
|
||||
"responses": {
|
||||
"200": {
|
||||
"description": "Ok",
|
||||
"content": {
|
||||
"application/json": {
|
||||
"schema": {
|
||||
"type": "object",
|
||||
"properties": {
|
||||
"id": {
|
||||
"type": "string",
|
||||
"description": "The unique identifier of the assistant."
|
||||
},
|
||||
"object": {
|
||||
"type": "string",
|
||||
"enum": ["assistant"],
|
||||
"description": "The object type, which is always 'assistant'."
|
||||
},
|
||||
"created_at": {
|
||||
"type": "integer",
|
||||
"description": "Unix timestamp (in seconds) of when the assistant was created."
|
||||
},
|
||||
"model": {
|
||||
"type": "string",
|
||||
"description": "The model identifier used by the assistant."
|
||||
},
|
||||
"metadata": {
|
||||
"type": "object",
|
||||
"description": "Set of key-value pairs attached to the assistant.",
|
||||
"additionalProperties": true
|
||||
}
|
||||
},
|
||||
"required": [
|
||||
"id",
|
||||
"object",
|
||||
"created_at",
|
||||
"model",
|
||||
"metadata"
|
||||
]
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
},
|
||||
"tags": ["Assistants"]
|
||||
},
|
||||
"delete": {
|
||||
"operationId": "AssistantsController_remove",
|
||||
"summary": "Delete assistant",
|
||||
"description": "Deletes a specific assistant by ID.",
|
||||
"parameters": [
|
||||
{
|
||||
"name": "id",
|
||||
@ -106,36 +569,24 @@
|
||||
"content": {
|
||||
"application/json": {
|
||||
"schema": {
|
||||
"$ref": "#/components/schemas/AssistantEntity"
|
||||
}
|
||||
}
|
||||
}
|
||||
"type": "object",
|
||||
"properties": {
|
||||
"id": {
|
||||
"type": "string",
|
||||
"description": "The unique identifier of the deleted assistant."
|
||||
},
|
||||
"object": {
|
||||
"type": "string",
|
||||
"enum": ["assistant.deleted"],
|
||||
"description": "The object type for a deleted assistant."
|
||||
},
|
||||
"deleted": {
|
||||
"type": "boolean",
|
||||
"enum": [true],
|
||||
"description": "Indicates the assistant was successfully deleted."
|
||||
}
|
||||
},
|
||||
"tags": ["Assistants"]
|
||||
},
|
||||
"delete": {
|
||||
"operationId": "AssistantsController_remove",
|
||||
"summary": "Delete assistant",
|
||||
"description": "Deletes a specific assistant defined by an assistant's `id`.",
|
||||
"parameters": [
|
||||
{
|
||||
"name": "id",
|
||||
"required": true,
|
||||
"in": "path",
|
||||
"description": "The unique identifier of the assistant.",
|
||||
"schema": {
|
||||
"type": "string"
|
||||
}
|
||||
}
|
||||
],
|
||||
"responses": {
|
||||
"200": {
|
||||
"description": "The assistant has been successfully deleted.",
|
||||
"content": {
|
||||
"application/json": {
|
||||
"schema": {
|
||||
"$ref": "#/components/schemas/DeleteAssistantResponseDto"
|
||||
"required": ["id", "object", "deleted"]
|
||||
}
|
||||
}
|
||||
}
|
||||
@ -2199,6 +2650,84 @@
|
||||
"tags": ["Engines"]
|
||||
}
|
||||
},
|
||||
"/engines/{name}/releases/{version}": {
|
||||
"get": {
|
||||
"summary": "List variants for a specific engine version",
|
||||
"description": "Lists all available variants (builds) for a specific version of an engine. Variants can include different CPU architectures (AVX, AVX2, AVX512), GPU support (CUDA, Vulkan), and operating systems (Windows, Linux, macOS).",
|
||||
"parameters": [
|
||||
{
|
||||
"name": "name",
|
||||
"in": "path",
|
||||
"required": true,
|
||||
"schema": {
|
||||
"type": "string",
|
||||
"enum": ["llama-cpp", "onnxruntime", "tensorrt-llm"],
|
||||
"default": "llama-cpp"
|
||||
},
|
||||
"description": "The type of engine"
|
||||
},
|
||||
{
|
||||
"name": "version",
|
||||
"in": "path",
|
||||
"required": true,
|
||||
"schema": {
|
||||
"type": "string"
|
||||
},
|
||||
"description": "The version of the engine"
|
||||
},
|
||||
{
|
||||
"name": "show",
|
||||
"in": "query",
|
||||
"required": false,
|
||||
"schema": {
|
||||
"type": "string",
|
||||
"enum": ["all", "compatible"],
|
||||
"default": "all"
|
||||
},
|
||||
"description": "Filter the variants list. Use 'compatible' to show only variants compatible with the current system, or 'all' to show all available variants."
|
||||
}
|
||||
],
|
||||
"responses": {
|
||||
"200": {
|
||||
"description": "Successfully retrieved variants list",
|
||||
"content": {
|
||||
"application/json": {
|
||||
"schema": {
|
||||
"type": "array",
|
||||
"items": {
|
||||
"type": "object",
|
||||
"properties": {
|
||||
"name": {
|
||||
"type": "string",
|
||||
"description": "The name of the variant, including OS, architecture, and capabilities",
|
||||
"example": "linux-amd64-avx-cuda-11-7"
|
||||
},
|
||||
"created_at": {
|
||||
"type": "string",
|
||||
"format": "date-time",
|
||||
"description": "Creation timestamp of the variant",
|
||||
"example": "2024-11-13T04:51:16Z"
|
||||
},
|
||||
"size": {
|
||||
"type": "integer",
|
||||
"description": "Size of the variant in bytes",
|
||||
"example": 151224604
|
||||
},
|
||||
"download_count": {
|
||||
"type": "integer",
|
||||
"description": "Number of times this variant has been downloaded",
|
||||
"example": 0
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
},
|
||||
"tags": ["Engines"]
|
||||
}
|
||||
},
|
||||
"/engines/{name}/releases/latest": {
|
||||
"get": {
|
||||
"summary": "Get latest release",
|
||||
@ -2314,7 +2843,7 @@
|
||||
"get_models_url": {
|
||||
"type": "string",
|
||||
"description": "The URL to get models",
|
||||
"example": "https://api.openai.com/v1/models"
|
||||
"example": "https://api.openai.com/models"
|
||||
}
|
||||
}
|
||||
}
|
||||
@ -3378,6 +3907,7 @@
|
||||
"Files",
|
||||
"Hardware",
|
||||
"Events",
|
||||
"Assistants",
|
||||
"Threads",
|
||||
"Messages",
|
||||
"Pulling Models",
|
||||
@ -4858,8 +5388,8 @@
|
||||
"engine",
|
||||
"version",
|
||||
"inference_params",
|
||||
"TransformReq",
|
||||
"TransformResp",
|
||||
"transform_req",
|
||||
"transform_resp",
|
||||
"metadata"
|
||||
],
|
||||
"properties": {
|
||||
@ -4867,9 +5397,9 @@
|
||||
"type": "string",
|
||||
"description": "The identifier of the model."
|
||||
},
|
||||
"api_key_template": {
|
||||
"header_template": {
|
||||
"type": "string",
|
||||
"description": "Template for the API key header."
|
||||
"description": "Template for the header."
|
||||
},
|
||||
"engine": {
|
||||
"type": "string",
|
||||
@ -4902,7 +5432,7 @@
|
||||
}
|
||||
}
|
||||
},
|
||||
"TransformReq": {
|
||||
"transform_req": {
|
||||
"type": "object",
|
||||
"properties": {
|
||||
"get_models": {
|
||||
@ -4924,7 +5454,7 @@
|
||||
}
|
||||
}
|
||||
},
|
||||
"TransformResp": {
|
||||
"transform_resp": {
|
||||
"type": "object",
|
||||
"properties": {
|
||||
"chat_completions": {
|
||||
@ -5632,9 +6162,9 @@
|
||||
"description": "Number of GPU layers.",
|
||||
"example": 33
|
||||
},
|
||||
"api_key_template": {
|
||||
"header_template": {
|
||||
"type": "string",
|
||||
"description": "Template for the API key header."
|
||||
"description": "Template for the header."
|
||||
},
|
||||
"version": {
|
||||
"type": "string",
|
||||
|
||||
@ -10,7 +10,9 @@ const AutoLink = ({ text }: Props) => {
|
||||
|
||||
return (
|
||||
<>
|
||||
{text.split(delimiter).map((word) => {
|
||||
{text &&
|
||||
typeof text === 'string' &&
|
||||
text.split(delimiter).map((word) => {
|
||||
const match = word.match(delimiter)
|
||||
if (match) {
|
||||
const url = match[0]
|
||||
|
||||
@ -23,7 +23,13 @@ import { mainViewStateAtom } from '@/helpers/atoms/App.atom'
|
||||
import { activeAssistantAtom } from '@/helpers/atoms/Assistant.atom'
|
||||
import { selectedSettingAtom } from '@/helpers/atoms/Setting.atom'
|
||||
|
||||
const ErrorMessage = ({ message }: { message: ThreadMessage }) => {
|
||||
const ErrorMessage = ({
|
||||
message,
|
||||
errorComponent,
|
||||
}: {
|
||||
message?: ThreadMessage
|
||||
errorComponent?: React.ReactNode
|
||||
}) => {
|
||||
const setModalTroubleShooting = useSetAtom(modalTroubleShootingAtom)
|
||||
const setMainState = useSetAtom(mainViewStateAtom)
|
||||
const setSelectedSettingScreen = useSetAtom(selectedSettingAtom)
|
||||
@ -50,7 +56,7 @@ const ErrorMessage = ({ message }: { message: ThreadMessage }) => {
|
||||
const getErrorTitle = () => {
|
||||
const engine = getEngine()
|
||||
|
||||
switch (message.metadata?.error_code) {
|
||||
switch (message?.metadata?.error_code) {
|
||||
case ErrorCode.InvalidApiKey:
|
||||
case ErrorCode.AuthenticationError:
|
||||
return (
|
||||
@ -61,7 +67,7 @@ const ErrorMessage = ({ message }: { message: ThreadMessage }) => {
|
||||
className="font-medium text-[hsla(var(--app-link))] underline"
|
||||
onClick={() => {
|
||||
setMainState(MainViewState.Settings)
|
||||
engine?.name && setSelectedSettingScreen(engine.name)
|
||||
setSelectedSettingScreen(activeAssistant?.model?.engine ?? '')
|
||||
}}
|
||||
>
|
||||
Settings
|
||||
@ -77,7 +83,7 @@ const ErrorMessage = ({ message }: { message: ThreadMessage }) => {
|
||||
data-testid="passthrough-error-message"
|
||||
className="first-letter:uppercase"
|
||||
>
|
||||
{message.content[0]?.text?.value === 'Failed to fetch' &&
|
||||
{message?.content[0]?.text?.value === 'Failed to fetch' &&
|
||||
engine &&
|
||||
engine?.name !== InferenceEngine.cortex_llamacpp ? (
|
||||
<span>
|
||||
@ -89,6 +95,9 @@ const ErrorMessage = ({ message }: { message: ThreadMessage }) => {
|
||||
{message?.content[0]?.text?.value && (
|
||||
<AutoLink text={message?.content[0]?.text?.value} />
|
||||
)}
|
||||
{!message?.content[0]?.text?.value && (
|
||||
<span>Something went wrong. Please try again.</span>
|
||||
)}
|
||||
</>
|
||||
)}
|
||||
</p>
|
||||
@ -100,12 +109,15 @@ const ErrorMessage = ({ message }: { message: ThreadMessage }) => {
|
||||
<div className="mx-auto my-6 max-w-[700px] px-4">
|
||||
<div
|
||||
className="mx-auto max-w-[400px] rounded-lg border border-[hsla(var(--app-border))]"
|
||||
key={message.id}
|
||||
key={message?.id}
|
||||
>
|
||||
<div className="flex justify-between border-b border-inherit px-4 py-2">
|
||||
<h6 className="text-[hsla(var(--destructive-bg))]">Error</h6>
|
||||
<div className="flex gap-x-4 text-xs">
|
||||
<div>
|
||||
<h6 className="flex items-center gap-x-1 font-semibold text-[hsla(var(--destructive-bg))]">
|
||||
<span className="h-2 w-2 rounded-full bg-[hsla(var(--destructive-bg))]" />
|
||||
<span>Error</span>
|
||||
</h6>
|
||||
<div className="flex items-center gap-x-4 text-xs">
|
||||
<div className="font-semibold">
|
||||
<span
|
||||
className="flex cursor-pointer items-center gap-x-1 text-[hsla(var(--app-link))]"
|
||||
onClick={() => setModalTroubleShooting(true)}
|
||||
@ -116,7 +128,7 @@ const ErrorMessage = ({ message }: { message: ThreadMessage }) => {
|
||||
<ModalTroubleShooting />
|
||||
</div>
|
||||
<div
|
||||
className="flex cursor-pointer items-center gap-x-1 text-[hsla(var(--text-secondary))]"
|
||||
className="flex cursor-pointer items-center gap-x-1 font-semibold text-[hsla(var(--text-secondary))]"
|
||||
onClick={handleCopy}
|
||||
>
|
||||
{copied ? (
|
||||
@ -138,10 +150,10 @@ const ErrorMessage = ({ message }: { message: ThreadMessage }) => {
|
||||
</div>
|
||||
<div className="max-h-[80px] w-full overflow-x-auto p-4 py-2">
|
||||
<div
|
||||
className="text-xs leading-relaxed text-[hsla(var(--text-secondary))]"
|
||||
className="font-serif text-xs leading-relaxed text-[hsla(var(--text-secondary))]"
|
||||
ref={errorDivRef}
|
||||
>
|
||||
{getErrorTitle()}
|
||||
{errorComponent ? errorComponent : getErrorTitle()}
|
||||
</div>
|
||||
</div>
|
||||
</div>
|
||||
|
||||
@ -87,7 +87,7 @@ describe('SystemMonitor', () => {
|
||||
|
||||
expect(screen.getByText('Running Models')).toBeInTheDocument()
|
||||
expect(screen.getByText('App Log')).toBeInTheDocument()
|
||||
expect(screen.getByText('7.45/14.90 GB')).toBeInTheDocument()
|
||||
expect(screen.getByText('7.45GB / 14.90GB')).toBeInTheDocument()
|
||||
expect(screen.getByText('30%')).toBeInTheDocument()
|
||||
})
|
||||
|
||||
|
||||
@ -134,7 +134,7 @@ const SystemMonitor = () => {
|
||||
<div className="flex items-center justify-between gap-2">
|
||||
<h6 className="font-bold">Memory</h6>
|
||||
<span>
|
||||
{toGigabytes(usedRam, { hideUnit: true })}/
|
||||
{toGigabytes(usedRam, { hideUnit: true })}GB /{' '}
|
||||
{toGigabytes(totalRam, { hideUnit: true })}GB
|
||||
</span>
|
||||
</div>
|
||||
@ -149,10 +149,12 @@ const SystemMonitor = () => {
|
||||
</div>
|
||||
{gpus.length > 0 && (
|
||||
<div className="mb-4 border-b border-[hsla(var(--app-border))] pb-4 last:border-none">
|
||||
{gpus.map((gpu, index) => {
|
||||
{gpus
|
||||
.filter((gpu) => gpu.activated === true)
|
||||
.map((gpu, index) => {
|
||||
const gpuUtilization = utilizedMemory(
|
||||
gpu.memoryFree,
|
||||
gpu.memoryTotal
|
||||
gpu.free_vram,
|
||||
gpu.total_vram
|
||||
)
|
||||
return (
|
||||
<div key={index} className="mt-4 flex flex-col gap-x-2">
|
||||
@ -163,8 +165,8 @@ const SystemMonitor = () => {
|
||||
<div className="flex gap-x-2">
|
||||
<div className="">
|
||||
<span>
|
||||
{gpu.memoryTotal - gpu.memoryFree}/
|
||||
{gpu.memoryTotal}
|
||||
{gpu.total_vram - gpu.free_vram}/
|
||||
{gpu.total_vram}
|
||||
</span>
|
||||
<span> MB</span>
|
||||
</div>
|
||||
|
||||
@@ -25,6 +25,8 @@ import ImportModelOptionModal from '@/screens/Settings/ImportModelOptionModal'
 import ImportingModelModal from '@/screens/Settings/ImportingModelModal'
 import SelectingModelModal from '@/screens/Settings/SelectingModelModal'

+import { getAppDistinctId, updateDistinctId } from '@/utils/settings'
+
 import LoadingModal from '../LoadingModal'

 import MainViewContainer from '../MainViewContainer'
@@ -96,8 +98,16 @@ const BaseLayout = () => {
         return properties
       },
     })
+    // Attempt to restore distinct Id from app global settings
+    getAppDistinctId()
+      .then((id) => {
+        if (id) posthog.identify(id)
+      })
+      .finally(() => {
         posthog.opt_in_capturing()
         posthog.register({ app_version: VERSION })
+        updateDistinctId(posthog.get_distinct_id())
+      })
   } else {
     posthog.opt_out_capturing()
   }
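The new block restores a previously persisted PostHog distinct id before opting in, then writes the current id back, so the analytics identity survives reinstalls or cleared local storage. The helpers come from '@/utils/settings' and their implementation is not part of this diff; the stand-ins below are only a sketch of the contract the calling code assumes:

// Hypothetical persistence backing getAppDistinctId/updateDistinctId.
// The real helpers presumably read and write Jan's global settings store.
const settingsStore = new Map<string, string>()

export async function getAppDistinctId(): Promise<string | undefined> {
  return settingsStore.get('distinct_id')
}

export async function updateDistinctId(distinctId: string): Promise<void> {
  settingsStore.set('distinct_id', distinctId)
}

Running the opt-in inside `.finally()` means capturing is enabled and the id re-persisted whether or not a stored id was found.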
@@ -28,6 +28,8 @@ import ModelLabel from '@/containers/ModelLabel'

 import SetupRemoteModel from '@/containers/SetupRemoteModel'

+import { useActiveModel } from '@/hooks/useActiveModel'
+
 import { useCreateNewThread } from '@/hooks/useCreateNewThread'
 import useDownloadModel from '@/hooks/useDownloadModel'
 import { modelDownloadStateAtom } from '@/hooks/useDownloadState'
@@ -40,7 +42,7 @@ import useUpdateModelParameters from '@/hooks/useUpdateModelParameters'
 import { formatDownloadPercentage, toGigabytes } from '@/utils/converter'

 import { manualRecommendationModel } from '@/utils/model'
-import { getLogoEngine } from '@/utils/modelEngine'
+import { getLogoEngine, getTitleByEngine } from '@/utils/modelEngine'

 import { activeAssistantAtom } from '@/helpers/atoms/Assistant.atom'
 import {
@@ -93,6 +95,7 @@ const ModelDropdown = ({
   const { updateModelParameter } = useUpdateModelParameters()
   const searchInputRef = useRef<HTMLInputElement>(null)
   const configuredModels = useAtomValue(configuredModelsAtom)
+  const { stopModel } = useActiveModel()

   const featuredModels = configuredModels.filter(
     (x) =>
@@ -226,6 +229,7 @@ const ModelDropdown = ({
       const model = downloadedModels.find((m) => m.id === modelId)
       setSelectedModel(model)
       setOpen(false)
+      stopModel()

       if (activeThread) {
         // Change assistand tools based on model support RAG
@@ -248,18 +252,13 @@ const ModelDropdown = ({
           ],
         })

-      const defaultContextLength = Math.min(
-        8192,
-        model?.settings.ctx_len ?? 8192
-      )
-
+      const contextLength = model?.settings.ctx_len
+        ? Math.min(8192, model?.settings.ctx_len ?? 8192)
+        : undefined
       const overriddenParameters = {
-        ctx_len: model?.settings.ctx_len ? defaultContextLength : undefined,
-        max_tokens: defaultContextLength
-          ? Math.min(
-              model?.parameters.max_tokens ?? 8192,
-              defaultContextLength
-            )
+        ctx_len: contextLength,
+        max_tokens: contextLength
+          ? Math.min(model?.parameters.max_tokens ?? 8192, contextLength)
           : model?.parameters.max_tokens,
       }
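The refactor replaces `defaultContextLength` (always defined) with `contextLength`, which stays `undefined` when the model declares no `ctx_len`, so models without a declared context no longer receive a spurious 8192 cap. A self-contained sketch of the clamping, with illustrative values (the helper name below is not from the source):

// Mirrors the overriddenParameters logic above.
const clampParameters = (ctxLen?: number, maxTokens?: number) => {
  const contextLength = ctxLen ? Math.min(8192, ctxLen) : undefined
  return {
    ctx_len: contextLength,
    max_tokens: contextLength
      ? Math.min(maxTokens ?? 8192, contextLength)
      : maxTokens,
  }
}

console.log(clampParameters(32768, 16384)) // { ctx_len: 8192, max_tokens: 8192 }
console.log(clampParameters(4096, 16384)) // { ctx_len: 4096, max_tokens: 4096 }
console.log(clampParameters(undefined, 4096)) // { ctx_len: undefined, max_tokens: 4096 }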
@@ -289,6 +288,7 @@ const ModelDropdown = ({
       updateThreadMetadata,
       setThreadModelParams,
       updateModelParameter,
+      stopModel,
     ]
   )

@@ -429,7 +429,7 @@ const ModelDropdown = ({
           />
         )}
         <h6 className="font-medium capitalize text-[hsla(var(--text-secondary))]">
-          {engine.name}
+          {getTitleByEngine(engine.name)}
         </h6>
       </div>
       <div className="-mr-2 flex gap-1">
@@ -475,7 +475,7 @@ const ModelDropdown = ({
       >
         <div className="flex items-center gap-2">
           <p
-            className="line-clamp-1 text-[hsla(var(--text-secondary))]"
+            className="max-w-[200px] overflow-hidden truncate whitespace-nowrap text-[hsla(var(--text-secondary))]"
             title={model.name}
           >
             {model.name}
@@ -549,6 +549,8 @@ const ModelDropdown = ({
           (c) => c.id === model.id
         )
         return (
+          <>
+            {isDownloaded && (
               <li
                 key={model.id}
                 className={twMerge(
@@ -558,7 +560,10 @@ const ModelDropdown = ({
                     : 'text-[hsla(var(--text-primary))]'
                 )}
                 onClick={() => {
-                  if (!isConfigured && engine.type === 'remote')
+                  if (
+                    !isConfigured &&
+                    engine.type === 'remote'
+                  )
                     return null
                   if (isDownloaded) {
                     onClickModelItem(model.id)
@@ -568,7 +573,7 @@ const ModelDropdown = ({
                 <div className="flex gap-x-2">
                   <p
                     className={twMerge(
-                      'line-clamp-1',
+                      'max-w-[200px] overflow-hidden truncate whitespace-nowrap',
                       !isDownloaded &&
                         'text-[hsla(var(--text-secondary))]'
                     )}
@@ -618,6 +623,8 @@ const ModelDropdown = ({
                   )}
                 </div>
               </li>
+            )}
+          </>
         )
       })}
     </ul>
@@ -29,15 +29,20 @@ const ModelLabel = ({ size, compact }: Props) => {
   const { settings } = useSettings()

   const getLabel = (size: number) => {
-    const minimumRamModel = size * 1.25
-    const availableRam =
-      settings?.run_mode === 'gpu'
+    const minimumRamModel = (size * 1.25) / (1024 * 1024)
+
+    const availableRam = settings?.gpus?.some((gpu) => gpu.activated)
       ? availableVram * 1000000 // MB to bytes
-      : totalRam - usedRam + (activeModel?.metadata?.size ?? 0)
+      : totalRam -
+        (usedRam +
+          (activeModel?.metadata?.size
+            ? (activeModel.metadata.size * 1.25) / (1024 * 1024)
+            : 0))

     if (minimumRamModel > totalRam) {
       return (
         <NotEnoughMemoryLabel
-          unit={settings?.run_mode === 'gpu' ? 'VRAM' : 'RAM'}
+          unit={settings?.gpus?.some((gpu) => gpu.activated) ? 'VRAM' : 'RAM'}
           compact={compact}
         />
       )
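The new formula converts the model's size (apparently bytes) to MB with a 1.25x head-room factor before comparing against available memory, and GPU mode is now detected from activated GPUs rather than `run_mode`. A sketch of the fit check under those unit assumptions (inferred from the `(size * 1.25) / (1024 * 1024)` conversion above, not confirmed elsewhere):

const BYTES_PER_MB = 1024 * 1024

// Assumed units: model size in bytes, RAM figures in MB.
function minimumRamMb(modelSizeBytes: number): number {
  return (modelSizeBytes * 1.25) / BYTES_PER_MB
}

function fitsInMemory(modelSizeBytes: number, totalRamMb: number): boolean {
  return minimumRamMb(modelSizeBytes) <= totalRamMb
}

// A 4 GiB model needs 5120 MB of head-room:
console.log(minimumRamMb(4 * 1024 ** 3)) // 5120
console.log(fitsInMemory(4 * 1024 ** 3, 16384)) // true
console.log(fitsInMemory(4 * 1024 ** 3, 4096)) // false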
@@ -143,8 +143,7 @@ export default function ModelHandler() {
       return
     }

     // The thread title should not be updated if the message is less than 10 words
-    // And no new line character is present
-    // And non-alphanumeric characters should be removed
+    // No new line character is presented in the title
     if (messageContent.includes('\n')) {
       messageContent = messageContent.replace(/\n/g, ' ')
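The comment block above states the thread-title rules the handler enforces inline. A hypothetical helper condensing those rules (names and the exact word-splitting are illustrative, not taken from the source):

// Returns undefined when the message is too short to justify a retitle.
function deriveThreadTitle(messageContent: string): string | undefined {
  const words = messageContent.trim().split(/\s+/)
  if (words.length < 10) return undefined // fewer than 10 words: keep the old title
  return messageContent
    .replace(/\n/g, ' ') // no new line characters in the title
    .replace(/[^a-zA-Z0-9\s]/g, '') // drop non-alphanumeric characters
    .trim()
}

console.log(deriveThreadTitle('short message')) // undefined
console.log(deriveThreadTitle('How do I configure the GPU settings\nfor local models in Jan?'))
// "How do I configure the GPU settings for local models in Jan"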
@@ -93,14 +93,8 @@ const ServerLogs = (props: ServerLogsProps) => {
   }, [listRef.current?.scrollHeight, isUserManuallyScrollingUp, logs])

   return (
-    <ScrollArea
-      ref={listRef}
-      className={twMerge(
-        'h-[calc(100%-49px)] w-full p-4 py-0',
-        logs.length === 0 && 'mx-auto'
-      )}
-      onScroll={handleScroll}
-    >
+    <>
+      <div>
       {withCopy && (
         <div className="absolute right-2 top-7">
           <div className="flex w-full flex-row gap-2">
@@ -140,6 +134,15 @@ const ServerLogs = (props: ServerLogsProps) => {
           </div>
         </div>
       )}
+      </div>
+      <ScrollArea
+        ref={listRef}
+        className={twMerge(
+          'h-[calc(100%-49px)] w-full p-4 py-0',
+          logs.length === 0 && 'mx-auto'
+        )}
+        onScroll={handleScroll}
+      >
       <div className="flex h-full w-full flex-col">
         {logs.length > 0 ? (
           <code className="inline-block max-w-[38vw] whitespace-break-spaces text-[13px] lg:max-w-[40vw] xl:max-w-[50vw]">
@@ -155,7 +158,7 @@ const ServerLogs = (props: ServerLogsProps) => {
           <div
             className={twMerge(
               'mt-24 flex w-full flex-col items-center justify-center',
-              withCopy && 'mt-0 py-2'
+              withCopy && 'mt-4 py-2'
             )}
           >
             <svg
@@ -287,11 +290,14 @@ const ServerLogs = (props: ServerLogsProps) => {
               </linearGradient>
             </defs>
           </svg>
-          <p className="text-[hsla(var(--text-secondary)] mt-4">Empty logs</p>
+          <p className="text-[hsla(var(--text-secondary)] mt-4">
+            Empty logs
+          </p>
         </div>
       )}
       </div>
     </ScrollArea>
+    </>
   )
 }
@@ -73,7 +73,7 @@ const SliderRightPanel = ({
   trigger={
     <Input
       type="text"
-      className="-mt-4 h-8 w-[60px]"
+      className="-mt-4 h-8 w-[68px]"
       min={min}
       max={max}
       value={val}
@@ -8,6 +8,8 @@ export const mainViewStateAtom = atom<MainViewState>(MainViewState.Thread)

 export const defaultJanDataFolderAtom = atom<string>('')

+export const LocalEngineDefaultVariantAtom = atom<string>('')
+
 const SHOW_RIGHT_PANEL = 'showRightPanel'

 // Store panel atom
Some files were not shown because too many files have changed in this diff.