Merge pull request #4684 from janhq/release/v0.5.15

chore: merge release 0.5.15 branch into main branch
This commit is contained in:
Louis 2025-02-18 18:13:27 +07:00 committed by GitHub
commit 07428b4cdc
188 changed files with 4962 additions and 3365 deletions

View File

@ -6,6 +6,7 @@ on:
jobs: jobs:
assign-author: assign-author:
runs-on: ubuntu-latest runs-on: ubuntu-latest
if: ${{ github.event.pull_request.head.repo.full_name == github.repository }}
permissions: permissions:
pull-requests: write pull-requests: write
steps: steps:

View File

@ -9,31 +9,6 @@ jobs:
get-update-version: get-update-version:
uses: ./.github/workflows/template-get-update-version.yml uses: ./.github/workflows/template-get-update-version.yml
create-draft-release:
runs-on: ubuntu-latest
if: github.event_name == 'push' && startsWith(github.ref, 'refs/tags/')
outputs:
upload_url: ${{ steps.create_release.outputs.upload_url }}
version: ${{ steps.get_version.outputs.version }}
permissions:
contents: write
steps:
- name: Extract tag name without v prefix
id: get_version
run: echo "VERSION=${GITHUB_REF#refs/tags/v}" >> $GITHUB_ENV && echo "::set-output name=version::${GITHUB_REF#refs/tags/v}"
env:
GITHUB_REF: ${{ github.ref }}
- name: Create Draft Release
id: create_release
uses: softprops/action-gh-release@v2
with:
tag_name: ${{ github.ref_name }}
token: ${{ secrets.GITHUB_TOKEN }}
name: "${{ env.VERSION }}"
draft: true
prerelease: false
generate_release_notes: true
build-macos: build-macos:
uses: ./.github/workflows/template-build-macos.yml uses: ./.github/workflows/template-build-macos.yml
secrets: inherit secrets: inherit
@ -65,7 +40,7 @@ jobs:
beta: true beta: true
sync-temp-to-latest: sync-temp-to-latest:
needs: [build-macos, create-draft-release, build-windows-x64, build-linux-x64] needs: [build-macos, build-windows-x64, build-linux-x64]
runs-on: ubuntu-latest runs-on: ubuntu-latest
permissions: permissions:
contents: write contents: write
@ -82,19 +57,15 @@ jobs:
AWS_DEFAULT_REGION: ${{ secrets.DELTA_AWS_REGION }} AWS_DEFAULT_REGION: ${{ secrets.DELTA_AWS_REGION }}
AWS_EC2_METADATA_DISABLED: "true" AWS_EC2_METADATA_DISABLED: "true"
- name: set release to prerelease
run: |
gh release edit v${{ needs.create-draft-release.outputs.version }} --draft=false --prerelease
env:
GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }}
noti-discord-and-update-url-readme: noti-discord-and-update-url-readme:
needs: [build-macos, create-draft-release, build-windows-x64, build-linux-x64, sync-temp-to-latest] needs: [build-macos, get-update-version, build-windows-x64, build-linux-x64, sync-temp-to-latest]
runs-on: ubuntu-latest runs-on: ubuntu-latest
steps: steps:
- name: Set version to environment variable - name: Set version to environment variable
run: | run: |
echo "VERSION=${{ needs.create-draft-release.outputs.version }}" >> $GITHUB_ENV VERSION=${{ needs.get-update-version.outputs.new_version }}
VERSION="${VERSION#v}"
echo "VERSION=$VERSION" >> $GITHUB_ENV
- name: Notify Discord - name: Notify Discord
uses: Ilshidur/action-discord@master uses: Ilshidur/action-discord@master
@ -105,6 +76,5 @@ jobs:
- macOS Universal: https://delta.jan.ai/beta/jan-beta-mac-universal-{{ VERSION }}.dmg - macOS Universal: https://delta.jan.ai/beta/jan-beta-mac-universal-{{ VERSION }}.dmg
- Linux Deb: https://delta.jan.ai/beta/jan-beta-linux-amd64-{{ VERSION }}.deb - Linux Deb: https://delta.jan.ai/beta/jan-beta-linux-amd64-{{ VERSION }}.deb
- Linux AppImage: https://delta.jan.ai/beta/jan-beta-linux-x86_64-{{ VERSION }}.AppImage - Linux AppImage: https://delta.jan.ai/beta/jan-beta-linux-x86_64-{{ VERSION }}.AppImage
- Github Release URL: https://github.com/janhq/jan/releases/tag/v{{ VERSION }}
env: env:
DISCORD_WEBHOOK: ${{ secrets.DISCORD_WEBHOOK_JAN_BETA }} DISCORD_WEBHOOK: ${{ secrets.DISCORD_WEBHOOK_JAN_BETA }}

View File

@ -1,39 +0,0 @@
name: build-jan-server
on:
workflow_call:
inputs:
dockerfile_path:
required: false
type: string
default: './Dockerfile'
docker_image_tag:
required: true
type: string
default: 'ghcr.io/janhq/jan-server:dev-latest'
jobs:
build:
runs-on: ubuntu-latest
env:
REGISTRY: ghcr.io
IMAGE_NAME: janhq/jan-server
permissions:
packages: write
steps:
- name: Checkout
uses: actions/checkout@v4
- name: Log in to the Container registry
uses: docker/login-action@v3
with:
registry: ${{ env.REGISTRY }}
username: ${{ github.actor }}
password: ${{ secrets.GITHUB_TOKEN }}
- name: Build and push Docker image
uses: docker/build-push-action@v3
with:
context: .
file: ${{ inputs.dockerfile_path }}
push: true
tags: ${{ inputs.docker_image_tag }}

View File

@ -83,7 +83,7 @@ jobs:
cat ./electron/package.json cat ./electron/package.json
echo "------------------------" echo "------------------------"
cat ./package.json cat ./package.json
jq '.build.publish = [{"provider": "generic", "url": "https://delta.jan.ai/beta", "channel": "beta"}, {"provider": "github", "owner": "janhq", "repo": "jan", "channel": "beta"}, {"provider": "s3", "acl": null, "bucket": "${{ secrets.DELTA_AWS_S3_BUCKET_NAME }}", "region": "${{ secrets.DELTA_AWS_REGION}}", "path": "temp-beta", "channel": "beta"}]' electron/package.json > /tmp/package.json jq '.build.publish = [{"provider": "generic", "url": "https://delta.jan.ai/beta", "channel": "beta"}, {"provider": "s3", "acl": null, "bucket": "${{ secrets.DELTA_AWS_S3_BUCKET_NAME }}", "region": "${{ secrets.DELTA_AWS_REGION}}", "path": "temp-beta", "channel": "beta"}]' electron/package.json > /tmp/package.json
mv /tmp/package.json electron/package.json mv /tmp/package.json electron/package.json
cat electron/package.json cat electron/package.json
@ -122,8 +122,6 @@ jobs:
make build-and-publish make build-and-publish
env: env:
GH_TOKEN: ${{ secrets.GITHUB_TOKEN }} GH_TOKEN: ${{ secrets.GITHUB_TOKEN }}
ANALYTICS_ID: ${{ secrets.JAN_APP_UMAMI_PROJECT_API_KEY }}
ANALYTICS_HOST: ${{ secrets.JAN_APP_UMAMI_URL }}
POSTHOG_KEY: ${{ secrets.POSTHOG_KEY }} POSTHOG_KEY: ${{ secrets.POSTHOG_KEY }}
POSTHOG_HOST: ${{ secrets.POSTHOG_HOST }} POSTHOG_HOST: ${{ secrets.POSTHOG_HOST }}

View File

@ -99,7 +99,7 @@ jobs:
cat ./electron/package.json cat ./electron/package.json
echo "------------------------" echo "------------------------"
cat ./package.json cat ./package.json
jq '.build.publish = [{"provider": "generic", "url": "https://delta.jan.ai/beta", "channel": "beta"}, {"provider": "github", "owner": "janhq", "repo": "jan", "channel": "beta"}, {"provider": "s3", "acl": null, "bucket": "${{ secrets.DELTA_AWS_S3_BUCKET_NAME }}", "region": "${{ secrets.DELTA_AWS_REGION}}", "path": "temp-beta", "channel": "beta"}]' electron/package.json > /tmp/package.json jq '.build.publish = [{"provider": "generic", "url": "https://delta.jan.ai/beta", "channel": "beta"}, {"provider": "s3", "acl": null, "bucket": "${{ secrets.DELTA_AWS_S3_BUCKET_NAME }}", "region": "${{ secrets.DELTA_AWS_REGION}}", "path": "temp-beta", "channel": "beta"}]' electron/package.json > /tmp/package.json
mv /tmp/package.json electron/package.json mv /tmp/package.json electron/package.json
cat electron/package.json cat electron/package.json
@ -168,8 +168,6 @@ jobs:
APPLE_APP_SPECIFIC_PASSWORD: ${{ secrets.APPLE_APP_SPECIFIC_PASSWORD }} APPLE_APP_SPECIFIC_PASSWORD: ${{ secrets.APPLE_APP_SPECIFIC_PASSWORD }}
APP_PATH: '.' APP_PATH: '.'
DEVELOPER_ID: ${{ secrets.DEVELOPER_ID }} DEVELOPER_ID: ${{ secrets.DEVELOPER_ID }}
ANALYTICS_ID: ${{ secrets.JAN_APP_UMAMI_PROJECT_API_KEY }}
ANALYTICS_HOST: ${{ secrets.JAN_APP_UMAMI_URL }}
POSTHOG_KEY: ${{ secrets.POSTHOG_KEY }} POSTHOG_KEY: ${{ secrets.POSTHOG_KEY }}
POSTHOG_HOST: ${{ secrets.POSTHOG_HOST }} POSTHOG_HOST: ${{ secrets.POSTHOG_HOST }}

View File

@ -108,7 +108,7 @@ jobs:
cat ./package.json cat ./package.json
echo "------------------------" echo "------------------------"
cat ./electron/scripts/uninstaller.nsh cat ./electron/scripts/uninstaller.nsh
jq '.build.publish = [{"provider": "generic", "url": "https://delta.jan.ai/beta", "channel": "beta"}, {"provider": "github", "owner": "janhq", "repo": "jan", "channel": "beta"}, {"provider": "s3", "acl": null, "bucket": "${{ secrets.DELTA_AWS_S3_BUCKET_NAME }}", "region": "${{ secrets.DELTA_AWS_REGION}}", "path": "temp-beta", "channel": "beta"}]' electron/package.json > /tmp/package.json jq '.build.publish = [{"provider": "generic", "url": "https://delta.jan.ai/beta", "channel": "beta"}, {"provider": "s3", "acl": null, "bucket": "${{ secrets.DELTA_AWS_S3_BUCKET_NAME }}", "region": "${{ secrets.DELTA_AWS_REGION}}", "path": "temp-beta", "channel": "beta"}]' electron/package.json > /tmp/package.json
mv /tmp/package.json electron/package.json mv /tmp/package.json electron/package.json
cat electron/package.json cat electron/package.json
@ -160,8 +160,6 @@ jobs:
make build-and-publish make build-and-publish
env: env:
GH_TOKEN: ${{ secrets.GITHUB_TOKEN }} GH_TOKEN: ${{ secrets.GITHUB_TOKEN }}
ANALYTICS_ID: ${{ secrets.JAN_APP_UMAMI_PROJECT_API_KEY }}
ANALYTICS_HOST: ${{ secrets.JAN_APP_UMAMI_URL }}
AZURE_KEY_VAULT_URI: ${{ secrets.AZURE_KEY_VAULT_URI }} AZURE_KEY_VAULT_URI: ${{ secrets.AZURE_KEY_VAULT_URI }}
AZURE_CLIENT_ID: ${{ secrets.AZURE_CLIENT_ID }} AZURE_CLIENT_ID: ${{ secrets.AZURE_CLIENT_ID }}
AZURE_TENANT_ID: ${{ secrets.AZURE_TENANT_ID }} AZURE_TENANT_ID: ${{ secrets.AZURE_TENANT_ID }}

View File

@ -12,6 +12,7 @@ export enum ExtensionTypeEnum {
SystemMonitoring = 'systemMonitoring', SystemMonitoring = 'systemMonitoring',
HuggingFace = 'huggingFace', HuggingFace = 'huggingFace',
Engine = 'engine', Engine = 'engine',
Hardware = 'hardware',
} }
export interface ExtensionType { export interface ExtensionType {

View File

@ -38,8 +38,14 @@ describe('OAIEngine', () => {
it('should subscribe to events on load', () => { it('should subscribe to events on load', () => {
engine.onLoad() engine.onLoad()
expect(events.on).toHaveBeenCalledWith(MessageEvent.OnMessageSent, expect.any(Function)) expect(events.on).toHaveBeenCalledWith(
expect(events.on).toHaveBeenCalledWith(InferenceEvent.OnInferenceStopped, expect.any(Function)) MessageEvent.OnMessageSent,
expect.any(Function)
)
expect(events.on).toHaveBeenCalledWith(
InferenceEvent.OnInferenceStopped,
expect.any(Function)
)
}) })
it('should handle inference request', async () => { it('should handle inference request', async () => {
@ -77,7 +83,12 @@ describe('OAIEngine', () => {
expect(events.emit).toHaveBeenCalledWith( expect(events.emit).toHaveBeenCalledWith(
MessageEvent.OnMessageUpdate, MessageEvent.OnMessageUpdate,
expect.objectContaining({ expect.objectContaining({
content: [{ type: ContentType.Text, text: { value: 'test response', annotations: [] } }], content: [
{
type: ContentType.Text,
text: { value: 'test response', annotations: [] },
},
],
status: MessageStatus.Ready, status: MessageStatus.Ready,
}) })
) )
@ -101,11 +112,10 @@ describe('OAIEngine', () => {
await engine.inference(data) await engine.inference(data)
expect(events.emit).toHaveBeenCalledWith( expect(events.emit).toHaveBeenLastCalledWith(
MessageEvent.OnMessageUpdate, MessageEvent.OnMessageUpdate,
expect.objectContaining({ expect.objectContaining({
content: [{ type: ContentType.Text, text: { value: 'test error', annotations: [] } }], status: 'error',
status: MessageStatus.Error,
error_code: 500, error_code: 500,
}) })
) )

View File

@ -42,7 +42,9 @@ export abstract class OAIEngine extends AIEngine {
*/ */
override onLoad() { override onLoad() {
super.onLoad() super.onLoad()
events.on(MessageEvent.OnMessageSent, (data: MessageRequest) => this.inference(data)) events.on(MessageEvent.OnMessageSent, (data: MessageRequest) =>
this.inference(data)
)
events.on(InferenceEvent.OnInferenceStopped, () => this.stopInference()) events.on(InferenceEvent.OnInferenceStopped, () => this.stopInference())
} }
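For context, these subscriptions are what other modules hit to drive the engine. Below is a hedged sketch of the emitting side; the `MessageRequest` payload fields are illustrative assumptions, and `events`, `MessageEvent`, `InferenceEvent`, and `MessageRequest` are assumed to be imported from the core package:

```typescript
// Illustrative only: a caller triggers inference by emitting the event that
// onLoad() subscribed to above; the payload fields here are assumptions.
events.emit(MessageEvent.OnMessageSent, {
  threadId: 'thread-1',
  messages: [{ role: 'user', content: 'Hello' }],
} as unknown as MessageRequest)

// Stopping a running generation goes through the second subscription.
events.emit(InferenceEvent.OnInferenceStopped, undefined)
```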
@ -128,7 +130,9 @@ export abstract class OAIEngine extends AIEngine {
events.emit(MessageEvent.OnMessageUpdate, message) events.emit(MessageEvent.OnMessageUpdate, message)
}, },
complete: async () => { complete: async () => {
message.status = message.content.length ? MessageStatus.Ready : MessageStatus.Error message.status = message.content.length
? MessageStatus.Ready
: MessageStatus.Error
events.emit(MessageEvent.OnMessageUpdate, message) events.emit(MessageEvent.OnMessageUpdate, message)
}, },
error: async (err: any) => { error: async (err: any) => {
@ -141,7 +145,10 @@ export abstract class OAIEngine extends AIEngine {
message.content[0] = { message.content[0] = {
type: ContentType.Text, type: ContentType.Text,
text: { text: {
value: err.message, value:
typeof message === 'string'
? err.message
: (JSON.stringify(err.message) ?? err.detail),
annotations: [], annotations: [],
}, },
} }

View File

@ -1,14 +1,17 @@
import { lastValueFrom, Observable } from 'rxjs' import { lastValueFrom, Observable } from 'rxjs'
import { requestInference } from './sse' import { requestInference } from './sse'
import { ReadableStream } from 'stream/web'; import { ReadableStream } from 'stream/web'
describe('requestInference', () => { describe('requestInference', () => {
it('should send a request to the inference server and return an Observable', () => { it('should send a request to the inference server and return an Observable', () => {
// Mock the fetch function // Mock the fetch function
const mockFetch: any = jest.fn(() => const mockFetch: any = jest.fn(() =>
Promise.resolve({ Promise.resolve({
ok: true, ok: true,
json: () => Promise.resolve({ choices: [{ message: { content: 'Generated response' } }] }), json: () =>
Promise.resolve({
choices: [{ message: { content: 'Generated response' } }],
}),
headers: new Headers(), headers: new Headers(),
redirected: false, redirected: false,
status: 200, status: 200,
@ -36,7 +39,10 @@ describe('requestInference', () => {
const mockFetch: any = jest.fn(() => const mockFetch: any = jest.fn(() =>
Promise.resolve({ Promise.resolve({
ok: false, ok: false,
json: () => Promise.resolve({ error: { message: 'Wrong API Key', code: 'invalid_api_key' } }), json: () =>
Promise.resolve({
error: { message: 'Invalid API Key.', code: 'invalid_api_key' },
}),
headers: new Headers(), headers: new Headers(),
redirected: false, redirected: false,
status: 401, status: 401,
@ -56,16 +62,22 @@ describe('requestInference', () => {
// Assert the expected behavior // Assert the expected behavior
expect(result).toBeInstanceOf(Observable) expect(result).toBeInstanceOf(Observable)
expect(lastValueFrom(result)).rejects.toEqual({ message: 'Wrong API Key', code: 'invalid_api_key' }) expect(lastValueFrom(result)).rejects.toEqual({
message: 'Invalid API Key.',
code: 'invalid_api_key',
})
}) })
}) })
it('should handle a successful response with a transformResponse function', () => { it('should handle a successful response with a transformResponse function', () => {
// Mock the fetch function // Mock the fetch function
const mockFetch: any = jest.fn(() => const mockFetch: any = jest.fn(() =>
Promise.resolve({ Promise.resolve({
ok: true, ok: true,
json: () => Promise.resolve({ choices: [{ message: { content: 'Generated response' } }] }), json: () =>
Promise.resolve({
choices: [{ message: { content: 'Generated response' } }],
}),
headers: new Headers(), headers: new Headers(),
redirected: false, redirected: false,
status: 200, status: 200,
@ -78,47 +90,57 @@ describe('requestInference', () => {
const inferenceUrl = 'https://inference-server.com' const inferenceUrl = 'https://inference-server.com'
const requestBody = { message: 'Hello' } const requestBody = { message: 'Hello' }
const model = { id: 'model-id', parameters: { stream: false } } const model = { id: 'model-id', parameters: { stream: false } }
const transformResponse = (data: any) => data.choices[0].message.content.toUpperCase() const transformResponse = (data: any) =>
data.choices[0].message.content.toUpperCase()
// Call the function // Call the function
const result = requestInference(inferenceUrl, requestBody, model, undefined, undefined, transformResponse) const result = requestInference(
inferenceUrl,
requestBody,
model,
undefined,
undefined,
transformResponse
)
// Assert the expected behavior // Assert the expected behavior
expect(result).toBeInstanceOf(Observable) expect(result).toBeInstanceOf(Observable)
expect(lastValueFrom(result)).resolves.toEqual('GENERATED RESPONSE') expect(lastValueFrom(result)).resolves.toEqual('GENERATED RESPONSE')
}) })
it('should handle a successful response with streaming enabled', () => {
it('should handle a successful response with streaming enabled', () => {
// Mock the fetch function // Mock the fetch function
const mockFetch: any = jest.fn(() => const mockFetch: any = jest.fn(() =>
Promise.resolve({ Promise.resolve({
ok: true, ok: true,
body: new ReadableStream({ body: new ReadableStream({
start(controller) { start(controller) {
controller.enqueue(new TextEncoder().encode('data: {"choices": [{"delta": {"content": "Streamed"}}]}')); controller.enqueue(
controller.enqueue(new TextEncoder().encode('data: [DONE]')); new TextEncoder().encode(
controller.close(); 'data: {"choices": [{"delta": {"content": "Streamed"}}]}'
} )
)
controller.enqueue(new TextEncoder().encode('data: [DONE]'))
controller.close()
},
}), }),
headers: new Headers(), headers: new Headers(),
redirected: false, redirected: false,
status: 200, status: 200,
statusText: 'OK', statusText: 'OK',
}) })
); )
jest.spyOn(global, 'fetch').mockImplementation(mockFetch); jest.spyOn(global, 'fetch').mockImplementation(mockFetch)
// Define the test inputs // Define the test inputs
const inferenceUrl = 'https://inference-server.com'; const inferenceUrl = 'https://inference-server.com'
const requestBody = { message: 'Hello' }; const requestBody = { message: 'Hello' }
const model = { id: 'model-id', parameters: { stream: true } }; const model = { id: 'model-id', parameters: { stream: true } }
// Call the function // Call the function
const result = requestInference(inferenceUrl, requestBody, model); const result = requestInference(inferenceUrl, requestBody, model)
// Assert the expected behavior // Assert the expected behavior
expect(result).toBeInstanceOf(Observable); expect(result).toBeInstanceOf(Observable)
expect(lastValueFrom(result)).resolves.toEqual('Streamed'); expect(lastValueFrom(result)).resolves.toEqual('Streamed')
}); })

View File

@ -32,21 +32,20 @@ export function requestInference(
}) })
.then(async (response) => { .then(async (response) => {
if (!response.ok) { if (!response.ok) {
const data = await response.json() if (response.status === 401) {
let errorCode = ErrorCode.Unknown throw {
if (data.error) { code: ErrorCode.InvalidApiKey,
errorCode = data.error.code ?? data.error.type ?? ErrorCode.Unknown message: 'Invalid API Key.',
} else if (response.status === 401) {
errorCode = ErrorCode.InvalidApiKey
} }
const error = {
message: data.error?.message ?? data.message ?? 'Error occurred.',
code: errorCode,
} }
subscriber.error(error) let data = await response.json()
subscriber.complete() try {
handleError(data)
} catch (err) {
subscriber.error(err)
return return
} }
}
// There could be overriden stream parameter in the model // There could be overriden stream parameter in the model
// that is set in request body (transformed payload) // that is set in request body (transformed payload)
if ( if (
@ -54,9 +53,10 @@ export function requestInference(
model.parameters?.stream === false model.parameters?.stream === false
) { ) {
const data = await response.json() const data = await response.json()
if (data.error || data.message) { try {
subscriber.error(data.error ?? data) handleError(data)
subscriber.complete() } catch (err) {
subscriber.error(err)
return return
} }
if (transformResponse) { if (transformResponse) {
@ -91,13 +91,10 @@ export function requestInference(
const toParse = cachedLines + line const toParse = cachedLines + line
if (!line.includes('data: [DONE]')) { if (!line.includes('data: [DONE]')) {
const data = JSON.parse(toParse.replace('data: ', '')) const data = JSON.parse(toParse.replace('data: ', ''))
if ( try {
'error' in data || handleError(data)
'message' in data || } catch (err) {
'detail' in data subscriber.error(err)
) {
subscriber.error(data.error ?? data)
subscriber.complete()
return return
} }
content += data.choices[0]?.delta?.content ?? '' content += data.choices[0]?.delta?.content ?? ''
@ -118,3 +115,18 @@ export function requestInference(
.catch((err) => subscriber.error(err)) .catch((err) => subscriber.error(err))
}) })
} }
/**
* Handle error and normalize it to a common format.
* @param data
*/
const handleError = (data: any) => {
if (
data.error ||
data.message ||
data.detail ||
(Array.isArray(data) && data.length && data[0].error)
) {
throw data.error ?? data[0]?.error ?? data
}
}
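A quick sketch of how the new `handleError` helper normalizes the payload shapes it checks for. The sample payloads below are hypothetical, not fixtures from this diff, and `handleError` is assumed to be in scope within the same module:

```typescript
// Hypothetical payload shapes; only the structure matters for this illustration.
const openAIError = { error: { message: 'Invalid API Key.', code: 'invalid_api_key' } }
const batchedError = [{ error: { message: 'Bad request' } }]
const healthyChunk = { choices: [{ delta: { content: 'ok' } }] }

try {
  handleError(openAIError) // throws data.error: { message: 'Invalid API Key.', code: 'invalid_api_key' }
} catch (err) {
  console.error(err)
}

try {
  handleError(batchedError) // array branch: throws data[0].error
} catch (err) {
  console.error(err)
}

handleError(healthyChunk) // no error/message/detail field, so nothing is thrown and streaming continues
```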

View File

@ -5,6 +5,7 @@ import {
EngineReleased, EngineReleased,
EngineConfig, EngineConfig,
DefaultEngineVariant, DefaultEngineVariant,
Model,
} from '../../types' } from '../../types'
import { BaseExtension, ExtensionTypeEnum } from '../extension' import { BaseExtension, ExtensionTypeEnum } from '../extension'
@ -103,6 +104,11 @@ export abstract class EngineManagementExtension extends BaseExtension {
engineConfig?: EngineConfig engineConfig?: EngineConfig
): Promise<{ messages: string }> ): Promise<{ messages: string }>
/**
* Add a new remote model for a specific engine
*/
abstract addRemoteModel(model: Model): Promise<void>
/** /**
* @returns A Promise that resolves to an object of remote models list . * @returns A Promise that resolves to an object of remote models list .
*/ */

View File

@ -0,0 +1,26 @@
import { HardwareInformation } from '../../types'
import { BaseExtension, ExtensionTypeEnum } from '../extension'
/**
* Hardware management extension. Persists and retrieves hardware information.
* @abstract
* @extends BaseExtension
*/
export abstract class HardwareManagementExtension extends BaseExtension {
type(): ExtensionTypeEnum | undefined {
return ExtensionTypeEnum.Hardware
}
/**
* @returns A Promise that resolves to the system's hardware information.
*/
abstract getHardware(): Promise<HardwareInformation>
/**
* @returns A Promise that resolves to the activation result and the list of activated GPUs.
*/
abstract setAvtiveGpu(data: { gpus: number[] }): Promise<{
message: string
activated_gpus: number[]
}>
}
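A minimal sketch of what a concrete subclass might look like. The import path, base URL, and no-op lifecycle hooks are assumptions for illustration; only the abstract method signatures come from the class above.

```typescript
import { HardwareManagementExtension, HardwareInformation } from '@janhq/core'

// Hypothetical backend location; a placeholder, not a real Jan route.
const BASE_URL = 'http://127.0.0.1:39291/v1'

export default class JanHardwareExtension extends HardwareManagementExtension {
  onLoad(): void {}
  onUnload(): void {}

  async getHardware(): Promise<HardwareInformation> {
    const res = await fetch(`${BASE_URL}/hardware`)
    return res.json()
  }

  // Name intentionally matches the abstract declaration above so the override compiles.
  async setAvtiveGpu(data: { gpus: number[] }) {
    const res = await fetch(`${BASE_URL}/hardware/activate`, {
      method: 'POST',
      headers: { 'Content-Type': 'application/json' },
      body: JSON.stringify(data),
    })
    return res.json() as Promise<{ message: string; activated_gpus: number[] }>
  }
}
```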

View File

@ -1,6 +1,5 @@
import { ConversationalExtension } from './index'; import { ConversationalExtension } from './index';
import { InferenceExtension } from './index'; import { InferenceExtension } from './index';
import { MonitoringExtension } from './index';
import { AssistantExtension } from './index'; import { AssistantExtension } from './index';
import { ModelExtension } from './index'; import { ModelExtension } from './index';
import * as Engines from './index'; import * as Engines from './index';
@ -14,10 +13,6 @@ describe('index.ts exports', () => {
expect(InferenceExtension).toBeDefined(); expect(InferenceExtension).toBeDefined();
}); });
test('should export MonitoringExtension', () => {
expect(MonitoringExtension).toBeDefined();
});
test('should export AssistantExtension', () => { test('should export AssistantExtension', () => {
expect(AssistantExtension).toBeDefined(); expect(AssistantExtension).toBeDefined();
}); });

View File

@ -9,10 +9,7 @@ export { ConversationalExtension } from './conversational'
*/ */
export { InferenceExtension } from './inference' export { InferenceExtension } from './inference'
/**
* Monitoring extension for system monitoring.
*/
export { MonitoringExtension } from './monitoring'
/** /**
* Assistant extension for managing assistants. * Assistant extension for managing assistants.
@ -33,3 +30,8 @@ export * from './engines'
* Engines Management * Engines Management
*/ */
export * from './enginesManagement' export * from './enginesManagement'
/**
* Hardware Management
*/
export * from './hardwareManagement'

View File

@ -1,42 +0,0 @@
import { ExtensionTypeEnum } from '../extension';
import { MonitoringExtension } from './monitoring';
it('should have the correct type', () => {
class TestMonitoringExtension extends MonitoringExtension {
getGpuSetting(): Promise<GpuSetting | undefined> {
throw new Error('Method not implemented.');
}
getResourcesInfo(): Promise<any> {
throw new Error('Method not implemented.');
}
getCurrentLoad(): Promise<any> {
throw new Error('Method not implemented.');
}
getOsInfo(): Promise<OperatingSystemInfo> {
throw new Error('Method not implemented.');
}
}
const monitoringExtension = new TestMonitoringExtension();
expect(monitoringExtension.type()).toBe(ExtensionTypeEnum.SystemMonitoring);
});
it('should create an instance of MonitoringExtension', () => {
class TestMonitoringExtension extends MonitoringExtension {
getGpuSetting(): Promise<GpuSetting | undefined> {
throw new Error('Method not implemented.');
}
getResourcesInfo(): Promise<any> {
throw new Error('Method not implemented.');
}
getCurrentLoad(): Promise<any> {
throw new Error('Method not implemented.');
}
getOsInfo(): Promise<OperatingSystemInfo> {
throw new Error('Method not implemented.');
}
}
const monitoringExtension = new TestMonitoringExtension();
expect(monitoringExtension).toBeInstanceOf(MonitoringExtension);
});

View File

@ -1,20 +0,0 @@
import { BaseExtension, ExtensionTypeEnum } from '../extension'
import { GpuSetting, MonitoringInterface, OperatingSystemInfo } from '../../types'
/**
* Monitoring extension for system monitoring.
* @extends BaseExtension
*/
export abstract class MonitoringExtension extends BaseExtension implements MonitoringInterface {
/**
* Monitoring extension type.
*/
type(): ExtensionTypeEnum | undefined {
return ExtensionTypeEnum.SystemMonitoring
}
abstract getGpuSetting(): Promise<GpuSetting | undefined>
abstract getResourcesInfo(): Promise<any>
abstract getCurrentLoad(): Promise<any>
abstract getOsInfo(): Promise<OperatingSystemInfo>
}

View File

@ -1,4 +1,5 @@
export type AppConfiguration = { export type AppConfiguration = {
data_folder: string data_folder: string
quick_ask: boolean quick_ask: boolean
distinct_id?: string
} }

View File

@ -18,6 +18,7 @@ export type EngineMetadata = {
template?: string template?: string
} }
} }
explore_models_url?: string
} }
export type EngineVariant = { export type EngineVariant = {

View File

@ -0,0 +1,55 @@
export type Cpu = {
arch: string
cores: number
instructions: string[]
model: string
usage: number
}
export type GpuAdditionalInformation = {
compute_cap: string
driver_version: string
}
export type Gpu = {
activated: boolean
additional_information?: GpuAdditionalInformation
free_vram: number
id: string
name: string
total_vram: number
uuid: string
version: string
}
export type Os = {
name: string
version: string
}
export type Power = {
battery_life: number
charging_status: string
is_power_saving: boolean
}
export type Ram = {
available: number
total: number
type: string
}
export type Storage = {
available: number
total: number
type: string
}
export type HardwareInformation = {
cpu: Cpu
gpus: Gpu[]
os: Os
power: Power
ram: Ram
storage: Storage
}
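For reference, a hypothetical value shaped to these types (every number and string below is made up for illustration):

```typescript
const exampleHardware: HardwareInformation = {
  cpu: { arch: 'amd64', cores: 16, instructions: ['avx2', 'fma'], model: 'Example CPU', usage: 7 },
  gpus: [
    {
      activated: true,
      additional_information: { compute_cap: '8.6', driver_version: '550.00' },
      free_vram: 10240,
      id: '0',
      name: 'Example GPU',
      total_vram: 12288,
      uuid: 'GPU-00000000-0000-0000-0000-000000000000',
      version: '12.0',
    },
  ],
  os: { name: 'Linux', version: '6.8' },
  power: { battery_life: 100, charging_status: 'discharging', is_power_saving: false },
  ram: { available: 16384, total: 32768, type: 'DDR5' },
  storage: { available: 256000, total: 512000, type: 'SSD' },
}
```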

View File

@ -4,7 +4,6 @@ import * as model from './model';
import * as thread from './thread'; import * as thread from './thread';
import * as message from './message'; import * as message from './message';
import * as inference from './inference'; import * as inference from './inference';
import * as monitoring from './monitoring';
import * as file from './file'; import * as file from './file';
import * as config from './config'; import * as config from './config';
import * as huggingface from './huggingface'; import * as huggingface from './huggingface';
@ -18,7 +17,6 @@ import * as setting from './setting';
expect(thread).toBeDefined(); expect(thread).toBeDefined();
expect(message).toBeDefined(); expect(message).toBeDefined();
expect(inference).toBeDefined(); expect(inference).toBeDefined();
expect(monitoring).toBeDefined();
expect(file).toBeDefined(); expect(file).toBeDefined();
expect(config).toBeDefined(); expect(config).toBeDefined();
expect(huggingface).toBeDefined(); expect(huggingface).toBeDefined();

View File

@ -3,7 +3,6 @@ export * from './model'
export * from './thread' export * from './thread'
export * from './message' export * from './message'
export * from './inference' export * from './inference'
export * from './monitoring'
export * from './file' export * from './file'
export * from './config' export * from './config'
export * from './huggingface' export * from './huggingface'
@ -11,3 +10,4 @@ export * from './miscellaneous'
export * from './api' export * from './api'
export * from './setting' export * from './setting'
export * from './engine' export * from './engine'
export * from './hardware'

View File

@ -1,33 +1,25 @@
import { GpuAdditionalInformation } from '../hardware'
export type SystemResourceInfo = { export type SystemResourceInfo = {
memAvailable: number memAvailable: number
} }
export type RunMode = 'cpu' | 'gpu'
export type GpuSetting = { export type GpuSetting = {
notify: boolean
run_mode: RunMode
nvidia_driver: {
exist: boolean
version: string
}
cuda: {
exist: boolean
version: string
}
gpus: GpuSettingInfo[] gpus: GpuSettingInfo[]
gpu_highest_vram: string
gpus_in_use: string[]
is_initial: boolean
// TODO: This needs to be set based on user toggle in settings // TODO: This needs to be set based on user toggle in settings
vulkan: boolean vulkan: boolean
cpu?: any
} }
export type GpuSettingInfo = { export type GpuSettingInfo = {
activated: boolean
free_vram: number
id: string id: string
vram: string
name: string name: string
arch?: string total_vram: number
uuid: string
version: string
additional_information?: GpuAdditionalInformation
} }
export type SystemInformation = { export type SystemInformation = {
@ -42,9 +34,6 @@ export type SupportedPlatform = SupportedPlatformTuple[number]
export type OperatingSystemInfo = { export type OperatingSystemInfo = {
platform: SupportedPlatform | 'unknown' platform: SupportedPlatform | 'unknown'
arch: string arch: string
release: string
machine: string
version: string
totalMem: number totalMem: number
freeMem: number freeMem: number
} }

View File

@ -1,5 +1,3 @@
import { FileMetadata } from '../file'
/** /**
* Represents the information about a model. * Represents the information about a model.
* @stored * @stored
@ -70,6 +68,11 @@ export type Model = {
*/ */
id: string id: string
/**
* The model identifier, modern version of id.
*/
model?: string
/** /**
* Human-readable name that is used for UI. * Human-readable name that is used for UI.
*/ */
@ -147,6 +150,7 @@ export type ModelSettingParams = {
*/ */
export type ModelRuntimeParams = { export type ModelRuntimeParams = {
temperature?: number temperature?: number
max_temperature?: number
token_limit?: number token_limit?: number
top_k?: number top_k?: number
top_p?: number top_p?: number

View File

@ -1,13 +0,0 @@
import * as monitoringInterface from './monitoringInterface'
import * as resourceInfo from './resourceInfo'
import * as index from './index'
it('should re-export all symbols from monitoringInterface and resourceInfo', () => {
for (const key in monitoringInterface) {
expect(index[key]).toBe(monitoringInterface[key])
}
for (const key in resourceInfo) {
expect(index[key]).toBe(resourceInfo[key])
}
})

View File

@ -1,2 +0,0 @@
export * from './monitoringInterface'
export * from './resourceInfo'

View File

@ -1,29 +0,0 @@
import { GpuSetting, OperatingSystemInfo } from '../miscellaneous'
/**
* Monitoring extension for system monitoring.
* @extends BaseExtension
*/
export interface MonitoringInterface {
/**
* Returns information about the system resources.
* @returns {Promise<any>} A promise that resolves with the system resources information.
*/
getResourcesInfo(): Promise<any>
/**
* Returns the current system load.
* @returns {Promise<any>} A promise that resolves with the current system load.
*/
getCurrentLoad(): Promise<any>
/**
* Returns the GPU configuration.
*/
getGpuSetting(): Promise<GpuSetting | undefined>
/**
* Returns information about the operating system.
*/
getOsInfo(): Promise<OperatingSystemInfo>
}

View File

@ -1,6 +0,0 @@
export type ResourceInfo = {
mem: {
totalMemory: number
usedMemory: number
}
}

View File

@ -65,7 +65,7 @@ const DropdownDownload = ({ lastRelease }: Props) => {
const userAgent = navigator.userAgent const userAgent = navigator.userAgent
if (userAgent.includes('Windows')) { if (userAgent.includes('Windows')) {
// windows user // windows user
setDefaultSystem(systems[2]) setDefaultSystem(systems[1])
} else if (userAgent.includes('Linux')) { } else if (userAgent.includes('Linux')) {
// linux user // linux user
setDefaultSystem(systems[3]) setDefaultSystem(systems[3])

View File

@ -23,6 +23,4 @@ Adhering to Jan's privacy preserving philosophy, our analytics philosophy is to
## What is tracked ## What is tracked
1. By default, Github tracks downloads and device metadata for all public GitHub repositories. This helps us troubleshoot & ensure cross-platform support. 1. By default, Github tracks downloads and device metadata for all public GitHub repositories. This helps us troubleshoot & ensure cross-platform support.
2. We use [Umami](https://umami.is/) to collect, analyze, and understand application data while maintaining visitor privacy and data ownership. We are using the Umami Cloud in Europe to ensure GDPR compliance. Please see [Umami Privacy Policy](https://umami.is/privacy) for more details. 2. Additionally, we plan to enable a `Settings` feature for users to turn off all tracking.
3. We use Umami to track a single `app.opened` event without additional user metadata, in order to understand retention. In addition, we track `app.version` to understand app version usage.
4. Additionally, we plan to enable a `Settings` feature for users to turn off all tracking.

View File

@ -14,6 +14,11 @@
"desktop": "Installation", "desktop": "Installation",
"data-folder": "Jan Data Folder", "data-folder": "Jan Data Folder",
"privacy": "Privacy", "privacy": "Privacy",
"privacy-policy": {
"type": "page",
"display": "hidden",
"title": "Privacy Policy"
},
"user-guides": { "user-guides": {
"title": "BASIC USAGE", "title": "BASIC USAGE",
"type": "separator" "type": "separator"

View File

@ -55,7 +55,7 @@ Extensions in Jan are configured through a JSON file that defines their behavior
"productName": "Conversational", "productName": "Conversational",
"version": "1.0.0", "version": "1.0.0",
"main": "dist/index.js", "main": "dist/index.js",
"description": "This extension enables conversations and state persistence via your filesystem", "description": "This extension enables conversations and state persistence via your filesystem.",
"url": "extension://@janhq/conversational-extension/dist/index.js" "url": "extension://@janhq/conversational-extension/dist/index.js"
}, },
"@janhq/inference-anthropic-extension": { "@janhq/inference-anthropic-extension": {
@ -70,7 +70,7 @@ Extensions in Jan are configured through a JSON file that defines their behavior
"productName": "Anthropic Inference Engine", "productName": "Anthropic Inference Engine",
"version": "1.0.2", "version": "1.0.2",
"main": "dist/index.js", "main": "dist/index.js",
"description": "This extension enables Anthropic chat completion API calls", "description": "This extension enables Anthropic chat completion API calls.",
"url": "extension://@janhq/inference-anthropic-extension/dist/index.js" "url": "extension://@janhq/inference-anthropic-extension/dist/index.js"
}, },
"@janhq/inference-triton-trt-llm-extension": { "@janhq/inference-triton-trt-llm-extension": {
@ -85,7 +85,7 @@ Extensions in Jan are configured through a JSON file that defines their behavior
"productName": "Triton-TRT-LLM Inference Engine", "productName": "Triton-TRT-LLM Inference Engine",
"version": "1.0.0", "version": "1.0.0",
"main": "dist/index.js", "main": "dist/index.js",
"description": "This extension enables Nvidia's TensorRT-LLM as an inference engine option", "description": "This extension enables Nvidia's TensorRT-LLM as an inference engine option.",
"url": "extension://@janhq/inference-triton-trt-llm-extension/dist/index.js" "url": "extension://@janhq/inference-triton-trt-llm-extension/dist/index.js"
}, },
"@janhq/inference-mistral-extension": { "@janhq/inference-mistral-extension": {
@ -100,7 +100,7 @@ Extensions in Jan are configured through a JSON file that defines their behavior
"productName": "MistralAI Inference Engine", "productName": "MistralAI Inference Engine",
"version": "1.0.1", "version": "1.0.1",
"main": "dist/index.js", "main": "dist/index.js",
"description": "This extension enables Mistral chat completion API calls", "description": "This extension enables Mistral chat completion API calls.",
"url": "extension://@janhq/inference-mistral-extension/dist/index.js" "url": "extension://@janhq/inference-mistral-extension/dist/index.js"
}, },
"@janhq/inference-martian-extension": { "@janhq/inference-martian-extension": {
@ -115,7 +115,7 @@ Extensions in Jan are configured through a JSON file that defines their behavior
"productName": "Martian Inference Engine", "productName": "Martian Inference Engine",
"version": "1.0.1", "version": "1.0.1",
"main": "dist/index.js", "main": "dist/index.js",
"description": "This extension enables Martian chat completion API calls", "description": "This extension enables Martian chat completion API calls.",
"url": "extension://@janhq/inference-martian-extension/dist/index.js" "url": "extension://@janhq/inference-martian-extension/dist/index.js"
}, },
"@janhq/inference-openrouter-extension": { "@janhq/inference-openrouter-extension": {
@ -130,7 +130,7 @@ Extensions in Jan are configured through a JSON file that defines their behavior
"productName": "OpenRouter Inference Engine", "productName": "OpenRouter Inference Engine",
"version": "1.0.0", "version": "1.0.0",
"main": "dist/index.js", "main": "dist/index.js",
"description": "This extension enables Open Router chat completion API calls", "description": "This extension enables Open Router chat completion API calls.",
"url": "extension://@janhq/inference-openrouter-extension/dist/index.js" "url": "extension://@janhq/inference-openrouter-extension/dist/index.js"
}, },
"@janhq/inference-nvidia-extension": { "@janhq/inference-nvidia-extension": {
@ -145,7 +145,7 @@ Extensions in Jan are configured through a JSON file that defines their behavior
"productName": "NVIDIA NIM Inference Engine", "productName": "NVIDIA NIM Inference Engine",
"version": "1.0.1", "version": "1.0.1",
"main": "dist/index.js", "main": "dist/index.js",
"description": "This extension enables NVIDIA chat completion API calls", "description": "This extension enables NVIDIA chat completion API calls.",
"url": "extension://@janhq/inference-nvidia-extension/dist/index.js" "url": "extension://@janhq/inference-nvidia-extension/dist/index.js"
}, },
"@janhq/inference-groq-extension": { "@janhq/inference-groq-extension": {
@ -160,7 +160,7 @@ Extensions in Jan are configured through a JSON file that defines their behavior
"productName": "Groq Inference Engine", "productName": "Groq Inference Engine",
"version": "1.0.1", "version": "1.0.1",
"main": "dist/index.js", "main": "dist/index.js",
"description": "This extension enables fast Groq chat completion API calls", "description": "This extension enables fast Groq chat completion API calls.",
"url": "extension://@janhq/inference-groq-extension/dist/index.js" "url": "extension://@janhq/inference-groq-extension/dist/index.js"
}, },
"@janhq/inference-openai-extension": { "@janhq/inference-openai-extension": {
@ -175,7 +175,7 @@ Extensions in Jan are configured through a JSON file that defines their behavior
"productName": "OpenAI Inference Engine", "productName": "OpenAI Inference Engine",
"version": "1.0.2", "version": "1.0.2",
"main": "dist/index.js", "main": "dist/index.js",
"description": "This extension enables OpenAI chat completion API calls", "description": "This extension enables OpenAI chat completion API calls.",
"url": "extension://@janhq/inference-openai-extension/dist/index.js" "url": "extension://@janhq/inference-openai-extension/dist/index.js"
}, },
"@janhq/inference-cohere-extension": { "@janhq/inference-cohere-extension": {
@ -190,7 +190,7 @@ Extensions in Jan are configured through a JSON file that defines their behavior
"productName": "Cohere Inference Engine", "productName": "Cohere Inference Engine",
"version": "1.0.0", "version": "1.0.0",
"main": "dist/index.js", "main": "dist/index.js",
"description": "This extension enables Cohere chat completion API calls", "description": "This extension enables Cohere chat completion API calls.",
"url": "extension://@janhq/inference-cohere-extension/dist/index.js" "url": "extension://@janhq/inference-cohere-extension/dist/index.js"
}, },
"@janhq/model-extension": { "@janhq/model-extension": {
@ -205,7 +205,7 @@ Extensions in Jan are configured through a JSON file that defines their behavior
"productName": "Model Management", "productName": "Model Management",
"version": "1.0.33", "version": "1.0.33",
"main": "dist/index.js", "main": "dist/index.js",
"description": "Model Management Extension provides model exploration and seamless downloads", "description": "Model Management Extension provides model exploration and seamless downloads.",
"url": "extension://@janhq/model-extension/dist/index.js" "url": "extension://@janhq/model-extension/dist/index.js"
}, },
"@janhq/monitoring-extension": { "@janhq/monitoring-extension": {
@ -220,7 +220,7 @@ Extensions in Jan are configured through a JSON file that defines their behavior
"productName": "System Monitoring", "productName": "System Monitoring",
"version": "1.0.10", "version": "1.0.10",
"main": "dist/index.js", "main": "dist/index.js",
"description": "This extension provides system health and OS level data", "description": "This extension provides system health and OS level data.",
"url": "extension://@janhq/monitoring-extension/dist/index.js" "url": "extension://@janhq/monitoring-extension/dist/index.js"
}, },
"@janhq/assistant-extension": { "@janhq/assistant-extension": {
@ -235,7 +235,7 @@ Extensions in Jan are configured through a JSON file that defines their behavior
"productName": "Jan Assistant", "productName": "Jan Assistant",
"version": "1.0.1", "version": "1.0.1",
"main": "dist/index.js", "main": "dist/index.js",
"description": "This extension enables assistants, including Jan, a default assistant that can call all downloaded models", "description": "This extension enables assistants, including Jan, a default assistant that can call all downloaded models.",
"url": "extension://@janhq/assistant-extension/dist/index.js" "url": "extension://@janhq/assistant-extension/dist/index.js"
}, },
"@janhq/tensorrt-llm-extension": { "@janhq/tensorrt-llm-extension": {

View File

@ -41,7 +41,7 @@ Ensure that your system meets the following requirements to use Jan effectively:
### Mac Performance Guide ### Mac Performance Guide
<Callout type="info"> <Callout type="info">
**Apple Silicon Macs** leverage Metal for GPU acceleration, providing faster performance than **Appple Intel Macs**, which rely solely on CPU processing. **Apple Silicon Macs** leverage Metal for GPU acceleration, providing faster performance than **Apple Intel Macs**, which rely solely on CPU processing.
</Callout> </Callout>
**Apple Silicon (M1, M2, M3)** **Apple Silicon (M1, M2, M3)**
- Metal acceleration enabled by default, no configuration required - Metal acceleration enabled by default, no configuration required

View File

@ -47,8 +47,8 @@ To add a new remote engine:
|-------|-------------|----------| |-------|-------------|----------|
| Engine Name | Name for your engine (e.g., "OpenAI", "Claude") | ✓ | | Engine Name | Name for your engine (e.g., "OpenAI", "Claude") | ✓ |
| API URL | The base URL of the provider's API | ✓ | | API URL | The base URL of the provider's API | ✓ |
| API Key | Your authentication key from the provider | ✓ | | API Key | Your authentication key to activate this engine | ✓ |
| Model List URL | URL for fetching available models | | | Model List URL | The endpoint URL to fetch available models | |
| API Key Template | Custom authorization header format | | | API Key Template | Custom authorization header format | |
| Request Format Conversion | Function to convert Jan's request format to provider's format | | | Request Format Conversion | Function to convert Jan's request format to provider's format | |
| Response Format Conversion | Function to convert provider's response format to Jan's format | | | Response Format Conversion | Function to convert provider's response format to Jan's format | |

View File

@ -0,0 +1,125 @@
---
title: Jan Privacy Policy
description: Jan's data collection practices, privacy measures, and your rights. Learn how we protect your data and maintain transparency.
---
# Privacy Policy
<div className="text-sm text-gray-500 mt-2 mb-8">
Last Updated: January 16, 2025
</div>
## Introduction
We are committed to protecting your privacy and ensuring you have control over your data. This Privacy Policy outlines what information Menlo Research Pte Ltd (the "Company") collects from users of the Jan desktop app and website (the "Services"), how the Company uses that information, and the measures the Company takes to safeguard that information.
## 1. Data Collection and Consent
### Explicit Consent
The Company does not collect any data until you explicitly allow tracking.
### Tracking Preferences
Upon first launching the Jan desktop app or visiting the website, you will be prompted to set your tracking preferences. These preferences can be modified at any time via the app's Settings menu or the website's Privacy Settings.
### Legal Basis
Pursuant to the European Union's General Data Protection Regulation (EU) 2016/679 (the "GDPR"), the Company processes data based on your explicit consent (GDPR Article 6(1)(a)). This means:
- The Company only processes your data after receiving clear, affirmative consent from you.
- You may withdraw your consent at any time through the app's Settings menu or the website's Privacy Settings.
- If you withdraw your consent, the Company will stop optional data collection from the effective date of withdrawal.
- Your withdrawal of consent will not affect the lawfulness of processing before its withdrawal.
## 2. Data We Do Not Collect
Regardless of your analytics permissions, the Company does not collect the following:
- Chat History: Your conversations with the Jan app are private and inaccessible to the Company.
- Chat Settings: Your personalized settings remain solely with you.
- Language Models: The specific language models you use are not tracked.
## 3. Uses of Information
To build a reliable and user-friendly product offering, understanding how the Jan app is used is essential. If you permit tracking, the Company collects product analytics data to:
- Improve User Experience: Enhance app functionality based on usage patterns; and
- Measure Engagement: Assess active users and retention rates to ensure ongoing value.
## 4. Product Analytics
### Data Collected
When you opt-in to tracking, we collect the following anonymous data:
- Active Users: Number of daily active users to gauge engagement.
- Retention Rates: Track if users continue to find value in the Jan app over time.
### Data Anonymity
- User ID: Analytics data is tied to a randomly generated user ID, ensuring no link to your personal identity.
- Privacy Assurance: Your chat history and personal data are not tracked or linked to your usage data.
## 5. What We Do Not Track
Even with analytics permissions granted, the Company does not track the following:
- Conversations: Your interactions with the Jan app remain private.
- Files: The Company does not scan, upload, or view your files.
- Personal Identity: The Company does not collect personally identifiable information about users.
- Prompts: Your prompts and prompt templates are not monitored.
- Conversation Metrics: The Company does not track context length or conversation length.
- Model Usage: The specific models you use or their types are not tracked.
- Storage: You retain full control over storing your files and logs, and your privacy is prioritized.
## 6. Using Cloud Models
The Jan app allows you to connect to cloud-based model APIs (e.g. GPT, Claude models).
- Data Handling: The API provider processes your messages directly; the Jan app does not access or store these messages.
- Local Models: Choosing local models ensures all data remains on your device, with no external access.
## 7. Data Storage and Processing
### Analytics Provider
The Company uses PostHog EU for analytics, which ensures all data is processed within the European Union.
### Data Security
- Encryption: All data transfers are encrypted using Transport Layer Security (TLS) to ensure secure transmission.
- Storage: PostHog securely manages the data the Company collects. For more information, please refer to PostHog's GDPR documentation.
## 8. Data Retention
- Retention Period: The Company retains analytics data for up to 12 months unless otherwise required to comply with any applicable legal requirements.
- Deletion Requests: If you wish to request the deletion of your analytics data, you may do so by sending a written request to hello@jan.ai.
## 9. Your Rights and Choices
- Access and Control: You may access, modify, or delete your tracking preferences at any time through the Jan app or website settings.
- Data Requests: If you have any requests related to your data, please address them to hello@jan.ai.
## 10. Children's Privacy
Our Services are not targeted at children under the age of 13. The Company does not knowingly collect data from children under the age of 13. If the Company becomes aware that data of persons under the age of 13 has been collected without verifiable parental consent, the Company will take appropriate actions to delete this information.
## 11. Changes to the Privacy Policy
The Company reserves the right, at its sole discretion, to update this Privacy Policy at any time to reflect changes in the practices or legal requirements of the Company. The Company will use reasonable efforts to notify you of any significant changes via app notifications, the website, or email. Your continued use of the Services following such updates means you accept those changes.
## 12. Cookies and Tracking Technologies
Our website utilizes cookies to:
- Enhance user experience; and
- Measure website traffic and usage patterns.
Most browsers allow you to remove or manage cookie functions and adjust your privacy and security preferences.
For more details, please refer to our Cookie Policy.
## 13. Contact Us
For any questions or concerns about this Privacy Policy or our data practices, please contact hello@jan.ai.

View File

@ -1,5 +1,5 @@
--- ---
title: Jan Privacy title: Jan's Privacy Approach
description: Jan is an app that allows you to own your AI. We prioritize your control over your data and explain what data we collect and why. description: Jan is an app that allows you to own your AI. We prioritize your control over your data and explain what data we collect and why.
keywords: keywords:
[ [
@ -19,45 +19,57 @@ keywords:
] ]
--- ---
---
import { Callout } from 'nextra/components'
# Privacy # Privacy
Jan is an app that allows you to own your AI. We prioritize your control over your data and explain what data we collect and why. Jan is an app that allows you to own your AI. We prioritize local AI models and your control over your data. This page explains what data we collect and why. No tricks.
- Jan can't see your chats with AI <Callout>
- You're free to opt out For a comprehensive overview of our privacy practices, you can read our full [Privacy Policy](/docs/privacy-policy).
</Callout>
<Callout type="info">
We don't collect any data until you explicitly allow tracking.
</Callout>
You'll be asked about your tracking preferences when you first launch the app, and you can change them at any time in Settings.
Regardless of your analytics permissions, Jan will **never** access your chat history, chat settings, or the language models you have used.
## Why and what we track ## Why and what we track
To build a reliable, user-friendly AI that you own, we need to understand how Jan is used. We collect two types of data: performance data and usage data. To build a reliable, user-friendly AI that you own, we need to understand how Jan is used. If users allow tracking, we collect product analytics data.
### Performance data ### Product Analytics
We track app crashes and collect technical details about what went wrong, along with basic information about the hardware you're using.
When Jan crashes, we collect technical details about what went wrong.
- Specific AI model in use during the crash
- Hardware: `CPU`, `GPU`, `RAM`
- Logs: `Date/Time`, `OS & version`, `app version`, `error codes & messages`.
### Usage data
We track data like how often the app is opened to check: We track data like how often the app is opened to check:
- **Active Users**: How many people use Jan daily to measure engagement - **Active Users**: How many people use Jan daily to measure engagement
- **Retention Rates**: To understand if users are finding value in Jan over time - **Retention Rates**: To understand if users are finding value in Jan over time
Usage data is tied to a randomly generated telemetry ID. None of our usage data can be linked to your personal identity. Product analytics data is tied to a randomly generated user ID. None of our usage data can be linked to your personal identity. Your chat history and personal data are never tracked.
## What we **dont** track: ## What we **don't** track
- Your conversations with Jan. Those stay on your device.
- Your files. We dont scan, upload, or even look at them. <Callout type="info">
- Anything tied to your identity. Even if you grant analytics permissions, Jan doesn't track many of your private activities.
</Callout>
- We don't track your conversations with Jan.
- We don't scan, upload, or look at your files.
- We don't collect anything tied to your identity.
- We don't track your prompts and prompt templates.
- We don't monitor context length or conversation length.
- We don't track the models you have used or their types.
You keep your own files and logs on your device; storing them yourself keeps your privacy in your hands.
## Using Cloud Models ## Using Cloud Models
Jan allows you to connect cloud model APIs. If you choose to use cloud-based models (e.g. GPT, Claude models), the API provider handling the model will have access to your messages as part of processing the request. Again, Jan doesn't see or store these messages - they go directly to the provider. Remember: With local models, everything stays on your device, so no one - not even us- can see your messages. Jan allows you to connect cloud model APIs. If you choose to use cloud-based models (e.g. GPT, Claude models), the API provider handling the model will have access to your messages as part of processing the request. Again, Jan doesn't see or store these messages - they go directly to the provider. Remember: With local models, everything stays on your device, so no one - not even us- can see your messages.
## Where we store & process data ## Where we store & process data
We use [PostHog](https://posthog.com/eu) EU for analytics, ensuring all data is processed within the European Union. This setup complies with GDPR and other strict privacy regulations. PostHog lets us self-host and securely manage the data we collect. Read more [on PostHog's GDPR doc](https://posthog.com/docs/privacy/gdpr-compliance). We use [PostHog](https://posthog.com/eu) EU for analytics, ensuring all data is processed within the European Union. This setup complies with GDPR and other strict privacy regulations. PostHog lets us securely manage the data we collect. Read more [on PostHog's GDPR doc](https://posthog.com/docs/privacy/gdpr-compliance).
For a detailed breakdown of the analytics data we collect, you can check out our analytics repo. If you have any questions or concerns, feel free to reach out to us at hi@jan.ai.

View File

@ -1,6 +1,6 @@
--- ---
title: "Jan: Open source ChatGPT-alternative that runs 100% offline" title: "Jan: Open source ChatGPT-alternative that runs 100% offline"
description: "Chat with AI without privact concerns. Jan is an open-source alternative to ChatGPT, running AI models locally on your device." description: "Chat with AI without privacy concerns. Jan is an open-source alternative to ChatGPT, running AI models locally on your device."
keywords: keywords:
[ [
Jan, Jan,

View File

@ -1,47 +0,0 @@
---
title: Raycast
keywords:
[
Jan,
Customizable Intelligence, LLM,
local AI,
privacy focus,
free and open source,
private and offline,
conversational AI,
no-subscription fee,
large language models,
raycast integration,
Raycast,
]
description: A step-by-step guide on integrating Jan with Raycast.
---
import { Steps } from 'nextra/components'
# Raycast
## Integrate Raycast with Jan
[Raycast](https://www.raycast.com/) is a productivity tool designed for macOS that enhances workflow efficiency by providing quick access to various tasks and functionalities through a keyboard-driven interface. To integrate Raycast with Jan, follow the steps below:
<Steps>
### Step 1: Download the TinyLlama Model
1. Open Jan app.
2. Go to the **Hub** and download the TinyLlama model.
3. The model will be available at `~jan/models/tinyllama-1.1b`.
### Step 2: Clone and Run the Program
1. Clone this [GitHub repository](https://github.com/InNoobWeTrust/nitro-raycast).
2. Execute the project using the following command:
```bash title="Node.js"
npm i && npm run dev
```
### Step 3: Search for Nitro and Run the Model
Search for `Nitro` using the program, and you can use the models from Jan in RayCast.
</Steps>

View File

@ -24,13 +24,6 @@ Jan runs with privacy by default and is used 100% offline on your own computer.
If you use a Remote AI API (e.g., OpenAI API, Groq API), your data will naturally travel to their servers. They will be subject to the privacy policy of the respective API provider. If you use a Remote AI API (e.g., OpenAI API, Groq API), your data will naturally travel to their servers. They will be subject to the privacy policy of the respective API provider.
</Callout> </Callout>
Jan uses [Umami](https://umami.is/) for analytics, which is a privacy-focused, GDPR-compliant analytics tool that does not track personal information. We use this to get aggregate reports on OS and hardware types and prioritize our engineering roadmap. As per [Umami's Privacy Policy](https://umami.is/privacy), Umami uses the following data points to generate its reports:
- OS and device characteristics
- IP address
Jan does not get any of this data, and we do not track IP addresses or other identifying information. We are actively looking into more privacy-respecting ways to handle analytics, crash reports, and telemetry and would love to work with the community on this.
### Cortex ### Cortex
Cortex is a library that runs large language models (LLMs) locally on your computer. Cortex does not collect any personal information. Cortex is a library that runs large language models (LLMs) locally on your computer. Cortex does not collect any personal information.

View File

@ -28,6 +28,7 @@ import { setupReactDevTool } from './utils/dev'
import { trayManager } from './managers/tray' import { trayManager } from './managers/tray'
import { logSystemInfo } from './utils/system' import { logSystemInfo } from './utils/system'
import { registerGlobalShortcuts } from './utils/shortcut' import { registerGlobalShortcuts } from './utils/shortcut'
import { registerLogger } from './utils/logger'
const preloadPath = join(__dirname, 'preload.js') const preloadPath = join(__dirname, 'preload.js')
const rendererPath = join(__dirname, '..', 'renderer') const rendererPath = join(__dirname, '..', 'renderer')
@ -79,6 +80,7 @@ app
}) })
.then(setupCore) .then(setupCore)
.then(createUserSpace) .then(createUserSpace)
.then(registerLogger)
.then(migrate) .then(migrate)
.then(setupExtensions) .then(setupExtensions)
.then(setupMenu) .then(setupMenu)

View File

@ -1,6 +1,6 @@
{ {
"name": "jan", "name": "jan",
"version": "0.1.4", "version": "0.1.1737985524",
"main": "./build/main.js", "main": "./build/main.js",
"author": "Jan <service@jan.ai>", "author": "Jan <service@jan.ai>",
"license": "MIT", "license": "MIT",

View File

@ -1,16 +1,28 @@
import fs from 'fs' import {
createWriteStream,
existsSync,
mkdirSync,
readdir,
stat,
unlink,
writeFileSync,
} from 'fs'
import util from 'util' import util from 'util'
import { import {
getAppConfigurations, getAppConfigurations,
getJanDataFolderPath, getJanDataFolderPath,
Logger, Logger,
LoggerManager,
} from '@janhq/core/node' } from '@janhq/core/node'
import path, { join } from 'path' import path, { join } from 'path'
export class FileLogger extends Logger { /**
* File Logger
*/
export class FileLogger implements Logger {
name = 'file' name = 'file'
logCleaningInterval: number = 120000 logCleaningInterval: number = 120000
timeout: NodeJS.Timeout | null = null timeout: NodeJS.Timeout | undefined
appLogPath: string = './' appLogPath: string = './'
logEnabled: boolean = true logEnabled: boolean = true
@ -18,14 +30,13 @@ export class FileLogger extends Logger {
logEnabled: boolean = true, logEnabled: boolean = true,
logCleaningInterval: number = 120000 logCleaningInterval: number = 120000
) { ) {
super()
this.logEnabled = logEnabled this.logEnabled = logEnabled
if (logCleaningInterval) this.logCleaningInterval = logCleaningInterval if (logCleaningInterval) this.logCleaningInterval = logCleaningInterval
const appConfigurations = getAppConfigurations() const appConfigurations = getAppConfigurations()
const logFolderPath = join(appConfigurations.data_folder, 'logs') const logFolderPath = join(appConfigurations.data_folder, 'logs')
if (!fs.existsSync(logFolderPath)) { if (!existsSync(logFolderPath)) {
fs.mkdirSync(logFolderPath, { recursive: true }) mkdirSync(logFolderPath, { recursive: true })
} }
this.appLogPath = join(logFolderPath, 'app.log') this.appLogPath = join(logFolderPath, 'app.log')
@ -69,8 +80,8 @@ export class FileLogger extends Logger {
const logDirectory = path.join(getJanDataFolderPath(), 'logs') const logDirectory = path.join(getJanDataFolderPath(), 'logs')
// Perform log cleaning // Perform log cleaning
const currentDate = new Date() const currentDate = new Date()
if (fs.existsSync(logDirectory)) if (existsSync(logDirectory))
fs.readdir(logDirectory, (err, files) => { readdir(logDirectory, (err, files) => {
if (err) { if (err) {
console.error('Error reading log directory:', err) console.error('Error reading log directory:', err)
return return
@ -78,7 +89,7 @@ export class FileLogger extends Logger {
files.forEach((file) => { files.forEach((file) => {
const filePath = path.join(logDirectory, file) const filePath = path.join(logDirectory, file)
fs.stat(filePath, (err, stats) => { stat(filePath, (err, stats) => {
if (err) { if (err) {
console.error('Error getting file stats:', err) console.error('Error getting file stats:', err)
return return
@ -86,7 +97,7 @@ export class FileLogger extends Logger {
// Check size // Check size
if (stats.size > size) { if (stats.size > size) {
fs.unlink(filePath, (err) => { unlink(filePath, (err) => {
if (err) { if (err) {
console.error('Error deleting log file:', err) console.error('Error deleting log file:', err)
return return
@ -103,7 +114,7 @@ export class FileLogger extends Logger {
(1000 * 3600 * 24) (1000 * 3600 * 24)
) )
if (daysDifference > days) { if (daysDifference > days) {
fs.unlink(filePath, (err) => { unlink(filePath, (err) => {
if (err) { if (err) {
console.error('Error deleting log file:', err) console.error('Error deleting log file:', err)
return return
@ -124,15 +135,20 @@ export class FileLogger extends Logger {
} }
} }
/**
* Write log function implementation
* @param message
* @param logPath
*/
const writeLog = (message: string, logPath: string) => { const writeLog = (message: string, logPath: string) => {
if (!fs.existsSync(logPath)) { if (!existsSync(logPath)) {
const logDirectory = path.join(getJanDataFolderPath(), 'logs') const logDirectory = path.join(getJanDataFolderPath(), 'logs')
if (!fs.existsSync(logDirectory)) { if (!existsSync(logDirectory)) {
fs.mkdirSync(logDirectory) mkdirSync(logDirectory)
} }
fs.writeFileSync(logPath, message) writeFileSync(logPath, message)
} else { } else {
const logFile = fs.createWriteStream(logPath, { const logFile = createWriteStream(logPath, {
flags: 'a', flags: 'a',
}) })
logFile.write(util.format(message) + '\n') logFile.write(util.format(message) + '\n')
@ -140,3 +156,12 @@ const writeLog = (message: string, logPath: string) => {
console.debug(message) console.debug(message)
} }
} }
/**
* Register logger for global application logging
*/
export const registerLogger = () => {
const logger = new FileLogger()
LoggerManager.instance().register(logger)
logger.cleanLogs()
}

View File

@ -63,7 +63,7 @@ export default class CortexConversationalExtension extends ConversationalExtensi
async modifyThread(thread: Thread): Promise<void> { async modifyThread(thread: Thread): Promise<void> {
return this.queue return this.queue
.add(() => .add(() =>
ky.post(`${API_URL}/v1/threads/${thread.id}`, { json: thread }) ky.patch(`${API_URL}/v1/threads/${thread.id}`, { json: thread })
) )
.then() .then()
} }
@ -101,7 +101,7 @@ export default class CortexConversationalExtension extends ConversationalExtensi
async modifyMessage(message: ThreadMessage): Promise<ThreadMessage> { async modifyMessage(message: ThreadMessage): Promise<ThreadMessage> {
return this.queue.add(() => return this.queue.add(() =>
ky ky
.post( .patch(
`${API_URL}/v1/threads/${message.thread_id}/messages/${message.id}`, `${API_URL}/v1/threads/${message.thread_id}/messages/${message.id}`,
{ {
json: message, json: message,

View File

@ -6,6 +6,8 @@ import groq from './resources/groq.json' with { type: 'json' }
import martian from './resources/martian.json' with { type: 'json' } import martian from './resources/martian.json' with { type: 'json' }
import mistral from './resources/mistral.json' with { type: 'json' } import mistral from './resources/mistral.json' with { type: 'json' }
import nvidia from './resources/nvidia.json' with { type: 'json' } import nvidia from './resources/nvidia.json' with { type: 'json' }
import deepseek from './resources/deepseek.json' with { type: 'json' }
import googleGemini from './resources/google_gemini.json' with { type: 'json' }
import anthropicModels from './models/anthropic.json' with { type: 'json' } import anthropicModels from './models/anthropic.json' with { type: 'json' }
import cohereModels from './models/cohere.json' with { type: 'json' } import cohereModels from './models/cohere.json' with { type: 'json' }
@ -15,6 +17,8 @@ import groqModels from './models/groq.json' with { type: 'json' }
import martianModels from './models/martian.json' with { type: 'json' } import martianModels from './models/martian.json' with { type: 'json' }
import mistralModels from './models/mistral.json' with { type: 'json' } import mistralModels from './models/mistral.json' with { type: 'json' }
import nvidiaModels from './models/nvidia.json' with { type: 'json' } import nvidiaModels from './models/nvidia.json' with { type: 'json' }
import deepseekModels from './models/deepseek.json' with { type: 'json' }
import googleGeminiModels from './models/google_gemini.json' with { type: 'json' }
const engines = [ const engines = [
anthropic, anthropic,
@ -25,6 +29,8 @@ const engines = [
mistral, mistral,
martian, martian,
nvidia, nvidia,
deepseek,
googleGemini,
] ]
const models = [ const models = [
...anthropicModels, ...anthropicModels,
@ -35,5 +41,7 @@ const models = [
...mistralModels, ...mistralModels,
...martianModels, ...martianModels,
...nvidiaModels, ...nvidiaModels,
...deepseekModels,
...googleGeminiModels,
] ]
export { engines, models } export { engines, models }

View File

@ -8,6 +8,7 @@
"inference_params": { "inference_params": {
"max_tokens": 4096, "max_tokens": 4096,
"temperature": 0.7, "temperature": 0.7,
"max_temperature": 1.0,
"stream": true "stream": true
}, },
"engine": "anthropic" "engine": "anthropic"
@ -21,6 +22,7 @@
"inference_params": { "inference_params": {
"max_tokens": 8192, "max_tokens": 8192,
"temperature": 0.7, "temperature": 0.7,
"max_temperature": 1.0,
"stream": true "stream": true
}, },
"engine": "anthropic" "engine": "anthropic"
@ -34,6 +36,7 @@
"inference_params": { "inference_params": {
"max_tokens": 8192, "max_tokens": 8192,
"temperature": 0.7, "temperature": 0.7,
"max_temperature": 1.0,
"stream": true "stream": true
}, },
"engine": "anthropic" "engine": "anthropic"

View File

@ -8,6 +8,7 @@
"inference_params": { "inference_params": {
"max_tokens": 4096, "max_tokens": 4096,
"temperature": 0.7, "temperature": 0.7,
"max_temperature": 1.0,
"stream": false "stream": false
}, },
"engine": "cohere" "engine": "cohere"
@ -21,6 +22,7 @@
"inference_params": { "inference_params": {
"max_tokens": 4096, "max_tokens": 4096,
"temperature": 0.7, "temperature": 0.7,
"max_temperature": 1.0,
"stream": false "stream": false
}, },
"engine": "cohere" "engine": "cohere"

View File

@ -0,0 +1,28 @@
[
{
"model": "deepseek-chat",
"object": "model",
"name": "DeepSeek Chat",
"version": "1.0",
"description": "The deepseek-chat model has been upgraded to DeepSeek-V3. deepseek-reasoner points to the new model DeepSeek-R1",
"inference_params": {
"max_tokens": 8192,
"temperature": 0.6,
"stream": true
},
"engine": "deepseek"
},
{
"model": "deepseek-reasoner",
"object": "model",
"name": "DeepSeek R1",
"version": "1.0",
"description": "CoT (Chain of Thought) is the reasoning content deepseek-reasoner gives before output the final answer. For details, please refer to Reasoning Model.",
"inference_params": {
"max_tokens": 8192,
"temperature": 0.6,
"stream": true
},
"engine": "deepseek"
}
]

View File

@ -0,0 +1,67 @@
[
{
"model": "gemini-2.0-flash",
"object": "model",
"name": "Gemini 2.0 Flash",
"version": "1.0",
"description": "Gemini 2.0 Flash delivers next-gen features and improved capabilities, including superior speed, native tool use, multimodal generation, and a 1M token context window.",
"inference_params": {
"max_tokens": 8192,
"temperature": 0.6,
"stream": true
},
"engine": "google_gemini"
},
{
"model": "gemini-2.0-flash-lite-preview",
"object": "model",
"name": "Gemini 2.0 Flash-Lite Preview",
"version": "1.0",
"description": "A Gemini 2.0 Flash model optimized for cost efficiency and low latency.",
"inference_params": {
"max_tokens": 8192,
"temperature": 0.6,
"stream": true
},
"engine": "google_gemini"
},
{
"model": "gemini-1.5-flash",
"object": "model",
"name": "Gemini 1.5 Flash",
"version": "1.0",
"description": "Gemini 1.5 Flash is a fast and versatile multimodal model for scaling across diverse tasks.",
"inference_params": {
"max_tokens": 8192,
"temperature": 0.6,
"stream": true
},
"engine": "google_gemini"
},
{
"model": "gemini-1.5-flash-8b",
"object": "model",
"name": "Gemini 1.5 Flash-8B",
"version": "1.0",
"description": "Gemini 1.5 Flash-8B is a small model designed for lower intelligence tasks.",
"inference_params": {
"max_tokens": 8192,
"temperature": 0.6,
"stream": true
},
"engine": "google_gemini"
},
{
"model": "gemini-1.5-pro",
"object": "model",
"name": "Gemini 1.5 Pro",
"version": "1.0",
"description": "Gemini 1.5 Pro is a mid-size multimodal model that is optimized for a wide-range of reasoning tasks. 1.5 Pro can process large amounts of data at once, including 2 hours of video, 19 hours of audio, codebases with 60,000 lines of code, or 2,000 pages of text. ",
"inference_params": {
"max_tokens": 8192,
"temperature": 0.6,
"stream": true
},
"engine": "google_gemini"
}
]

View File

@ -8,6 +8,7 @@
"inference_params": { "inference_params": {
"max_tokens": 32000, "max_tokens": 32000,
"temperature": 0.7, "temperature": 0.7,
"max_temperature": 1.0,
"top_p": 0.95, "top_p": 0.95,
"stream": true "stream": true
}, },
@ -22,6 +23,7 @@
"inference_params": { "inference_params": {
"max_tokens": 32000, "max_tokens": 32000,
"temperature": 0.7, "temperature": 0.7,
"max_temperature": 1.0,
"top_p": 0.95, "top_p": 0.95,
"stream": true "stream": true
}, },
@ -36,6 +38,7 @@
"inference_params": { "inference_params": {
"max_tokens": 32000, "max_tokens": 32000,
"temperature": 0.7, "temperature": 0.7,
"max_temperature": 1.0,
"top_p": 0.95, "top_p": 0.95,
"stream": true "stream": true
}, },

View File

@ -8,6 +8,7 @@
"inference_params": { "inference_params": {
"max_tokens": 1024, "max_tokens": 1024,
"temperature": 0.3, "temperature": 0.3,
"max_temperature": 1.0,
"top_p": 1, "top_p": 1,
"stream": false, "stream": false,
"frequency_penalty": 0, "frequency_penalty": 0,

View File

@ -79,12 +79,7 @@
"description": "OpenAI o1 is a new model with complex reasoning", "description": "OpenAI o1 is a new model with complex reasoning",
"format": "api", "format": "api",
"inference_params": { "inference_params": {
"max_tokens": 100000, "max_tokens": 100000
"temperature": 1,
"top_p": 1,
"stream": true,
"frequency_penalty": 0,
"presence_penalty": 0
}, },
"engine": "openai" "engine": "openai"
}, },
@ -97,11 +92,7 @@
"format": "api", "format": "api",
"inference_params": { "inference_params": {
"max_tokens": 32768, "max_tokens": 32768,
"temperature": 1, "stream": true
"top_p": 1,
"stream": true,
"frequency_penalty": 0,
"presence_penalty": 0
}, },
"engine": "openai" "engine": "openai"
}, },
@ -114,11 +105,20 @@
"format": "api", "format": "api",
"inference_params": { "inference_params": {
"max_tokens": 65536, "max_tokens": 65536,
"temperature": 1, "stream": true
"top_p": 1, },
"stream": true, "engine": "openai"
"frequency_penalty": 0, },
"presence_penalty": 0 {
"model": "o3-mini",
"object": "model",
"name": "OpenAI o3-mini",
"version": "1.0",
"description": "OpenAI most recent reasoning model, providing high intelligence at the same cost and latency targets of o1-mini.",
"format": "api",
"inference_params": {
"max_tokens": 100000,
"stream": true
}, },
"engine": "openai" "engine": "openai"
} }

View File

@ -1,16 +1,91 @@
[ [
{ {
"model": "open-router-auto", "model": "deepseek/deepseek-r1:free",
"object": "model", "object": "model",
"name": "OpenRouter", "name": "DeepSeek: R1",
"version": "1.0", "version": "1.0",
"description": " OpenRouter scouts for the lowest prices and best latencies/throughputs across dozens of providers, and lets you choose how to prioritize them.", "description": "OpenRouter scouts for the lowest prices and best latencies/throughputs across dozens of providers, and lets you choose how to prioritize them.",
"inference_params": { "inference_params": {
"max_tokens": 128000,
"temperature": 0.7, "temperature": 0.7,
"top_p": 0.95, "top_p": 0.95,
"frequency_penalty": 0, "frequency_penalty": 0,
"presence_penalty": 0 "presence_penalty": 0,
"stream": true
},
"engine": "openrouter"
},
{
"model": "deepseek/deepseek-r1-distill-llama-70b:free",
"object": "model",
"name": "DeepSeek: R1 Distill Llama 70B",
"version": "1.0",
"description": " OpenRouter scouts for the lowest prices and best latencies/throughputs across dozens of providers, and lets you choose how to prioritize them.",
"inference_params": {
"temperature": 0.7,
"top_p": 0.95,
"frequency_penalty": 0,
"presence_penalty": 0,
"stream": true
},
"engine": "openrouter"
},
{
"model": "deepseek/deepseek-r1-distill-llama-70b:free",
"object": "model",
"name": "DeepSeek: R1 Distill Llama 70B",
"version": "1.0",
"description": "OpenRouter scouts for the lowest prices and best latencies/throughputs across dozens of providers, and lets you choose how to prioritize them.",
"inference_params": {
"temperature": 0.7,
"top_p": 0.95,
"frequency_penalty": 0,
"presence_penalty": 0,
"stream": true
},
"engine": "openrouter"
},
{
"model": "meta-llama/llama-3.1-405b-instruct:free",
"object": "model",
"name": "Meta: Llama 3.1 405B Instruct",
"version": "1.0",
"description": "OpenRouter scouts for the lowest prices and best latencies/throughputs across dozens of providers, and lets you choose how to prioritize them.",
"inference_params": {
"temperature": 0.7,
"top_p": 0.95,
"frequency_penalty": 0,
"presence_penalty": 0,
"stream": true
},
"engine": "openrouter"
},
{
"model": "qwen/qwen-vl-plus:free",
"object": "model",
"name": "Qwen: Qwen VL Plus",
"version": "1.0",
"description": "OpenRouter scouts for the lowest prices and best latencies/throughputs across dozens of providers, and lets you choose how to prioritize them.",
"inference_params": {
"temperature": 0.7,
"top_p": 0.95,
"frequency_penalty": 0,
"presence_penalty": 0,
"stream": true
},
"engine": "openrouter"
},
{
"model": "qwen/qwen2.5-vl-72b-instruct:free",
"object": "model",
"name": "Qwen: Qwen2.5 VL 72B Instruct",
"version": "1.0",
"description": "OpenRouter scouts for the lowest prices and best latencies/throughputs across dozens of providers, and lets you choose how to prioritize them.",
"inference_params": {
"temperature": 0.7,
"top_p": 0.95,
"frequency_penalty": 0,
"presence_penalty": 0,
"stream": true
}, },
"engine": "openrouter" "engine": "openrouter"
} }

View File

@ -29,12 +29,10 @@
}, },
"dependencies": { "dependencies": {
"@janhq/core": "../../core/package.tgz", "@janhq/core": "../../core/package.tgz",
"cpu-instructions": "^0.0.13",
"ky": "^1.7.2", "ky": "^1.7.2",
"p-queue": "^8.0.1" "p-queue": "^8.0.1"
}, },
"bundledDependencies": [ "bundledDependencies": [
"cpu-instructions",
"@janhq/core" "@janhq/core"
], ],
"engines": { "engines": {

View File

@ -1,5 +1,5 @@
{ {
"id": "@janhq/inference-anthropic-extension", "id": "anthropic",
"type": "remote", "type": "remote",
"engine": "anthropic", "engine": "anthropic",
"url": "https://console.anthropic.com/settings/keys", "url": "https://console.anthropic.com/settings/keys",
@ -10,13 +10,14 @@
"transform_req": { "transform_req": {
"chat_completions": { "chat_completions": {
"url": "https://api.anthropic.com/v1/messages", "url": "https://api.anthropic.com/v1/messages",
"template": "{ {% for key, value in input_request %} {% if key == \"messages\" %} {% if input_request.messages.0.role == \"system\" %} \"system\": \"{{ input_request.messages.0.content }}\", \"messages\": [{% for message in input_request.messages %} {% if not loop.is_first %} {\"role\": \"{{ message.role }}\", \"content\": \"{{ message.content }}\" } {% if not loop.is_last %},{% endif %} {% endif %} {% endfor %}] {% else %} \"messages\": [{% for message in input_request.messages %} {\"role\": \"{{ message.role}}\", \"content\": \"{{ message.content }}\" } {% if not loop.is_last %},{% endif %} {% endfor %}] {% endif %} {% if not loop.is_last %},{% endif %} {% else if key == \"system\" or key == \"model\" or key == \"temperature\" or key == \"store\" or key == \"max_tokens\" or key == \"stream\" or key == \"presence_penalty\" or key == \"metadata\" or key == \"frequency_penalty\" or key == \"tools\" or key == \"tool_choice\" or key == \"logprobs\" or key == \"top_logprobs\" or key == \"logit_bias\" or key == \"n\" or key == \"modalities\" or key == \"prediction\" or key == \"response_format\" or key == \"service_tier\" or key == \"seed\" or key == \"stop\" or key == \"stream_options\" or key == \"top_p\" or key == \"parallel_tool_calls\" or key == \"user\" %}\"{{ key }}\": {{ tojson(value) }} {% if not loop.is_last %},{% endif %} {% endif %} {% endfor %} }" "template": "{ {% for key, value in input_request %} {% if key == \"messages\" %} {% if input_request.messages.0.role == \"system\" %} \"system\": {{ tojson(input_request.messages.0.content) }}, \"messages\": [{% for message in input_request.messages %} {% if not loop.is_first %} {\"role\": {{ tojson(message.role) }}, \"content\": {% if not message.content or message.content == \"\" %} \".\" {% else %} {{ tojson(message.content) }} {% endif %} } {% if not loop.is_last %},{% endif %} {% endif %} {% endfor %}] {% else %} \"messages\": [{% for message in input_request.messages %} {\"role\": {{ tojson(message.role) }}, \"content\": {% if not message.content or message.content == \"\" %} \".\" {% else %} {{ tojson(message.content) }} {% endif %} } {% if not loop.is_last %},{% endif %} {% endfor %}] {% endif %} {% if not loop.is_last %},{% endif %} {% else if key == \"system\" or key == \"model\" or key == \"temperature\" or key == \"store\" or key == \"max_tokens\" or key == \"stream\" or key == \"metadata\" or key == \"tools\" or key == \"tool_choice\" or key == \"logprobs\" or key == \"top_logprobs\" or key == \"logit_bias\" or key == \"n\" or key == \"modalities\" or key == \"prediction\" or key == \"response_format\" or key == \"service_tier\" or key == \"seed\" or key == \"stop\" or key == \"stream_options\" or key == \"top_p\" or key == \"parallel_tool_calls\" or key == \"user\" %}\"{{ key }}\": {{ tojson(value) }} {% if not loop.is_last %},{% endif %} {% endif %} {% endfor %} }"
} }
}, },
"transform_resp": { "transform_resp": {
"chat_completions": { "chat_completions": {
"template": "{% if input_request.stream %} {\"object\": \"chat.completion.chunk\", \"model\": \"{{ input_request.model }}\", \"choices\": [{\"index\": 0, \"delta\": { {% if input_request.type == \"message_start\" %} \"role\": \"assistant\", \"content\": null {% else if input_request.type == \"ping\" %} \"role\": \"assistant\", \"content\": null {% else if input_request.type == \"content_block_delta\" %} \"role\": \"assistant\", \"content\": \"{{ input_request.delta.text }}\" {% else if input_request.type == \"content_block_stop\" %} \"role\": \"assistant\", \"content\": null {% else if input_request.type == \"content_block_stop\" %} \"role\": \"assistant\", \"content\": null {% endif %} }, {% if input_request.type == \"content_block_stop\" %} \"finish_reason\": \"stop\" {% else %} \"finish_reason\": null {% endif %} }]} {% else %} {\"id\": \"{{ input_request.id }}\", \"created\": null, \"object\": \"chat.completion\", \"model\": \"{{ input_request.model }}\", \"choices\": [{ \"index\": 0, \"message\": { \"role\": \"{{ input_request.role }}\", \"content\": {% if input_request.content and input_request.content.0.type == \"text\" %} \"{{input_request.content.0.text}}\" {% else %} null {% endif %}, \"refusal\": null }, \"logprobs\": null, \"finish_reason\": \"{{ input_request.stop_reason }}\" } ], \"usage\": { \"prompt_tokens\": {{ input_request.usage.input_tokens }}, \"completion_tokens\": {{ input_request.usage.output_tokens }}, \"total_tokens\": {{ input_request.usage.input_tokens + input_request.usage.output_tokens }}, \"prompt_tokens_details\": { \"cached_tokens\": 0 }, \"completion_tokens_details\": { \"reasoning_tokens\": 0, \"accepted_prediction_tokens\": 0, \"rejected_prediction_tokens\": 0 } }, \"system_fingerprint\": \"fp_6b68a8204b\"} {% endif %}" "template": "{% if input_request.stream %} {\"object\": \"chat.completion.chunk\", \"model\": \"{{ input_request.model }}\", \"choices\": [{\"index\": 0, \"delta\": { {% if input_request.type == \"message_start\" %} \"role\": \"assistant\", \"content\": null {% else if input_request.type == \"ping\" %} \"role\": \"assistant\", \"content\": null {% else if input_request.type == \"content_block_delta\" %} \"role\": \"assistant\", \"content\": \"{{ input_request.delta.text }}\" {% else if input_request.type == \"content_block_stop\" %} \"role\": \"assistant\", \"content\": null {% else if input_request.type == \"content_block_stop\" %} \"role\": \"assistant\", \"content\": null {% endif %} }, {% if input_request.type == \"content_block_stop\" %} \"finish_reason\": \"stop\" {% else %} \"finish_reason\": null {% endif %} }]} {% else %} {{tojson(input_request)}} {% endif %}"
}
} }
},
"explore_models_url": "https://docs.anthropic.com/en/docs/about-claude/models"
} }
} }

View File

@ -1,5 +1,5 @@
{ {
"id": "@janhq/inference-cohere-extension", "id": "cohere",
"type": "remote", "type": "remote",
"engine": "cohere", "engine": "cohere",
"url": "https://dashboard.cohere.com/api-keys", "url": "https://dashboard.cohere.com/api-keys",
@ -10,13 +10,14 @@
"transform_req": { "transform_req": {
"chat_completions": { "chat_completions": {
"url": "https://api.cohere.ai/v1/chat", "url": "https://api.cohere.ai/v1/chat",
"template": "{ {% for key, value in input_request %} {% if key == \"messages\" %} {% if input_request.messages.0.role == \"system\" %} \"preamble\": \"{{ input_request.messages.0.content }}\", {% if length(input_request.messages) > 2 %} \"chatHistory\": [{% for message in input_request.messages %} {% if not loop.is_first and not loop.is_last %} {\"role\": {% if message.role == \"user\" %} \"USER\" {% else %} \"CHATBOT\" {% endif %}, \"content\": \"{{ message.content }}\" } {% if loop.index < length(input_request.messages) - 2 %},{% endif %} {% endif %} {% endfor %}], {% endif %} \"message\": \"{{ last(input_request.messages).content }}\" {% else %} {% if length(input_request.messages) > 2 %} \"chatHistory\": [{% for message in input_request.messages %} {% if not loop.is_last %} { \"role\": {% if message.role == \"user\" %} \"USER\" {% else %} \"CHATBOT\" {% endif %}, \"content\": \"{{ message.content }}\" } {% if loop.index < length(input_request.messages) - 2 %},{% endif %} {% endif %} {% endfor %}],{% endif %}\"message\": \"{{ last(input_request.messages).content }}\" {% endif %}{% if not loop.is_last %},{% endif %} {% else if key == \"system\" or key == \"model\" or key == \"temperature\" or key == \"store\" or key == \"max_tokens\" or key == \"stream\" or key == \"presence_penalty\" or key == \"metadata\" or key == \"frequency_penalty\" or key == \"tools\" or key == \"tool_choice\" or key == \"logprobs\" or key == \"top_logprobs\" or key == \"logit_bias\" or key == \"n\" or key == \"modalities\" or key == \"prediction\" or key == \"response_format\" or key == \"service_tier\" or key == \"seed\" or key == \"stop\" or key == \"stream_options\" or key == \"top_p\" or key == \"parallel_tool_calls\" or key == \"user\" %} \"{{ key }}\": {{ tojson(value) }} {% if not loop.is_last %},{% endif %} {% endif %} {% endfor %} }" "template": "{ {% for key, value in input_request %} {% if key == \"messages\" %} {% if input_request.messages.0.role == \"system\" %} \"preamble\": {{ tojson(input_request.messages.0.content) }}, {% if length(input_request.messages) > 2 %} \"chatHistory\": [{% for message in input_request.messages %} {% if not loop.is_first and not loop.is_last %} {\"role\": {% if message.role == \"user\" %} \"USER\" {% else %} \"CHATBOT\" {% endif %}, \"content\": \"{{ message.content }}\" } {% if loop.index < length(input_request.messages) - 2 %},{% endif %} {% endif %} {% endfor %}], {% endif %} \"message\": \"{{ last(input_request.messages).content }}\" {% else %} {% if length(input_request.messages) > 2 %} \"chatHistory\": [{% for message in input_request.messages %} {% if not loop.is_last %} { \"role\": {% if message.role == \"user\" %} \"USER\" {% else %} \"CHATBOT\" {% endif %}, \"content\": \"{{ message.content }}\" } {% if loop.index < length(input_request.messages) - 2 %},{% endif %} {% endif %} {% endfor %}],{% endif %}\"message\": \"{{ last(input_request.messages).content }}\" {% endif %}{% if not loop.is_last %},{% endif %} {% else if key == \"system\" or key == \"model\" or key == \"temperature\" or key == \"store\" or key == \"max_tokens\" or key == \"stream\" or key == \"presence_penalty\" or key == \"metadata\" or key == \"frequency_penalty\" or key == \"tools\" or key == \"tool_choice\" or key == \"logprobs\" or key == \"top_logprobs\" or key == \"logit_bias\" or key == \"n\" or key == \"modalities\" or key == \"prediction\" or key == \"response_format\" or key == \"service_tier\" or key == \"seed\" or key == \"stop\" or key == \"stream_options\" or key == \"top_p\" or 
key == \"parallel_tool_calls\" or key == \"user\" %} \"{{ key }}\": {{ tojson(value) }} {% if not loop.is_last %},{% endif %} {% endif %} {% endfor %} }"
} }
}, },
"transform_resp": { "transform_resp": {
"chat_completions": { "chat_completions": {
"template": "{% if input_request.stream %} {\"object\": \"chat.completion.chunk\", \"model\": \"{{ input_request.model }}\", \"choices\": [{\"index\": 0, \"delta\": { {% if input_request.event_type == \"text-generation\" %} \"role\": \"assistant\", \"content\": \"{{ input_request.text }}\" {% else %} \"role\": \"assistant\", \"content\": null {% endif %} }, {% if input_request.event_type == \"stream-end\" %} \"finish_reason\": \"{{ input_request.finish_reason }}\" {% else %} \"finish_reason\": null {% endif %} }]} {% else %} {\"id\": \"{{ input_request.generation_id }}\", \"created\": null, \"object\": \"chat.completion\", \"model\": {% if input_request.model %} \"{{ input_request.model }}\" {% else %} \"command-r-plus-08-2024\" {% endif %}, \"choices\": [{ \"index\": 0, \"message\": { \"role\": \"assistant\", \"content\": {% if not input_request.text %} null {% else %} \"{{ input_request.text }}\" {% endif %}, \"refusal\": null }, \"logprobs\": null, \"finish_reason\": \"{{ input_request.finish_reason }}\" } ], \"usage\": { \"prompt_tokens\": {{ input_request.meta.tokens.input_tokens }}, \"completion_tokens\": {{ input_request.meta.tokens.output_tokens }},\"total_tokens\": {{ input_request.meta.tokens.input_tokens + input_request.meta.tokens.output_tokens }}, \"prompt_tokens_details\": { \"cached_tokens\": 0 },\"completion_tokens_details\": { \"reasoning_tokens\": 0, \"accepted_prediction_tokens\": 0, \"rejected_prediction_tokens\": 0 } }, \"system_fingerprint\": \"fp_6b68a8204b\"} {% endif %}" "template": "{% if input_request.stream %} {\"object\": \"chat.completion.chunk\", \"model\": \"{{ input_request.model }}\", \"choices\": [{\"index\": 0, \"delta\": { {% if input_request.event_type == \"text-generation\" %} \"role\": \"assistant\", \"content\": \"{{ input_request.text }}\" {% else %} \"role\": \"assistant\", \"content\": null {% endif %} }, {% if input_request.event_type == \"stream-end\" %} \"finish_reason\": \"{{ input_request.finish_reason }}\" {% else %} \"finish_reason\": null {% endif %} }]} {% else %} {\"id\": \"{{ input_request.generation_id }}\", \"created\": null, \"object\": \"chat.completion\", \"model\": {% if input_request.model %} \"{{ input_request.model }}\" {% else %} \"command-r-plus-08-2024\" {% endif %}, \"choices\": [{ \"index\": 0, \"message\": { \"role\": \"assistant\", \"content\": {% if not input_request.text %} null {% else %} \"{{ input_request.text }}\" {% endif %}, \"refusal\": null }, \"logprobs\": null, \"finish_reason\": \"{{ input_request.finish_reason }}\" } ], \"usage\": { \"prompt_tokens\": {{ input_request.meta.tokens.input_tokens }}, \"completion_tokens\": {{ input_request.meta.tokens.output_tokens }},\"total_tokens\": {{ input_request.meta.tokens.input_tokens + input_request.meta.tokens.output_tokens }}, \"prompt_tokens_details\": { \"cached_tokens\": 0 },\"completion_tokens_details\": { \"reasoning_tokens\": 0, \"accepted_prediction_tokens\": 0, \"rejected_prediction_tokens\": 0 } }, \"system_fingerprint\": \"fp_6b68a8204b\"} {% endif %}"
} }
} },
"explore_models_url": "https://docs.cohere.com/v2/docs/models"
} }
} }

View File

@ -0,0 +1,23 @@
{
"id": "deepseek",
"type": "remote",
"engine": "deepseek",
"url": "https://platform.deepseek.com/api_keys",
"api_key": "",
"metadata": {
"get_models_url": "https://api.deepseek.com/models",
"header_template": "Authorization: Bearer {{api_key}}",
"transform_req": {
"chat_completions": {
"url": "https://api.deepseek.com/chat/completions",
"template": "{ {% set first = true %} {% for key, value in input_request %} {% if key == \"messages\" or key == \"temperature\" or key == \"store\" or key == \"max_tokens\" or key == \"stream\" or key == \"presence_penalty\" or key == \"metadata\" or key == \"frequency_penalty\" or key == \"tools\" or key == \"tool_choice\" or key == \"logprobs\" or key == \"top_logprobs\" or key == \"logit_bias\" or key == \"n\" or key == \"modalities\" or key == \"prediction\" or key == \"response_format\" or key == \"service_tier\" or key == \"model\" or key == \"seed\" or key == \"stop\" or key == \"stream_options\" or key == \"top_p\" or key == \"parallel_tool_calls\" or key == \"user\" %} {% if not first %},{% endif %} \"{{ key }}\": {{ tojson(value) }} {% set first = false %} {% endif %} {% endfor %} }"
}
},
"transform_resp": {
"chat_completions": {
"template": "{{tojson(input_request)}}"
}
},
"explore_models_url": "https://api-docs.deepseek.com/quick_start/pricing"
}
}

View File

@ -0,0 +1,23 @@
{
"id": "google_gemini",
"type": "remote",
"engine": "google_gemini",
"url": "https://aistudio.google.com/apikey",
"api_key": "",
"metadata": {
"get_models_url": "https://generativelanguage.googleapis.com/v1beta/models",
"header_template": "Authorization: Bearer {{api_key}}",
"transform_req": {
"chat_completions": {
"url": "https://generativelanguage.googleapis.com/v1beta/openai/chat/completions",
"template": "{ {% set first = true %} {% for key, value in input_request %} {% if key == \"messages\" or key == \"temperature\" or key == \"store\" or key == \"max_tokens\" or key == \"stream\" or key == \"presence_penalty\" or key == \"metadata\" or key == \"frequency_penalty\" or key == \"tools\" or key == \"tool_choice\" or key == \"logprobs\" or key == \"top_logprobs\" or key == \"logit_bias\" or key == \"n\" or key == \"modalities\" or key == \"prediction\" or key == \"response_format\" or key == \"service_tier\" or key == \"model\" or key == \"seed\" or key == \"stop\" or key == \"stream_options\" or key == \"top_p\" or key == \"parallel_tool_calls\" or key == \"user\" %} {% if not first %},{% endif %} \"{{ key }}\": {{ tojson(value) }} {% set first = false %} {% endif %} {% endfor %} }"
}
},
"transform_resp": {
"chat_completions": {
"template": "{{tojson(input_request)}}"
}
},
"explore_models_url": "https://ai.google.dev/gemini-api/docs/models/gemini"
}
}

View File

@ -1,5 +1,5 @@
{ {
"id": "@janhq/inference-groq-extension", "id": "groq",
"type": "remote", "type": "remote",
"engine": "groq", "engine": "groq",
"url": "https://console.groq.com/keys", "url": "https://console.groq.com/keys",
@ -15,8 +15,9 @@
}, },
"transform_resp": { "transform_resp": {
"chat_completions": { "chat_completions": {
"template": "{ {% set first = true %} {% for key, value in input_request %} {% if key == \"choices\" or key == \"created\" or key == \"model\" or key == \"service_tier\" or key == \"system_fingerprint\" or key == \"stream\" or key == \"object\" or key == \"usage\" %} {% if not first %},{% endif %} \"{{ key }}\": {{ tojson(value) }} {% set first = false %} {% endif %} {% endfor %} }" "template": "{{tojson(input_request)}}"
}
} }
},
"explore_models_url": "https://console.groq.com/docs/models"
} }
} }

View File

@ -1,5 +1,5 @@
{ {
"id": "@janhq/inference-martian-extension", "id": "martian",
"type": "remote", "type": "remote",
"engine": "martian", "engine": "martian",
"url": "https://withmartian.com/dashboard", "url": "https://withmartian.com/dashboard",
@ -15,8 +15,9 @@
}, },
"transform_resp": { "transform_resp": {
"chat_completions": { "chat_completions": {
"template": "{ {% set first = true %} {% for key, value in input_request %} {% if key == \"choices\" or key == \"created\" or key == \"model\" or key == \"service_tier\" or key == \"system_fingerprint\" or key == \"stream\" or key == \"object\" or key == \"usage\" %} {% if not first %},{% endif %} \"{{ key }}\": {{ tojson(value) }} {% set first = false %} {% endif %} {% endfor %} }" "template": "{{tojson(input_request)}}"
}
} }
},
"explore_models_url": "https://withmartian.github.io/llm-adapters/"
} }
} }

View File

@ -1,5 +1,5 @@
{ {
"id": "@janhq/inference-mistral-extension", "id": "mistral",
"type": "remote", "type": "remote",
"engine": "mistral", "engine": "mistral",
"url": "https://console.mistral.ai/api-keys/", "url": "https://console.mistral.ai/api-keys/",
@ -17,6 +17,7 @@
"chat_completions": { "chat_completions": {
"template": "{{tojson(input_request)}}" "template": "{{tojson(input_request)}}"
} }
} },
"explore_models_url": "https://docs.mistral.ai/getting-started/models/models_overview/"
} }
} }

View File

@ -1,5 +1,5 @@
{ {
"id": "@janhq/inference-nvidia-extension", "id": "nvidia",
"type": "remote", "type": "remote",
"engine": "nvidia", "engine": "nvidia",
"url": "https://org.ngc.nvidia.com/setup/personal-keys", "url": "https://org.ngc.nvidia.com/setup/personal-keys",
@ -15,8 +15,9 @@
}, },
"transform_resp": { "transform_resp": {
"chat_completions": { "chat_completions": {
"template": "{ {% set first = true %} {% for key, value in input_request %} {% if key == \"choices\" or key == \"created\" or key == \"model\" or key == \"service_tier\" or key == \"system_fingerprint\" or key == \"stream\" or key == \"object\" or key == \"usage\" %} {% if not first %},{% endif %} \"{{ key }}\": {{ tojson(value) }} {% set first = false %} {% endif %} {% endfor %} }" "template": "{{tojson(input_request)}}"
}
} }
},
"explore_models_url": "https://build.nvidia.com/models"
} }
} }

View File

@ -1,5 +1,5 @@
{ {
"id": "@janhq/inference-openai-extension", "id": "openai",
"type": "remote", "type": "remote",
"engine": "openai", "engine": "openai",
"url": "https://platform.openai.com/account/api-keys", "url": "https://platform.openai.com/account/api-keys",
@ -10,13 +10,14 @@
"transform_req": { "transform_req": {
"chat_completions": { "chat_completions": {
"url": "https://api.openai.com/v1/chat/completions", "url": "https://api.openai.com/v1/chat/completions",
"template": "{ {% set first = true %} {% for key, value in input_request %} {% if key == \"model\" or key == \"temperature\" or key == \"store\" or key == \"messages\" or key == \"stream\" or key == \"presence_penalty\" or key == \"metadata\" or key == \"frequency_penalty\" or key == \"tools\" or key == \"tool_choice\" or key == \"logprobs\" or key == \"top_logprobs\" or key == \"logit_bias\" or key == \"n\" or key == \"modalities\" or key == \"prediction\" or key == \"response_format\" or key == \"service_tier\" or key == \"seed\" or key == \"stream_options\" or key == \"top_p\" or key == \"parallel_tool_calls\" or key == \"user\" or key == \"max_tokens\" or ((input_request.model == \"o1\" or input_request.model == \"o1-preview\" or input_request.model == \"o1-mini\") and (key == \"stop\")) %} {% if not first %} , {% endif %} {% if key == \"messages\" and (input_request.model == \"o1\" or input_request.model == \"o1-preview\" or input_request.model == \"o1-mini\") and input_request.messages.0.role == \"system\" %} \"messages\": [{% for message in input_request.messages %} {% if not loop.is_first %} { \"role\": \"{{ message.role }}\", \"content\": \"{{ message.content }}\" } {% if not loop.is_last %} , {% endif %} {% endif %} {% endfor %}] {% else if key == \"max_tokens\" and (input_request.model == \"o1\" or input_request.model == \"o1-preview\" or input_request.model == \"o1-mini\") %} \"max_completion_tokens\": {{ tojson(value) }} {% else %} \"{{ key }}\": {{ tojson(value) }} {% set first = false %} {% endif %} {% endif %} {% endfor %} }" "template": "{ {% set first = true %} {% for key, value in input_request %} {% if key == \"model\" or key == \"temperature\" or key == \"store\" or key == \"messages\" or key == \"stream\" or key == \"presence_penalty\" or key == \"metadata\" or key == \"frequency_penalty\" or key == \"tools\" or key == \"tool_choice\" or key == \"logprobs\" or key == \"top_logprobs\" or key == \"logit_bias\" or key == \"n\" or key == \"modalities\" or key == \"prediction\" or key == \"response_format\" or key == \"service_tier\" or key == \"seed\" or key == \"stream_options\" or key == \"top_p\" or key == \"parallel_tool_calls\" or key == \"user\" or key == \"max_tokens\" or ((input_request.model == \"o1\" or input_request.model == \"o1-preview\" or input_request.model == \"o1-mini\" or input_request.model == \"o3\" or input_request.model == \"o3-mini\") and (key == \"stop\")) %} {% if not first %} , {% endif %} {% if key == \"messages\" and (input_request.model == \"o1\" or input_request.model == \"o1-preview\" or input_request.model == \"o1-mini\") and input_request.messages.0.role == \"system\" %} \"messages\": [{% for message in input_request.messages %} {% if not loop.is_first %} { \"role\": \"{{ message.role }}\", \"content\": \"{{ message.content }}\" } {% if not loop.is_last %} , {% endif %} {% endif %} {% endfor %}] {% else if key == \"max_tokens\" and (input_request.model == \"o1\" or input_request.model == \"o1-preview\" or input_request.model == \"o1-mini\" or input_request.model == \"o3\" or input_request.model == \"o3-mini\") %} \"max_completion_tokens\": {{ tojson(value) }} {% set first = false %} {% else %} \"{{ key }}\": {{ tojson(value) }} {% set first = false %} {% endif %} {% endif %} {% endfor %} }"
} }
}, },
"transform_resp": { "transform_resp": {
"chat_completions": { "chat_completions": {
"template": "{ {% set first = true %} {% for key, value in input_request %} {% if key == \"choices\" or key == \"created\" or key == \"model\" or key == \"service_tier\" or key == \"stream\" or key == \"object\" or key == \"usage\" %} {% if not first %},{% endif %} \"{{ key }}\": {{ tojson(value) }} {% set first = false %} {% endif %} {% endfor %} }" "template": "{{tojson(input_request)}}"
}
} }
},
"explore_models_url": "https://platform.openai.com/docs/models"
} }
} }

View File

@ -1,5 +1,5 @@
{ {
"id": "@janhq/inference-openrouter-extension", "id": "openrouter",
"type": "remote", "type": "remote",
"engine": "openrouter", "engine": "openrouter",
"url": "https://openrouter.ai/keys", "url": "https://openrouter.ai/keys",
@ -10,13 +10,14 @@
"transform_req": { "transform_req": {
"chat_completions": { "chat_completions": {
"url": "https://openrouter.ai/api/v1/chat/completions", "url": "https://openrouter.ai/api/v1/chat/completions",
"template": "{ {% set first = true %} {% for key, value in input_request %} {% if key == \"messages\" or key == \"temperature\" or key == \"store\" or key == \"max_tokens\" or key == \"stream\" or key == \"presence_penalty\" or key == \"metadata\" or key == \"frequency_penalty\" or key == \"tools\" or key == \"tool_choice\" or key == \"logprobs\" or key == \"top_logprobs\" or key == \"logit_bias\" or key == \"n\" or key == \"modalities\" or key == \"prediction\" or key == \"response_format\" or key == \"service_tier\" or key == \"seed\" or key == \"stop\" or key == \"stream_options\" or key == \"top_p\" or key == \"parallel_tool_calls\" or key == \"user\" %} {% if not first %},{% endif %} \"{{ key }}\": {{ tojson(value) }} {% set first = false %} {% endif %} {% endfor %} }" "template": "{ {% set first = true %} {% for key, value in input_request %} {% if key == \"messages\" or key == \"temperature\" or key == \"store\" or key == \"max_tokens\" or key == \"stream\" or key == \"presence_penalty\" or key == \"metadata\" or key == \"frequency_penalty\" or key == \"tools\" or key == \"tool_choice\" or key == \"logprobs\" or key == \"top_logprobs\" or key == \"logit_bias\" or key == \"n\" or key == \"modalities\" or key == \"prediction\" or key == \"response_format\" or key == \"service_tier\" or key == \"model\" or key == \"seed\" or key == \"stop\" or key == \"stream_options\" or key == \"top_p\" or key == \"parallel_tool_calls\" or key == \"user\" %} {% if not first %},{% endif %} \"{{ key }}\": {{ tojson(value) }} {% set first = false %} {% endif %} {% endfor %} }"
} }
}, },
"transform_resp": { "transform_resp": {
"chat_completions": { "chat_completions": {
"template": "{ {% set first = true %} {% for key, value in input_request %} {% if key == \"choices\" or key == \"created\" or key == \"model\" or key == \"service_tier\" or key == \"system_fingerprint\" or key == \"stream\" or key == \"object\" or key == \"usage\" %} {% if not first %},{% endif %} \"{{ key }}\": {{ tojson(value) }} {% set first = false %} {% endif %} {% endfor %} }" "template": "{{tojson(input_request)}}"
}
} }
},
"explore_models_url": "https://openrouter.ai/models"
} }
} }

View File

@ -13,9 +13,19 @@ export default defineConfig([
NODE: JSON.stringify(`${pkgJson.name}/${pkgJson.node}`), NODE: JSON.stringify(`${pkgJson.name}/${pkgJson.node}`),
API_URL: JSON.stringify('http://127.0.0.1:39291'), API_URL: JSON.stringify('http://127.0.0.1:39291'),
SOCKET_URL: JSON.stringify('ws://127.0.0.1:39291'), SOCKET_URL: JSON.stringify('ws://127.0.0.1:39291'),
CORTEX_ENGINE_VERSION: JSON.stringify('v0.1.46'), PLATFORM: JSON.stringify(process.platform),
CORTEX_ENGINE_VERSION: JSON.stringify('v0.1.49'),
DEFAULT_REMOTE_ENGINES: JSON.stringify(engines), DEFAULT_REMOTE_ENGINES: JSON.stringify(engines),
DEFAULT_REMOTE_MODELS: JSON.stringify(models), DEFAULT_REMOTE_MODELS: JSON.stringify(models),
DEFAULT_REQUEST_PAYLOAD_TRANSFORM: JSON.stringify(
`{ {% set first = true %} {% for key, value in input_request %} {% if key == "messages" or key == "model" or key == "temperature" or key == "store" or key == "max_tokens" or key == "stream" or key == "presence_penalty" or key == "metadata" or key == "frequency_penalty" or key == "tools" or key == "tool_choice" or key == "logprobs" or key == "top_logprobs" or key == "logit_bias" or key == "n" or key == "modalities" or key == "prediction" or key == "response_format" or key == "service_tier" or key == "seed" or key == "stop" or key == "stream_options" or key == "top_p" or key == "parallel_tool_calls" or key == "user" %} {% if not first %},{% endif %} "{{ key }}": {{ tojson(value) }} {% set first = false %} {% endif %} {% endfor %} }`
),
DEFAULT_RESPONSE_BODY_TRANSFORM: JSON.stringify(
'{{tojson(input_request)}}'
),
DEFAULT_REQUEST_HEADERS_TRANSFORM: JSON.stringify(
'Authorization: Bearer {{api_key}}'
),
}, },
}, },
{ {
@ -26,18 +36,7 @@ export default defineConfig([
file: 'dist/node/index.cjs.js', file: 'dist/node/index.cjs.js',
}, },
define: { define: {
CORTEX_ENGINE_VERSION: JSON.stringify('v0.1.46'), CORTEX_ENGINE_VERSION: JSON.stringify('v0.1.49'),
},
},
{
input: 'src/node/cpuInfo.ts',
output: {
format: 'cjs',
file: 'dist/node/cpuInfo.js',
},
external: ['cpu-instructions'],
resolve: {
extensions: ['.ts', '.js', '.svg'],
}, },
}, },
]) ])

View File

@ -1,7 +1,11 @@
declare const API_URL: string declare const API_URL: string
declare const CORTEX_ENGINE_VERSION: string declare const CORTEX_ENGINE_VERSION: string
declare const PLATFORM: string
declare const SOCKET_URL: string declare const SOCKET_URL: string
declare const NODE: string declare const NODE: string
declare const DEFAULT_REQUEST_PAYLOAD_TRANSFORM: string
declare const DEFAULT_RESPONSE_BODY_TRANSFORM: string
declare const DEFAULT_REQUEST_HEADERS_TRANSFORM: string
declare const DEFAULT_REMOTE_ENGINES: ({ declare const DEFAULT_REMOTE_ENGINES: ({
id: string id: string

View File

@ -19,12 +19,16 @@ import ky, { HTTPError } from 'ky'
import PQueue from 'p-queue' import PQueue from 'p-queue'
import { EngineError } from './error' import { EngineError } from './error'
import { getJanDataFolderPath } from '@janhq/core' import { getJanDataFolderPath } from '@janhq/core'
import { engineVariant } from './utils'
interface ModelList {
data: Model[]
}
/** /**
* JSONEngineManagementExtension is an EngineManagementExtension implementation that provides * JanEngineManagementExtension is an EngineManagementExtension implementation that provides
* functionality for managing engines. * functionality for managing engines.
*/ */
export default class JSONEngineManagementExtension extends EngineManagementExtension { export default class JanEngineManagementExtension extends EngineManagementExtension {
queue = new PQueue({ concurrency: 1 }) queue = new PQueue({ concurrency: 1 })
/** /**
@ -63,13 +67,12 @@ export default class JSONEngineManagementExtension extends EngineManagementExten
* @returns A Promise that resolves to an object of list engines. * @returns A Promise that resolves to an object of list engines.
*/ */
async getRemoteModels(name: string): Promise<any> { async getRemoteModels(name: string): Promise<any> {
return this.queue.add(() => return ky
ky
.get(`${API_URL}/v1/models/remote/${name}`) .get(`${API_URL}/v1/models/remote/${name}`)
.json<Model[]>() .json<ModelList>()
.then((e) => e) .catch(() => ({
.catch(() => []) data: [],
) as Promise<Model[]> })) as Promise<ModelList>
} }
/** /**
@ -138,9 +141,38 @@ export default class JSONEngineManagementExtension extends EngineManagementExten
* Add a new remote engine * Add a new remote engine
* @returns A Promise that resolves once the engine is installed. * @returns A Promise that resolves once the engine is installed.
*/ */
async addRemoteEngine(engineConfig: EngineConfig) { async addRemoteEngine(
engineConfig: EngineConfig,
persistModels: boolean = true
) {
// Populate default settings
if (
engineConfig.metadata?.transform_req?.chat_completions &&
!engineConfig.metadata.transform_req.chat_completions.template
)
engineConfig.metadata.transform_req.chat_completions.template =
DEFAULT_REQUEST_PAYLOAD_TRANSFORM
if (
engineConfig.metadata?.transform_resp?.chat_completions &&
!engineConfig.metadata.transform_resp.chat_completions?.template
)
engineConfig.metadata.transform_resp.chat_completions.template =
DEFAULT_RESPONSE_BODY_TRANSFORM
if (engineConfig.metadata && !engineConfig.metadata?.header_template)
engineConfig.metadata.header_template = DEFAULT_REQUEST_HEADERS_TRANSFORM
return this.queue.add(() => return this.queue.add(() =>
ky.post(`${API_URL}/v1/engines`, { json: engineConfig }).then((e) => e) ky.post(`${API_URL}/v1/engines`, { json: engineConfig }).then((e) => {
if (persistModels && engineConfig.metadata?.get_models_url) {
// Pull /models from remote models endpoint
return this.populateRemoteModels(engineConfig)
.then(() => e)
.catch(() => e)
}
return e
})
) as Promise<{ messages: string }> ) as Promise<{ messages: string }>
} }
@ -161,9 +193,25 @@ export default class JSONEngineManagementExtension extends EngineManagementExten
* @param model - Remote model object. * @param model - Remote model object.
*/ */
async addRemoteModel(model: Model) { async addRemoteModel(model: Model) {
return this.queue.add(() => return this.queue
ky.post(`${API_URL}/v1/models/add`, { json: model }).then((e) => e) .add(() =>
ky
.post(`${API_URL}/v1/models/add`, {
json: {
inference_params: {
max_tokens: 4096,
temperature: 0.7,
top_p: 0.95,
stream: true,
frequency_penalty: 0,
presence_penalty: 0,
},
...model,
},
})
.then((e) => e)
) )
.then(() => {})
} }
/** /**
@ -245,11 +293,7 @@ export default class JSONEngineManagementExtension extends EngineManagementExten
error instanceof EngineError error instanceof EngineError
) { ) {
const systemInfo = await systemInformation() const systemInfo = await systemInformation()
const variant = await executeOnMain( const variant = await engineVariant(systemInfo.gpuSetting)
NODE,
'engineVariant',
systemInfo.gpuSetting
)
await this.setDefaultEngineVariant(InferenceEngine.cortex_llamacpp, { await this.setDefaultEngineVariant(InferenceEngine.cortex_llamacpp, {
variant: variant, variant: variant,
version: `${CORTEX_ENGINE_VERSION}`, version: `${CORTEX_ENGINE_VERSION}`,
@ -293,14 +337,40 @@ export default class JSONEngineManagementExtension extends EngineManagementExten
data.api_key = api_key data.api_key = api_key
/// END - Migrate legacy api key settings /// END - Migrate legacy api key settings
await this.addRemoteEngine(data).catch(console.error) await this.addRemoteEngine(data, false).catch(console.error)
}) })
) )
events.emit(EngineEvent.OnEngineUpdate, {}) events.emit(EngineEvent.OnEngineUpdate, {})
DEFAULT_REMOTE_MODELS.forEach(async (data: Model) => { await Promise.all(
await this.addRemoteModel(data).catch(() => {}) DEFAULT_REMOTE_MODELS.map((data: Model) =>
}) this.addRemoteModel(data).catch(() => {})
)
)
events.emit(ModelEvent.OnModelsUpdate, { fetch: true }) events.emit(ModelEvent.OnModelsUpdate, { fetch: true })
} }
} }
/**
* Pulls models list from the remote provider and persist
* @param engineConfig
* @returns
*/
private populateRemoteModels = async (engineConfig: EngineConfig) => {
return this.getRemoteModels(engineConfig.engine)
.then((models: ModelList) => {
if (models?.data)
Promise.all(
models.data.map((model) =>
this.addRemoteModel({
...model,
engine: engineConfig.engine as InferenceEngine,
model: model.model ?? model.id,
}).catch(console.info)
)
).then(() => {
events.emit(ModelEvent.OnModelsUpdate, { fetch: true })
})
})
.catch(console.info)
}
} }

View File

@ -1,27 +0,0 @@
import { cpuInfo } from 'cpu-instructions'
// Check the CPU info and determine the supported instruction set
const info = cpuInfo.cpuInfo().some((e) => e.toUpperCase() === 'AVX512')
? 'avx512'
: cpuInfo.cpuInfo().some((e) => e.toUpperCase() === 'AVX2')
? 'avx2'
: cpuInfo.cpuInfo().some((e) => e.toUpperCase() === 'AVX')
? 'avx'
: 'noavx'
// Send the result and wait for confirmation before exiting
new Promise<void>((resolve, reject) => {
// @ts-ignore
process.send(info, (error: Error | null) => {
if (error) {
reject(error)
} else {
resolve()
}
})
})
.then(() => process.exit(0))
.catch((error) => {
console.error('Failed to send info:', error)
process.exit(1)
})

View File

@ -1,7 +1,6 @@
import { describe, expect, it } from '@jest/globals' import { describe, expect, it } from '@jest/globals'
import engine from './index' import engine from './index'
import { GpuSetting } from '@janhq/core/node' import { GpuSetting } from '@janhq/core'
import { cpuInfo } from 'cpu-instructions'
import { fork } from 'child_process' import { fork } from 'child_process'
let testSettings: GpuSetting = { let testSettings: GpuSetting = {
@ -23,22 +22,12 @@ let testSettings: GpuSetting = {
} }
const originalPlatform = process.platform const originalPlatform = process.platform
jest.mock('cpu-instructions', () => ({
cpuInfo: {
cpuInfo: jest.fn(),
},
}))
let mockCpuInfo = cpuInfo.cpuInfo as jest.Mock
mockCpuInfo.mockReturnValue([])
jest.mock('@janhq/core/node', () => ({
jest.mock('@janhq/core', () => ({
appResourcePath: () => '.', appResourcePath: () => '.',
log: jest.fn(), log: jest.fn(),
})) }))
jest.mock('child_process', () => ({
fork: jest.fn(),
}))
const mockFork = fork as jest.Mock
describe('test executable cortex file', () => { describe('test executable cortex file', () => {
afterAll(function () { afterAll(function () {
@ -48,14 +37,7 @@ describe('test executable cortex file', () => {
}) })
it('executes on MacOS', () => { it('executes on MacOS', () => {
const mockProcess = {
on: jest.fn((event, callback) => {
if (event === 'message') {
callback('noavx')
}
}),
send: jest.fn(),
}
Object.defineProperty(process, 'platform', { Object.defineProperty(process, 'platform', {
value: 'darwin', value: 'darwin',
}) })
@ -63,7 +45,7 @@ describe('test executable cortex file', () => {
value: 'arm64', value: 'arm64',
}) })
mockFork.mockReturnValue(mockProcess)
expect(engine.engineVariant(testSettings)).resolves.toEqual('mac-arm64') expect(engine.engineVariant(testSettings)).resolves.toEqual('mac-arm64')
}) })
@ -83,7 +65,7 @@ describe('test executable cortex file', () => {
}), }),
send: jest.fn(), send: jest.fn(),
} }
mockFork.mockReturnValue(mockProcess)
Object.defineProperty(process, 'arch', { Object.defineProperty(process, 'arch', {
value: 'x64', value: 'x64',
}) })
@ -107,7 +89,6 @@ describe('test executable cortex file', () => {
}), }),
send: jest.fn(), send: jest.fn(),
} }
mockFork.mockReturnValue(mockProcess)
expect(engine.engineVariant()).resolves.toEqual('windows-amd64-avx') expect(engine.engineVariant()).resolves.toEqual('windows-amd64-avx')
}) })
@ -145,7 +126,6 @@ describe('test executable cortex file', () => {
}), }),
send: jest.fn(), send: jest.fn(),
} }
mockFork.mockReturnValue(mockProcess)
expect(engine.engineVariant(settings)).resolves.toEqual( expect(engine.engineVariant(settings)).resolves.toEqual(
'windows-amd64-avx2-cuda-11-7' 'windows-amd64-avx2-cuda-11-7'
@ -176,26 +156,11 @@ describe('test executable cortex file', () => {
}, },
], ],
} }
mockFork.mockReturnValue({
on: jest.fn((event, callback) => {
if (event === 'message') {
callback('noavx')
}
}),
send: jest.fn(),
})
expect(engine.engineVariant(settings)).resolves.toEqual( expect(engine.engineVariant(settings)).resolves.toEqual(
'windows-amd64-noavx-cuda-12-0' 'windows-amd64-noavx-cuda-12-0'
) )
mockFork.mockReturnValue({
on: jest.fn((event, callback) => {
if (event === 'message') {
callback('avx512')
}
}),
send: jest.fn(),
})
expect(engine.engineVariant(settings)).resolves.toEqual( expect(engine.engineVariant(settings)).resolves.toEqual(
'windows-amd64-avx2-cuda-12-0' 'windows-amd64-avx2-cuda-12-0'
) )
@ -209,14 +174,6 @@ describe('test executable cortex file', () => {
...testSettings, ...testSettings,
run_mode: 'cpu', run_mode: 'cpu',
} }
mockFork.mockReturnValue({
on: jest.fn((event, callback) => {
if (event === 'message') {
callback('noavx')
}
}),
send: jest.fn(),
})
expect(engine.engineVariant()).resolves.toEqual('linux-amd64-noavx') expect(engine.engineVariant()).resolves.toEqual('linux-amd64-noavx')
}) })
@ -245,16 +202,6 @@ describe('test executable cortex file', () => {
}, },
], ],
} }
mockFork.mockReturnValue({
on: jest.fn((event, callback) => {
if (event === 'message') {
callback('avx512')
}
}),
send: jest.fn(),
})
expect(engine.engineVariant(settings)).resolves.toBe( expect(engine.engineVariant(settings)).resolves.toBe(
'linux-amd64-avx2-cuda-11-7' 'linux-amd64-avx2-cuda-11-7'
) )
@ -284,14 +231,7 @@ describe('test executable cortex file', () => {
}, },
], ],
} }
mockFork.mockReturnValue({
on: jest.fn((event, callback) => {
if (event === 'message') {
callback('avx2')
}
}),
send: jest.fn(),
})
expect(engine.engineVariant(settings)).resolves.toEqual( expect(engine.engineVariant(settings)).resolves.toEqual(
'linux-amd64-avx2-cuda-12-0' 'linux-amd64-avx2-cuda-12-0'
@ -310,15 +250,6 @@ describe('test executable cortex file', () => {
const cpuInstructions = ['avx512', 'avx2', 'avx', 'noavx'] const cpuInstructions = ['avx512', 'avx2', 'avx', 'noavx']
cpuInstructions.forEach((instruction) => { cpuInstructions.forEach((instruction) => {
mockFork.mockReturnValue({
on: jest.fn((event, callback) => {
if (event === 'message') {
callback(instruction)
}
}),
send: jest.fn(),
})
expect(engine.engineVariant(settings)).resolves.toEqual( expect(engine.engineVariant(settings)).resolves.toEqual(
`linux-amd64-${instruction}` `linux-amd64-${instruction}`
) )
@ -335,14 +266,7 @@ describe('test executable cortex file', () => {
} }
const cpuInstructions = ['avx512', 'avx2', 'avx', 'noavx'] const cpuInstructions = ['avx512', 'avx2', 'avx', 'noavx']
cpuInstructions.forEach((instruction) => { cpuInstructions.forEach((instruction) => {
mockFork.mockReturnValue({
on: jest.fn((event, callback) => {
if (event === 'message') {
callback(instruction)
}
}),
send: jest.fn(),
})
expect(engine.engineVariant(settings)).resolves.toEqual( expect(engine.engineVariant(settings)).resolves.toEqual(
`windows-amd64-${instruction}` `windows-amd64-${instruction}`
) )
@ -376,14 +300,7 @@ describe('test executable cortex file', () => {
} }
const cpuInstructions = ['avx512', 'avx2', 'avx', 'noavx'] const cpuInstructions = ['avx512', 'avx2', 'avx', 'noavx']
cpuInstructions.forEach((instruction) => { cpuInstructions.forEach((instruction) => {
mockFork.mockReturnValue({
on: jest.fn((event, callback) => {
if (event === 'message') {
callback(instruction)
}
}),
send: jest.fn(),
})
expect(engine.engineVariant(settings)).resolves.toEqual( expect(engine.engineVariant(settings)).resolves.toEqual(
`windows-amd64-${instruction === 'avx512' || instruction === 'avx2' ? 'avx2' : 'noavx'}-cuda-12-0` `windows-amd64-${instruction === 'avx512' || instruction === 'avx2' ? 'avx2' : 'noavx'}-cuda-12-0`
) )
@ -417,14 +334,7 @@ describe('test executable cortex file', () => {
], ],
} }
cpuInstructions.forEach((instruction) => { cpuInstructions.forEach((instruction) => {
mockFork.mockReturnValue({
on: jest.fn((event, callback) => {
if (event === 'message') {
callback(instruction)
}
}),
send: jest.fn(),
})
expect(engine.engineVariant(settings)).resolves.toEqual( expect(engine.engineVariant(settings)).resolves.toEqual(
`linux-amd64-${instruction === 'avx512' || instruction === 'avx2' ? 'avx2' : 'noavx'}-cuda-12-0` `linux-amd64-${instruction === 'avx512' || instruction === 'avx2' ? 'avx2' : 'noavx'}-cuda-12-0`
) )
@ -459,14 +369,7 @@ describe('test executable cortex file', () => {
], ],
} }
cpuInstructions.forEach((instruction) => { cpuInstructions.forEach((instruction) => {
mockFork.mockReturnValue({
on: jest.fn((event, callback) => {
if (event === 'message') {
callback(instruction)
}
}),
send: jest.fn(),
})
expect(engine.engineVariant(settings)).resolves.toEqual( expect(engine.engineVariant(settings)).resolves.toEqual(
`linux-amd64-vulkan` `linux-amd64-vulkan`
) )

View File

@ -2,111 +2,10 @@ import * as path from 'path'
import { import {
appResourcePath, appResourcePath,
getJanDataFolderPath, getJanDataFolderPath,
GpuSetting,
log, log,
} from '@janhq/core/node' } from '@janhq/core/node'
import { fork } from 'child_process'
import { mkdir, readdir, symlink } from 'fs/promises' import { mkdir, readdir, symlink } from 'fs/promises'
/**
* The GPU runMode that will be set - either 'vulkan', 'cuda', or empty for cpu.
* @param settings
* @returns
*/
const gpuRunMode = (settings?: GpuSetting): string => {
if (process.platform === 'darwin')
// MacOS now has universal binaries
return ''
if (!settings) return ''
return settings.vulkan === true || settings.run_mode === 'cpu' ? '' : 'cuda'
}
/**
* The OS & architecture that the current process is running on.
* @returns win, mac-x64, mac-arm64, or linux
*/
const os = (): string => {
return process.platform === 'win32'
? 'windows-amd64'
: process.platform === 'darwin'
? process.arch === 'arm64'
? 'mac-arm64'
: 'mac-amd64'
: 'linux-amd64'
}
/**
* The CUDA version that will be set - either '11-7' or '12-0'.
* @param settings
* @returns
*/
const cudaVersion = (settings?: GpuSetting): '11-7' | '12-0' | undefined => {
const isUsingCuda =
settings?.vulkan !== true &&
settings?.run_mode === 'gpu' &&
!os().includes('mac')
if (!isUsingCuda) return undefined
return settings?.cuda?.version === '11' ? '11-7' : '12-0'
}
/**
* The CPU instructions that will be set - either 'avx512', 'avx2', 'avx', or 'noavx'.
* @returns
*/
const cpuInstructions = async (): Promise<string> => {
if (process.platform === 'darwin') return ''
const child = fork(path.join(__dirname, './cpuInfo.js')) // Path to the child process file
return new Promise((resolve, reject) => {
child.on('message', (cpuInfo?: string) => {
resolve(cpuInfo ?? 'noavx')
child.kill() // Kill the child process after receiving the result
})
child.on('error', (err) => {
resolve('noavx')
child.kill()
})
child.on('exit', (code) => {
if (code !== 0) {
resolve('noavx')
child.kill()
}
})
})
}
/**
* Find which variant to run based on the current platform.
*/
const engineVariant = async (gpuSetting?: GpuSetting): Promise<string> => {
const cpuInstruction = await cpuInstructions()
log(`[CORTEX]: CPU instruction: ${cpuInstruction}`)
let engineVariant = [
os(),
gpuSetting?.vulkan
? 'vulkan'
: gpuRunMode(gpuSetting) !== 'cuda'
? // CPU mode - support all variants
cpuInstruction
: // GPU mode - packaged CUDA variants of avx2 and noavx
cpuInstruction === 'avx2' || cpuInstruction === 'avx512'
? 'avx2'
: 'noavx',
gpuRunMode(gpuSetting),
cudaVersion(gpuSetting),
]
.filter((e) => !!e)
.join('-')
log(`[CORTEX]: Engine variant: ${engineVariant}`)
return engineVariant
}
/** /**
* Create symlink to each variant for the default bundled version * Create symlink to each variant for the default bundled version
@ -148,6 +47,5 @@ const symlinkEngines = async () => {
} }
export default { export default {
engineVariant,
symlinkEngines, symlinkEngines,
} }

View File

@ -0,0 +1,86 @@
import { GpuSetting, log } from '@janhq/core'
/**
* The GPU run mode that will be set - either 'cuda' or empty for CPU.
* @param settings
* @returns
*/
const gpuRunMode = (settings?: GpuSetting): string => {
return settings?.gpus?.some(
(gpu) =>
gpu.activated === true &&
gpu.additional_information &&
gpu.additional_information.driver_version
)
? 'cuda'
: ''
}
/**
* The OS & architecture that the current process is running on.
* @returns windows-amd64, mac-amd64, mac-arm64, or linux-amd64
*/
const os = (settings?: GpuSetting): string => {
return PLATFORM === 'win32'
? 'windows-amd64'
: PLATFORM === 'darwin'
? settings?.cpu?.arch === 'arm64'
? 'mac-arm64'
: 'mac-amd64'
: 'linux-amd64'
}
/**
* The CUDA version that will be set - either '11-7' or '12-0'.
* @param settings
* @returns
*/
const cudaVersion = (settings?: GpuSetting): '12-0' | '11-7' | undefined => {
const isUsingCuda =
settings?.vulkan !== true &&
settings?.gpus?.some((gpu) => gpu.activated === true) &&
!os().includes('mac')
if (!isUsingCuda) return undefined
// return settings?.cuda?.version === '11' ? '11-7' : '12-0'
return settings.gpus?.some((gpu) => gpu.version.includes('12'))
? '12-0'
: '11-7'
}
/**
 * Find which variant to run based on the current platform, the reported CPU
 * instruction set ('avx512', 'avx2', 'avx', or 'noavx'), and the GPU setting.
 */
export const engineVariant = async (
gpuSetting?: GpuSetting
): Promise<string> => {
const platform = os(gpuSetting)
// There is no need to append the variant extension for mac
if (platform.startsWith('mac')) return platform
let engineVariant =
gpuSetting?.vulkan || gpuSetting?.gpus?.some((e) => !e.additional_information)
? [platform, 'vulkan']
: [
platform,
gpuRunMode(gpuSetting) === 'cuda' &&
(gpuSetting?.cpu?.instructions?.includes('avx2') ||
gpuSetting?.cpu?.instructions?.includes('avx512'))
? 'avx2'
: 'noavx',
gpuRunMode(gpuSetting),
cudaVersion(gpuSetting),
].filter(Boolean) // Remove any falsy values
let engineVariantString = engineVariant.join('-')
log(`[CORTEX]: Engine variant: ${engineVariantString}`)
return engineVariantString
}
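
To make the mapping concrete, here is a hedged sketch of the variant strings the function above would resolve to, assuming the build-time `PLATFORM` constant is `'linux'` and the simplified, hypothetical `GpuSetting` literals below (field names follow the code above; the concrete values are illustrative only):

```typescript
// Hypothetical settings; cast to `any` because only the fields read by
// engineVariant() are filled in here.
const cudaSetting = {
  vulkan: false,
  cpu: { arch: 'x64', instructions: ['avx2'] },
  gpus: [
    {
      activated: true,
      version: '12.0',
      additional_information: { driver_version: '550.00' },
    },
  ],
} as any

const cpuOnlySetting = {
  vulkan: false,
  cpu: { arch: 'x64', instructions: ['avx'] },
  gpus: [],
} as any

// With PLATFORM === 'linux', these would resolve to roughly:
//   engineVariant(cudaSetting)    -> 'linux-amd64-avx2-cuda-12-0'
//   engineVariant(cpuOnlySetting) -> 'linux-amd64-noavx'
```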

View File

@ -0,0 +1,5 @@
/** @type {import('ts-jest').JestConfigWithTsJest} */
module.exports = {
preset: 'ts-jest',
testEnvironment: 'node',
}

View File

@ -0,0 +1,48 @@
{
"name": "@janhq/hardware-management-extension",
"productName": "Hardware Management",
"version": "1.0.0",
"description": "Manages Better Hardware settings.",
"main": "dist/index.js",
"node": "dist/node/index.cjs.js",
"author": "Jan <service@jan.ai>",
"license": "MIT",
"scripts": {
"test": "jest",
"build": "rolldown -c rolldown.config.mjs",
"codesign:darwin": "../../.github/scripts/auto-sign.sh",
"codesign:win32:linux": "echo 'No codesigning required'",
"codesign": "run-script-os",
"build:publish": "rimraf *.tgz --glob || true && yarn build && yarn codesign && npm pack && cpx *.tgz ../../pre-install"
},
"exports": {
".": "./dist/index.js",
"./main": "./dist/module.js"
},
"devDependencies": {
"cpx": "^1.5.0",
"rimraf": "^3.0.2",
"rolldown": "^1.0.0-beta.1",
"run-script-os": "^1.1.6",
"ts-loader": "^9.5.0",
"typescript": "^5.3.3"
},
"dependencies": {
"@janhq/core": "../../core/package.tgz",
"cpu-instructions": "^0.0.13",
"ky": "^1.7.2",
"p-queue": "^8.0.1"
},
"bundledDependencies": [
"cpu-instructions",
"@janhq/core"
],
"hardwares": {
"node": ">=18.0.0"
},
"files": [
"dist/*",
"package.json",
"README.md"
]
}

View File

@ -0,0 +1,17 @@
import { defineConfig } from 'rolldown'
import pkgJson from './package.json' with { type: 'json' }
export default defineConfig([
{
input: 'src/index.ts',
output: {
format: 'esm',
file: 'dist/index.js',
},
define: {
NODE: JSON.stringify(`${pkgJson.name}/${pkgJson.node}`),
API_URL: JSON.stringify('http://127.0.0.1:39291'),
SOCKET_URL: JSON.stringify('ws://127.0.0.1:39291'),
},
},
])

View File

@ -0,0 +1,12 @@
declare const API_URL: string
declare const SOCKET_URL: string
declare const NODE: string
interface Core {
api: APIFunctions
events: EventEmitter
}
interface Window {
core?: Core | undefined
electronAPI?: any | undefined
}
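
Taken together, the `define` block in the rolldown config above and these ambient declarations let the extension code reference `API_URL`, `SOCKET_URL`, and `NODE` as plain globals: the declarations satisfy the TypeScript compiler, and the bundler substitutes each identifier with its literal at build time. A small sketch of the idea, using the `healthz` call that appears later in this extension:

```typescript
import ky from 'ky'

// Declared in global.d.ts; injected as a string literal by rolldown's `define`.
declare const API_URL: string

// As written in the extension source:
export const healthz = () => ky.get(`${API_URL}/healthz`)

// After bundling, the identifier is replaced by the configured literal, roughly:
//   ky.get(`${'http://127.0.0.1:39291'}/healthz`)
```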

View File

@ -0,0 +1,67 @@
import {
HardwareManagementExtension,
HardwareInformation,
} from '@janhq/core'
import ky from 'ky'
import PQueue from 'p-queue'
/**
* JSONHardwareManagementExtension is a HardwareManagementExtension implementation that provides
* functionality for managing hardware.
*/
export default class JSONHardwareManagementExtension extends HardwareManagementExtension {
queue = new PQueue({ concurrency: 1 })
/**
* Called when the extension is loaded.
*/
async onLoad() {
// Run Healthcheck
this.queue.add(() => this.healthz())
}
/**
* Called when the extension is unloaded.
*/
onUnload() {}
/**
* Do health check on cortex.cpp
* @returns
*/
async healthz(): Promise<void> {
return ky
.get(`${API_URL}/healthz`, {
retry: { limit: 20, delay: () => 500, methods: ['get'] },
})
.then(() => {})
}
/**
* @returns A Promise that resolves to the system's hardware information.
*/
async getHardware(): Promise<HardwareInformation> {
return this.queue.add(() =>
ky
.get(`${API_URL}/v1/hardware`)
.json<HardwareInformation>()
.then((e) => e)
) as Promise<HardwareInformation>
}
/**
* @returns A Promise that resolves to the result of the GPU activation request.
*/
async setAvtiveGpu(data: { gpus: number[] }): Promise<{
message: string
activated_gpus: number[]
}> {
return this.queue.add(() =>
ky.post(`${API_URL}/v1/hardware/activate`, { json: data }).then((e) => e)
) as Promise<{
message: string
activated_gpus: number[]
}>
}
}
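
For orientation, a hedged usage sketch of this extension, assuming `hw` is the loaded instance (resolution through Jan's extension manager is outside this diff) and that GPU indices `0` and `1` exist on the machine:

```typescript
// Hypothetical wiring: `hw` stands in for the loaded extension instance.
declare const hw: JSONHardwareManagementExtension

async function listAndActivateGpus() {
  await hw.healthz()                      // wait until cortex.cpp answers /healthz
  const info = await hw.getHardware()     // GET /v1/hardware
  console.log(info)
  await hw.setAvtiveGpu({ gpus: [0, 1] }) // POST /v1/hardware/activate
}
```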

View File

@ -8,7 +8,9 @@
"forceConsistentCasingInFileNames": true, "forceConsistentCasingInFileNames": true,
"strict": false, "strict": false,
"skipLibCheck": true, "skipLibCheck": true,
"rootDir": "./src" "rootDir": "./src",
"resolveJsonModule": true
}, },
"include": ["./src"] "include": ["./src"],
"exclude": ["src/**/*.test.ts", "rolldown.config.mjs"]
} }

View File

@ -1 +1 @@
1.0.9-rc4 1.0.10

View File

@ -1,7 +1,7 @@
{ {
"name": "@janhq/inference-cortex-extension", "name": "@janhq/inference-cortex-extension",
"productName": "Cortex Inference Engine", "productName": "Cortex Inference Engine",
"version": "1.0.24", "version": "1.0.25",
"description": "This extension embeds cortex.cpp, a lightweight inference engine written in C++. See https://jan.ai.\nAdditional dependencies could be installed to run without Cuda Toolkit installation.", "description": "This extension embeds cortex.cpp, a lightweight inference engine written in C++. See https://jan.ai.\nAdditional dependencies could be installed to run without Cuda Toolkit installation.",
"main": "dist/index.js", "main": "dist/index.js",
"node": "dist/node/index.cjs.js", "node": "dist/node/index.cjs.js",

View File

@ -76,7 +76,7 @@
}, },
{ {
"key": "use_mmap", "key": "use_mmap",
"title": "MMAP", "title": "mmap",
"description": "Loads model files more efficiently by mapping them to memory, reducing RAM usage.", "description": "Loads model files more efficiently by mapping them to memory, reducing RAM usage.",
"controllerType": "checkbox", "controllerType": "checkbox",
"controllerProps": { "controllerProps": {

View File

@ -0,0 +1,35 @@
{
"sources": [
{
"filename": "DeepSeek-R1-Distill-Llama-70B-Q4_K_M.gguf",
"url": "https://huggingface.co/unsloth/DeepSeek-R1-Distill-Llama-70B-GGUF/resolve/main/DeepSeek-R1-Distill-Llama-70B-Q4_K_M.gguf"
}
],
"id": "deepseek-r1-distill-llama-70b",
"object": "model",
"name": "DeepSeek R1 Distill Llama 70B Q4",
"version": "1.0",
"description": "DeepSeek-R1 is a cheaper and open-source model that excels at agentic reasoning, superior multilingual capabilities, large context windows, and generalization across domains.",
"format": "gguf",
"settings": {
"ctx_len": 131072,
"prompt_template": "<User> {prompt} <Assistant>",
"llama_model_path": "DeepSeek-R1-Distill-Llama-70B-Q4_K_M.gguf",
"ngl": 81
},
"parameters": {
"temperature": 0.6,
"top_p": 0.95,
"stream": true,
"max_tokens": 131072,
"stop": [],
"frequency_penalty": 0,
"presence_penalty": 0
},
"metadata": {
"author": "DeepSeek",
"tags": ["70B", "Featured"],
"size": 42500000000
},
"engine": "llama-cpp"
}

View File

@ -0,0 +1,35 @@
{
"sources": [
{
"filename": "DeepSeek-R1-Distill-Llama-8B-Q5_K_M.gguf",
"url": "https://huggingface.co/unsloth/DeepSeek-R1-Distill-Llama-8B-GGUF/resolve/main/DeepSeek-R1-Distill-Llama-8B-Q5_K_M.gguf"
}
],
"id": "deepseek-r1-distill-llama-8b",
"object": "model",
"name": "DeepSeek R1 Distill Llama 8B Q5",
"version": "1.0",
"description": "DeepSeek-R1 is a cheaper and open-source model that excels at agentic reasoning, superior multilingual capabilities, large context windows, and generalization across domains.",
"format": "gguf",
"settings": {
"ctx_len": 131072,
"prompt_template": "<User> {prompt} <Assistant>",
"llama_model_path": "DeepSeek-R1-Distill-Llama-8B-Q5_K_M.gguf",
"ngl": 33
},
"parameters": {
"temperature": 0.6,
"top_p": 0.95,
"stream": true,
"max_tokens": 131072,
"stop": [],
"frequency_penalty": 0,
"presence_penalty": 0
},
"metadata": {
"author": "DeepSeek",
"tags": ["8B", "Featured"],
"size": 5730000000
},
"engine": "llama-cpp"
}

View File

@ -0,0 +1,35 @@
{
"sources": [
{
"filename": "DeepSeek-R1-Distill-Qwen-1.5B-Q5_K_M.gguf",
"url": "https://huggingface.co/unsloth/DeepSeek-R1-Distill-Qwen-1.5B-GGUF/resolve/main/DeepSeek-R1-Distill-Qwen-1.5B-Q5_K_M.gguf"
}
],
"id": "deepseek-r1-distill-qwen-1.5b",
"object": "model",
"name": "DeepSeek R1 Distill Qwen 1.5B Q5",
"version": "1.0",
"description": "DeepSeek-R1 is a cheaper and open-source model that excels at agentic reasoning, superior multilingual capabilities, large context windows, and generalization across domains.",
"format": "gguf",
"settings": {
"ctx_len": 131072,
"prompt_template": "<User> {prompt} <Assistant>",
"llama_model_path": "DeepSeek-R1-Distill-Qwen-1.5B-Q5_K_M.gguf",
"ngl": 29
},
"parameters": {
"temperature": 0.6,
"top_p": 0.95,
"stream": true,
"max_tokens": 131072,
"stop": [],
"frequency_penalty": 0,
"presence_penalty": 0
},
"metadata": {
"author": "DeepSeek",
"tags": ["1.5B", "Featured"],
"size": 1290000000
},
"engine": "llama-cpp"
}

View File

@ -0,0 +1,35 @@
{
"sources": [
{
"filename": "DeepSeek-R1-Distill-Qwen-14B-Q4_K_M.gguf",
"url": "https://huggingface.co/unsloth/DeepSeek-R1-Distill-Qwen-14B-GGUF/resolve/main/DeepSeek-R1-Distill-Qwen-14B-Q4_K_M.gguf"
}
],
"id": "deepseek-r1-distill-qwen-14b",
"object": "model",
"name": "DeepSeek R1 Distill Qwen 14B Q4",
"version": "1.0",
"description": "DeepSeek-R1 is a cheaper and open-source model that excels at agentic reasoning, superior multilingual capabilities, large context windows, and generalization across domains.",
"format": "gguf",
"settings": {
"ctx_len": 131072,
"prompt_template": "<User> {prompt} <Assistant>",
"llama_model_path": "DeepSeek-R1-Distill-Qwen-14B-Q4_K_M.gguf",
"ngl": 49
},
"parameters": {
"temperature": 0.6,
"top_p": 0.95,
"stream": true,
"max_tokens": 131072,
"stop": [],
"frequency_penalty": 0,
"presence_penalty": 0
},
"metadata": {
"author": "DeepSeek",
"tags": ["14B", "Featured"],
"size": 8990000000
},
"engine": "llama-cpp"
}

View File

@ -0,0 +1,35 @@
{
"sources": [
{
"filename": "DeepSeek-R1-Distill-Qwen-32B-Q4_K_M.gguf",
"url": "https://huggingface.co/unsloth/DeepSeek-R1-Distill-Qwen-32B-GGUF/resolve/main/DeepSeek-R1-Distill-Qwen-32B-Q4_K_M.gguf"
}
],
"id": "deepseek-r1-distill-qwen-32b",
"object": "model",
"name": "DeepSeek R1 Distill Qwen 32B Q4",
"version": "1.0",
"description": "DeepSeek-R1 is a cheaper and open-source model that excels at agentic reasoning, superior multilingual capabilities, large context windows, and generalization across domains.",
"format": "gguf",
"settings": {
"ctx_len": 131072,
"prompt_template": "<User> {prompt} <Assistant>",
"llama_model_path": "DeepSeek-R1-Distill-Qwen-32B-Q4_K_M.gguf",
"ngl": 65
},
"parameters": {
"temperature": 0.6,
"top_p": 0.95,
"stream": true,
"max_tokens": 131072,
"stop": [],
"frequency_penalty": 0,
"presence_penalty": 0
},
"metadata": {
"author": "DeepSeek",
"tags": ["32B", "Featured"],
"size": 19900000000
},
"engine": "llama-cpp"
}

View File

@ -0,0 +1,35 @@
{
"sources": [
{
"filename": "DeepSeek-R1-Distill-Qwen-7B-Q5_K_M.gguf",
"url": "https://huggingface.co/unsloth/DeepSeek-R1-Distill-Qwen-7B-GGUF/resolve/main/DeepSeek-R1-Distill-Qwen-7B-Q5_K_M.gguf"
}
],
"id": "deepseek-r1-distill-qwen-7b",
"object": "model",
"name": "DeepSeek R1 Distill Qwen 7B Q5",
"version": "1.0",
"description": "DeepSeek-R1 is a cheaper and open-source model that excels at agentic reasoning, superior multilingual capabilities, large context windows, and generalization across domains.",
"format": "gguf",
"settings": {
"ctx_len": 131072,
"prompt_template": "<User> {prompt} <Assistant>",
"llama_model_path": "DeepSeek-R1-Distill-Qwen-7B-Q5_K_M.gguf",
"ngl": 29
},
"parameters": {
"temperature": 0.6,
"top_p": 0.95,
"stream": true,
"max_tokens": 131072,
"stop": [],
"frequency_penalty": 0,
"presence_penalty": 0
},
"metadata": {
"author": "DeepSeek",
"tags": ["7B", "Featured"],
"size": 5440000000
},
"engine": "llama-cpp"
}
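
For orientation, the `parameters` block in these model manifests maps directly onto an OpenAI-compatible chat-completion payload sent to the local cortex.cpp server. A hedged sketch, assuming the `/v1/chat/completions` route on the `http://127.0.0.1:39291` base URL configured elsewhere in this diff (the route itself is an assumption based on cortex.cpp's OpenAI-compatible API):

```typescript
// Sketch only: the endpoint path and payload wiring are assumptions,
// not part of this diff. Values mirror the "parameters" block above.
const body = {
  model: 'deepseek-r1-distill-qwen-7b',
  messages: [{ role: 'user', content: 'Hello' }],
  temperature: 0.6,
  top_p: 0.95,
  stream: false, // set to true to receive server-sent events instead of one JSON body
  max_tokens: 131072,
  stop: [],
  frequency_penalty: 0,
  presence_penalty: 0,
}

fetch('http://127.0.0.1:39291/v1/chat/completions', {
  method: 'POST',
  headers: { 'Content-Type': 'application/json' },
  body: JSON.stringify(body),
})
  .then((res) => res.json())
  .then(console.log)
  .catch(console.error)
```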

View File

@ -22,19 +22,13 @@
"top_p": 0.95, "top_p": 0.95,
"stream": true, "stream": true,
"max_tokens": 8192, "max_tokens": 8192,
"stop": [ "stop": ["<|end_of_text|>", "<|eot_id|>", "<|eom_id|>"],
"<|end_of_text|>",
"<|eot_id|>",
"<|eom_id|>"
],
"frequency_penalty": 0, "frequency_penalty": 0,
"presence_penalty": 0 "presence_penalty": 0
}, },
"metadata": { "metadata": {
"author": "MetaAI", "author": "MetaAI",
"tags": [ "tags": ["8B", "Featured"],
"8B", "Featured"
],
"size": 4920000000 "size": 4920000000
}, },
"engine": "llama-cpp" "engine": "llama-cpp"

View File

@ -49,6 +49,13 @@ import qwen2514bJson from './resources/models/qwen2.5-14b-instruct/model.json' w
import qwen2532bJson from './resources/models/qwen2.5-32b-instruct/model.json' with { type: 'json' } import qwen2532bJson from './resources/models/qwen2.5-32b-instruct/model.json' with { type: 'json' }
import qwen2572bJson from './resources/models/qwen2.5-72b-instruct/model.json' with { type: 'json' } import qwen2572bJson from './resources/models/qwen2.5-72b-instruct/model.json' with { type: 'json' }
import deepseekR1DistillQwen_1_5b from './resources/models/deepseek-r1-distill-qwen-1.5b/model.json' with { type: 'json' }
import deepseekR1DistillQwen_7b from './resources/models/deepseek-r1-distill-qwen-7b/model.json' with { type: 'json' }
import deepseekR1DistillQwen_14b from './resources/models/deepseek-r1-distill-qwen-14b/model.json' with { type: 'json' }
import deepseekR1DistillQwen_32b from './resources/models/deepseek-r1-distill-qwen-32b/model.json' with { type: 'json' }
import deepseekR1DistillLlama_8b from './resources/models/deepseek-r1-distill-llama-8b/model.json' with { type: 'json' }
import deepseekR1DistillLlama_70b from './resources/models/deepseek-r1-distill-llama-70b/model.json' with { type: 'json' }
export default defineConfig([ export default defineConfig([
{ {
input: 'src/index.ts', input: 'src/index.ts',
@ -106,12 +113,18 @@ export default defineConfig([
qwen2514bJson, qwen2514bJson,
qwen2532bJson, qwen2532bJson,
qwen2572bJson, qwen2572bJson,
deepseekR1DistillQwen_1_5b,
deepseekR1DistillQwen_7b,
deepseekR1DistillQwen_14b,
deepseekR1DistillQwen_32b,
deepseekR1DistillLlama_8b,
deepseekR1DistillLlama_70b,
]), ]),
NODE: JSON.stringify(`${packageJson.name}/${packageJson.node}`), NODE: JSON.stringify(`${packageJson.name}/${packageJson.node}`),
SETTINGS: JSON.stringify(defaultSettingJson), SETTINGS: JSON.stringify(defaultSettingJson),
CORTEX_API_URL: JSON.stringify('http://127.0.0.1:39291'), CORTEX_API_URL: JSON.stringify('http://127.0.0.1:39291'),
CORTEX_SOCKET_URL: JSON.stringify('ws://127.0.0.1:39291'), CORTEX_SOCKET_URL: JSON.stringify('ws://127.0.0.1:39291'),
CORTEX_ENGINE_VERSION: JSON.stringify('v0.1.46'), CORTEX_ENGINE_VERSION: JSON.stringify('v0.1.49'),
}, },
}, },
{ {

View File

@ -112,8 +112,8 @@ export default class JanInferenceCortexExtension extends LocalOAIEngine {
if (!Number.isNaN(threads_number)) this.cpu_threads = threads_number if (!Number.isNaN(threads_number)) this.cpu_threads = threads_number
// Run the process watchdog // Run the process watchdog
const systemInfo = await systemInformation() // const systemInfo = await systemInformation()
this.queue.add(() => executeOnMain(NODE, 'run', systemInfo)) this.queue.add(() => executeOnMain(NODE, 'run'))
this.queue.add(() => this.healthz()) this.queue.add(() => this.healthz())
this.subscribeToEvents() this.subscribeToEvents()

View File

@ -16,15 +16,20 @@ let watchdog: ProcessWatchdog | undefined = undefined
* Spawns a Nitro subprocess. * Spawns a Nitro subprocess.
* @returns A promise that resolves when the Nitro subprocess is started. * @returns A promise that resolves when the Nitro subprocess is started.
*/ */
function run(systemInfo?: SystemInformation): Promise<any> { function run(): Promise<any> {
log(`[CORTEX]:: Spawning cortex subprocess...`) log(`[CORTEX]:: Spawning cortex subprocess...`)
return new Promise<void>(async (resolve, reject) => { return new Promise<void>(async (resolve, reject) => {
let gpuVisibleDevices = systemInfo?.gpuSetting?.gpus_in_use.join(',') ?? '' // let gpuVisibleDevices = systemInfo?.gpuSetting?.gpus_in_use.join(',') ?? ''
let binaryName = `cortex-server${process.platform === 'win32' ? '.exe' : ''}` let binaryName = `cortex-server${
process.platform === 'win32' ? '.exe' : ''
}`
const binPath = path.join(__dirname, '..', 'bin') const binPath = path.join(__dirname, '..', 'bin')
const executablePath = path.join(binPath, binaryName) const executablePath = path.join(binPath, binaryName)
addEnvPaths(binPath)
const sharedPath = path.join(appResourcePath(), 'shared') const sharedPath = path.join(appResourcePath(), 'shared')
// Execute the binary // Execute the binary
log(`[CORTEX]:: Spawn cortex at path: ${executablePath}`) log(`[CORTEX]:: Spawn cortex at path: ${executablePath}`)
@ -44,15 +49,17 @@ function run(systemInfo?: SystemInformation): Promise<any> {
`${path.join(dataFolderPath, '.janrc')}`, `${path.join(dataFolderPath, '.janrc')}`,
'--data_folder_path', '--data_folder_path',
dataFolderPath, dataFolderPath,
'--loglevel',
'INFO',
], ],
{ {
env: { env: {
...process.env, ...process.env,
CUDA_VISIBLE_DEVICES: gpuVisibleDevices, // CUDA_VISIBLE_DEVICES: gpuVisibleDevices,
// Vulkan - Support 1 device at a time for now // // Vulkan - Support 1 device at a time for now
...(gpuVisibleDevices?.length > 0 && { // ...(gpuVisibleDevices?.length > 0 && {
GGML_VK_VISIBLE_DEVICES: gpuVisibleDevices, // GGML_VK_VISIBLE_DEVICES: gpuVisibleDevices,
}), // }),
}, },
cwd: sharedPath, cwd: sharedPath,
} }
@ -71,6 +78,22 @@ function dispose() {
watchdog?.terminate() watchdog?.terminate()
} }
/**
* Set the environment paths for the cortex subprocess
* @param dest
*/
function addEnvPaths(dest: string) {
// Add engine path to the PATH and LD_LIBRARY_PATH
if (process.platform === 'win32') {
process.env.PATH = (process.env.PATH || '').concat(path.delimiter, dest)
} else {
process.env.LD_LIBRARY_PATH = (process.env.LD_LIBRARY_PATH || '').concat(
path.delimiter,
dest
)
}
}
/** /**
* Cortex process info * Cortex process info
*/ */
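
A brief note on the `addEnvPaths` helper added above: extending `PATH` on Windows or `LD_LIBRARY_PATH` on Linux before spawning `cortex-server` lets the child process resolve the engine's shared libraries sitting next to the binary, without a system-wide install. A minimal sketch of the same idea (the paths are assumptions for illustration):

```typescript
import path from 'path'
import { spawn } from 'child_process'

// Append a library directory to the platform-appropriate search-path variable,
// then spawn a child process that inherits the extended environment.
function spawnWithLocalLibs(executable: string, libDir: string) {
  const key = process.platform === 'win32' ? 'PATH' : 'LD_LIBRARY_PATH'
  const env = {
    ...process.env,
    [key]: [process.env[key], libDir].filter(Boolean).join(path.delimiter),
  }
  return spawn(executable, [], { env })
}

// Hypothetical paths, for illustration only.
spawnWithLocalLibs('/opt/jan/bin/cortex-server', '/opt/jan/bin')
```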

View File

@ -14,8 +14,6 @@ import {
} from '@janhq/core' } from '@janhq/core'
import { CortexAPI } from './cortex' import { CortexAPI } from './cortex'
import { scanModelsFolder } from './legacy/model-json' import { scanModelsFolder } from './legacy/model-json'
import { downloadModel } from './legacy/download'
import { systemInformation } from '@janhq/core'
import { deleteModelFiles } from './legacy/delete' import { deleteModelFiles } from './legacy/delete'
export enum Settings { export enum Settings {
@ -70,18 +68,6 @@ export default class JanModelExtension extends ModelExtension {
* @returns A Promise that resolves when the model is downloaded. * @returns A Promise that resolves when the model is downloaded.
*/ */
async pullModel(model: string, id?: string, name?: string): Promise<void> { async pullModel(model: string, id?: string, name?: string): Promise<void> {
if (id) {
const model: Model = ModelManager.instance().get(id)
// Clip vision model - should not be handled by cortex.cpp
// TensorRT model - should not be handled by cortex.cpp
if (
model &&
(model.engine === InferenceEngine.nitro_tensorrt_llm ||
model.settings.vision_model)
) {
return downloadModel(model, (await systemInformation()).gpuSetting)
}
}
/** /**
* Sending POST to /models/pull/{id} endpoint to pull the model * Sending POST to /models/pull/{id} endpoint to pull the model
*/ */

View File

@ -2,15 +2,12 @@ import {
downloadFile, downloadFile,
DownloadRequest, DownloadRequest,
fs, fs,
GpuSetting,
InferenceEngine,
joinPath, joinPath,
Model, Model,
} from '@janhq/core' } from '@janhq/core'
export const downloadModel = async ( export const downloadModel = async (
model: Model, model: Model,
gpuSettings?: GpuSetting,
network?: { ignoreSSL?: boolean; proxy?: string } network?: { ignoreSSL?: boolean; proxy?: string }
): Promise<void> => { ): Promise<void> => {
const homedir = 'file://models' const homedir = 'file://models'
@ -27,41 +24,6 @@ export const downloadModel = async (
JSON.stringify(model, null, 2) JSON.stringify(model, null, 2)
) )
if (model.engine === InferenceEngine.nitro_tensorrt_llm) {
if (!gpuSettings || gpuSettings.gpus.length === 0) {
console.error('No GPU found. Please check your GPU setting.')
return
}
const firstGpu = gpuSettings.gpus[0]
if (!firstGpu.name.toLowerCase().includes('nvidia')) {
console.error('No Nvidia GPU found. Please check your GPU setting.')
return
}
const gpuArch = firstGpu.arch
if (gpuArch === undefined) {
console.error('No GPU architecture found. Please check your GPU setting.')
return
}
if (!supportedGpuArch.includes(gpuArch)) {
console.debug(
`Your GPU: ${JSON.stringify(firstGpu)} is not supported. Only 30xx, 40xx series are supported.`
)
return
}
const os = 'windows' // TODO: remove this hard coded value
const newSources = model.sources.map((source) => {
const newSource = { ...source }
newSource.url = newSource.url
.replace(/<os>/g, os)
.replace(/<gpuarch>/g, gpuArch)
return newSource
})
model.sources = newSources
}
console.debug(`Download sources: ${JSON.stringify(model.sources)}`) console.debug(`Download sources: ${JSON.stringify(model.sources)}`)
if (model.sources.length > 1) { if (model.sources.length > 1) {

View File

@ -1,75 +0,0 @@
# Create a Jan Extension using Typescript
Use this template to bootstrap the creation of a TypeScript Jan extension. 🚀
## Create Your Own Extension
To create your own extension, you can use this repository as a template! Just follow the below instructions:
1. Click the Use this template button at the top of the repository
2. Select Create a new repository
3. Select an owner and name for your new repository
4. Click Create repository
5. Clone your new repository
## Initial Setup
After you've cloned the repository to your local machine or codespace, you'll need to perform some initial setup steps before you can develop your extension.
> [!NOTE]
>
> You'll need to have a reasonably modern version of
> [Node.js](https://nodejs.org) handy. If you are using a version manager like
> [`nodenv`](https://github.com/nodenv/nodenv) or
> [`nvm`](https://github.com/nvm-sh/nvm), you can run `nodenv install` in the
> root of your repository to install the version specified in
> [`package.json`](./package.json). Otherwise, 20.x or later should work!
1. :hammer_and_wrench: Install the dependencies
```bash
npm install
```
1. :building_construction: Package the TypeScript for distribution
```bash
npm run bundle
```
1. :white_check_mark: Check your artifact
There will be a tgz file in your extension directory now
## Update the Extension Metadata
The [`package.json`](package.json) file defines metadata about your extension, such as
extension name, main entry, description and version.
When you copy this repository, update `package.json` with the name, description for your extension.
## Update the Extension Code
The [`src/`](./src/) directory is the heart of your extension! This contains the
source code that will be run when your extension functions are invoked. You can replace the
contents of this directory with your own code.
There are a few things to keep in mind when writing your extension code:
- Most Jan Extension functions are processed asynchronously.
In `index.ts`, you will see that the extension function will return a `Promise<any>`.
```typescript
import { events, MessageEvent, MessageRequest } from '@janhq/core'
function onStart(): Promise<any> {
return events.on(MessageEvent.OnMessageSent, (data: MessageRequest) =>
this.inference(data)
)
}
```
For more information about the Jan Extension Core module, see the
[documentation](https://github.com/janhq/jan/blob/main/core/README.md).
So, what are you waiting for? Go ahead and start customizing your extension!

View File

@ -1,2 +0,0 @@
@echo off
.\node_modules\.bin\download https://catalog.jan.ai/vulkaninfoSDK.exe -o ./bin

View File

@ -1,49 +0,0 @@
{
"name": "@janhq/monitoring-extension",
"productName": "System Monitoring",
"version": "1.0.10",
"description": "Provides system health and OS level data.",
"main": "dist/index.js",
"node": "dist/node/index.cjs.js",
"author": "Jan <service@jan.ai>",
"license": "AGPL-3.0",
"scripts": {
"build": "rolldown -c rolldown.config.mjs && yarn download-artifacts",
"download-artifacts": "run-script-os && cpx \"bin/**\" \"dist/bin\"",
"download-artifacts:darwin": "echo 'No artifacts to download for darwin'",
"download-artifacts:win32": "download.bat",
"download-artifacts:linux": "download https://catalog.jan.ai/vulkaninfo -o ./bin && chmod +x ./bin/vulkaninfo",
"build:publish": "rimraf *.tgz --glob || true && yarn build && npm pack && cpx *.tgz ../../pre-install"
},
"exports": {
".": "./dist/index.js",
"./main": "./dist/node/index.cjs.js"
},
"devDependencies": {
"@types/node": "^20.11.4",
"@types/node-os-utils": "^1.3.4",
"cpx": "^1.5.0",
"download-cli": "^1.1.1",
"rimraf": "^3.0.2",
"rolldown": "1.0.0-beta.1",
"run-script-os": "^1.1.6",
"typescript": "^5.3.3"
},
"dependencies": {
"@janhq/core": "../../core/package.tgz",
"node-os-utils": "^1.3.7"
},
"files": [
"dist/*",
"package.json",
"README.md"
],
"bundleDependencies": [
"node-os-utils",
"@janhq/core"
],
"installConfig": {
"hoistingLimits": "workspaces"
},
"packageManager": "yarn@4.5.3"
}

View File

@ -1,22 +0,0 @@
[
{
"key": "log-enabled",
"title": "Enable App Logs",
"description": "Saves app logs locally on your computer. This enables you to send us crash reports.",
"controllerType": "checkbox",
"controllerProps": {
"value": true
}
},
{
"key": "log-cleaning-interval",
"title": "Log Cleaning Interval",
"description": "Automatically delete local logs after a certain time interval (in milliseconds).",
"controllerType": "input",
"controllerProps": {
"value": "120000",
"placeholder": "Interval in milliseconds. E.g. 120000",
"textAlign": "right"
}
}
]

View File

@ -1,32 +0,0 @@
import { defineConfig } from 'rolldown'
import packageJson from './package.json' with { type: 'json' }
import settingJson from './resources/settings.json' with { type: 'json' }
export default defineConfig([
{
input: 'src/index.ts',
output: {
format: 'esm',
file: 'dist/index.js',
},
platform: 'browser',
define: {
NODE: JSON.stringify(`${packageJson.name}/${packageJson.node}`),
SETTINGS: JSON.stringify(settingJson),
},
},
{
input: 'src/node/index.ts',
external: ['@janhq/core/node'],
output: {
format: 'cjs',
file: 'dist/node/index.cjs.js',
sourcemap: false,
inlineDynamicImports: true,
},
resolve: {
extensions: ['.js', '.ts', '.json'],
},
platform: 'node',
},
])

View File

@ -1,19 +0,0 @@
declare const NODE: string
declare const SETTINGS: SettingComponentProps[]
type CpuGpuInfo = {
cpu: {
usage: number
}
gpu: GpuInfo[]
}
type GpuInfo = {
id: string
name: string
temperature: string
utilization: string
memoryTotal: string
memoryFree: string
memoryUtilization: string
}

View File

@ -1,90 +0,0 @@
import {
AppConfigurationEventName,
GpuSetting,
MonitoringExtension,
OperatingSystemInfo,
events,
executeOnMain,
} from '@janhq/core'
enum Settings {
logEnabled = 'log-enabled',
logCleaningInterval = 'log-cleaning-interval',
}
/**
* JanMonitoringExtension is a extension that provides system monitoring functionality.
* It implements the MonitoringExtension interface from the @janhq/core package.
*/
export default class JanMonitoringExtension extends MonitoringExtension {
/**
* Called when the extension is loaded.
*/
async onLoad() {
// Register extension settings
this.registerSettings(SETTINGS)
const logEnabled = await this.getSetting<boolean>(Settings.logEnabled, true)
const logCleaningInterval = parseInt(
await this.getSetting<string>(Settings.logCleaningInterval, '120000')
)
// Register File Logger provided by this extension
await executeOnMain(NODE, 'registerLogger', {
logEnabled,
logCleaningInterval: isNaN(logCleaningInterval)
? 120000
: logCleaningInterval,
})
// Attempt to fetch nvidia info
await executeOnMain(NODE, 'updateNvidiaInfo')
events.emit(AppConfigurationEventName.OnConfigurationUpdate, {})
}
onSettingUpdate<T>(key: string, value: T): void {
if (key === Settings.logEnabled) {
executeOnMain(NODE, 'updateLogger', { logEnabled: value })
} else if (key === Settings.logCleaningInterval) {
executeOnMain(NODE, 'updateLogger', { logCleaningInterval: value })
}
}
/**
* Called when the extension is unloaded.
*/
onUnload(): void {
// Register File Logger provided by this extension
executeOnMain(NODE, 'unregisterLogger')
}
/**
* Returns the GPU configuration.
* @returns A Promise that resolves to an object containing the GPU configuration.
*/
async getGpuSetting(): Promise<GpuSetting | undefined> {
return executeOnMain(NODE, 'getGpuConfig')
}
/**
* Returns information about the system resources.
* @returns A Promise that resolves to an object containing information about the system resources.
*/
getResourcesInfo(): Promise<any> {
return executeOnMain(NODE, 'getResourcesInfo')
}
/**
* Returns information about the current system load.
* @returns A Promise that resolves to an object containing information about the current system load.
*/
getCurrentLoad(): Promise<any> {
return executeOnMain(NODE, 'getCurrentLoad')
}
/**
* Returns information about the OS
* @returns
*/
getOsInfo(): Promise<OperatingSystemInfo> {
return executeOnMain(NODE, 'getOsInfo')
}
}

View File

@ -1,389 +0,0 @@
import {
GpuSetting,
GpuSettingInfo,
LoggerManager,
OperatingSystemInfo,
ResourceInfo,
SupportedPlatforms,
getJanDataFolderPath,
log,
} from '@janhq/core/node'
import { mem, cpu } from 'node-os-utils'
import { exec } from 'child_process'
import { writeFileSync, existsSync, readFileSync, mkdirSync } from 'fs'
import path from 'path'
import os from 'os'
import { FileLogger } from './logger'
/**
* Path to the settings directory
**/
export const SETTINGS_DIR = path.join(getJanDataFolderPath(), 'settings')
/**
* Path to the settings file
**/
export const GPU_INFO_FILE = path.join(SETTINGS_DIR, 'settings.json')
/**
* Default GPU settings
* TODO: This needs to be refactored to support multiple accelerators
**/
const DEFAULT_SETTINGS: GpuSetting = {
notify: true,
run_mode: 'cpu',
nvidia_driver: {
exist: false,
version: '',
},
cuda: {
exist: false,
version: '',
},
gpus: [],
gpu_highest_vram: '',
gpus_in_use: [],
is_initial: true,
// TODO: This needs to be set based on user toggle in settings
vulkan: false,
}
export const getGpuConfig = async (): Promise<GpuSetting | undefined> => {
if (process.platform === 'darwin') return undefined
if (existsSync(GPU_INFO_FILE))
return JSON.parse(readFileSync(GPU_INFO_FILE, 'utf-8'))
return DEFAULT_SETTINGS
}
export const getResourcesInfo = async (): Promise<ResourceInfo> => {
const ramUsedInfo = await mem.used()
const totalMemory = ramUsedInfo.totalMemMb * 1024 * 1024
const usedMemory = ramUsedInfo.usedMemMb * 1024 * 1024
const resourceInfo: ResourceInfo = {
mem: {
totalMemory,
usedMemory,
},
}
return resourceInfo
}
export const getCurrentLoad = () =>
new Promise<CpuGpuInfo>(async (resolve, reject) => {
const cpuPercentage = await cpu.usage()
let data = {
run_mode: 'cpu',
gpus_in_use: [],
}
if (process.platform !== 'darwin') {
data = JSON.parse(readFileSync(GPU_INFO_FILE, 'utf-8'))
}
if (data.run_mode === 'gpu' && data.gpus_in_use.length > 0) {
const gpuIds = data.gpus_in_use.join(',')
if (gpuIds !== '' && data['vulkan'] !== true) {
exec(
`nvidia-smi --query-gpu=index,name,temperature.gpu,utilization.gpu,memory.total,memory.free,utilization.memory --format=csv,noheader,nounits --id=${gpuIds}`,
(error, stdout, _) => {
if (error) {
console.error(`exec error: ${error}`)
throw new Error(error.message)
}
const gpuInfo: GpuInfo[] = stdout
.trim()
.split('\n')
.map((line) => {
const [
id,
name,
temperature,
utilization,
memoryTotal,
memoryFree,
memoryUtilization,
] = line.split(', ').map((item) => item.replace(/\r/g, ''))
return {
id,
name,
temperature,
utilization,
memoryTotal,
memoryFree,
memoryUtilization,
}
})
resolve({
cpu: { usage: cpuPercentage },
gpu: gpuInfo,
})
}
)
} else {
// Handle the case where gpuIds is empty
resolve({
cpu: { usage: cpuPercentage },
gpu: [],
})
}
} else {
// Handle the case where run_mode is not 'gpu' or no GPUs are in use
resolve({
cpu: { usage: cpuPercentage },
gpu: [],
})
}
})
/**
* This will retrieve GPU information and persist settings.json
* Will be called when the extension is loaded to turn on GPU acceleration if supported
*/
export const updateNvidiaInfo = async () => {
// ignore if macos
if (process.platform === 'darwin') return
try {
JSON.parse(readFileSync(GPU_INFO_FILE, 'utf-8'))
} catch (error) {
if (!existsSync(SETTINGS_DIR)) {
mkdirSync(SETTINGS_DIR, {
recursive: true,
})
}
writeFileSync(GPU_INFO_FILE, JSON.stringify(DEFAULT_SETTINGS, null, 2))
}
await updateNvidiaDriverInfo()
await updateGpuInfo()
}
const updateNvidiaDriverInfo = async () =>
new Promise((resolve, reject) => {
exec(
'nvidia-smi --query-gpu=driver_version --format=csv,noheader',
(error, stdout) => {
const data: GpuSetting = JSON.parse(
readFileSync(GPU_INFO_FILE, 'utf-8')
)
if (!error) {
const firstLine = stdout.split('\n')[0].trim()
data.nvidia_driver.exist = true
data.nvidia_driver.version = firstLine
} else {
data.nvidia_driver.exist = false
}
writeFileSync(GPU_INFO_FILE, JSON.stringify(data, null, 2))
resolve({})
}
)
})
const getGpuArch = (gpuName: string): string => {
if (!gpuName.toLowerCase().includes('nvidia')) return 'unknown'
if (gpuName.includes('30')) return 'ampere'
else if (gpuName.includes('40')) return 'ada'
else return 'unknown'
}
const updateGpuInfo = async () =>
new Promise((resolve, reject) => {
let data: GpuSetting = JSON.parse(readFileSync(GPU_INFO_FILE, 'utf-8'))
// Cuda
if (data.vulkan === true) {
// Vulkan
exec(
process.platform === 'win32'
? `${__dirname}\\..\\bin\\vulkaninfoSDK.exe --summary`
: `${__dirname}/../bin/vulkaninfo --summary`,
async (error, stdout) => {
if (!error) {
const output = stdout.toString()
log(output)
const gpuRegex = /GPU(\d+):(?:[\s\S]*?)deviceName\s*=\s*(.*)/g
const gpus: GpuSettingInfo[] = []
let match
while ((match = gpuRegex.exec(output)) !== null) {
const id = match[1]
const name = match[2]
const arch = getGpuArch(name)
gpus.push({ id, vram: '0', name, arch })
}
data.gpus = gpus
if (!data.gpus_in_use || data.gpus_in_use.length === 0) {
data.gpus_in_use = [data.gpus.length > 1 ? '1' : '0']
}
data = await updateCudaExistence(data)
writeFileSync(GPU_INFO_FILE, JSON.stringify(data, null, 2))
log(`[APP]::${JSON.stringify(data)}`)
resolve({})
} else {
reject(error)
}
}
)
} else {
exec(
'nvidia-smi --query-gpu=index,memory.total,name --format=csv,noheader,nounits',
async (error, stdout) => {
if (!error) {
log(`[SPECS]::${stdout}`)
// Get GPU info and gpu has higher memory first
let highestVram = 0
let highestVramId = '0'
const gpus: GpuSettingInfo[] = stdout
.trim()
.split('\n')
.map((line) => {
let [id, vram, name] = line.split(', ')
const arch = getGpuArch(name)
vram = vram.replace(/\r/g, '')
if (parseFloat(vram) > highestVram) {
highestVram = parseFloat(vram)
highestVramId = id
}
return { id, vram, name, arch }
})
data.gpus = gpus
data.gpu_highest_vram = highestVramId
} else {
data.gpus = []
data.gpu_highest_vram = undefined
}
if (!data.gpus_in_use || data.gpus_in_use.length === 0) {
data.gpus_in_use = data.gpu_highest_vram ? [data.gpu_highest_vram].filter(e => !!e) : []
}
data = await updateCudaExistence(data)
console.log('[MONITORING]::Cuda info: ', data)
writeFileSync(GPU_INFO_FILE, JSON.stringify(data, null, 2))
log(`[APP]::${JSON.stringify(data)}`)
resolve({})
}
)
}
})
/**
* Check if file exists in paths
*/
const checkFileExistenceInPaths = (file: string, paths: string[]): boolean => {
return paths.some((p) => existsSync(path.join(p, file)))
}
/**
* Validate cuda for linux and windows
*/
const updateCudaExistence = async (
data: GpuSetting = DEFAULT_SETTINGS
): Promise<GpuSetting> => {
let filesCuda12: string[]
let filesCuda11: string[]
let paths: string[]
let cudaVersion: string = ''
if (process.platform === 'win32') {
filesCuda12 = ['cublas64_12.dll', 'cudart64_12.dll', 'cublasLt64_12.dll']
filesCuda11 = ['cublas64_11.dll', 'cudart64_110.dll', 'cublasLt64_11.dll']
paths = process.env.PATH ? process.env.PATH.split(path.delimiter) : []
} else {
filesCuda12 = ['libcudart.so.12', 'libcublas.so.12', 'libcublasLt.so.12']
filesCuda11 = ['libcudart.so.11.0', 'libcublas.so.11', 'libcublasLt.so.11']
paths = process.env.LD_LIBRARY_PATH
? process.env.LD_LIBRARY_PATH.split(path.delimiter)
: []
paths.push('/usr/lib/x86_64-linux-gnu/')
}
let cudaExists = filesCuda12.every(
(file) => existsSync(file) || checkFileExistenceInPaths(file, paths)
)
if (!cudaExists) {
cudaExists = filesCuda11.every(
(file) => existsSync(file) || checkFileExistenceInPaths(file, paths)
)
if (cudaExists) {
cudaVersion = '11'
}
} else {
cudaVersion = '12'
}
data.cuda.exist = cudaExists
data.cuda.version = cudaVersion
console.debug(data.is_initial, data.gpus_in_use)
if (cudaExists && data.is_initial && data.gpus_in_use.length > 0) {
data.run_mode = 'gpu'
}
data.is_initial = false
// Attempt to query CUDA using NVIDIA SMI
if (!cudaExists) {
await new Promise<void>((resolve) => {
exec('nvidia-smi', (error, stdout) => {
if (!error) {
const regex = /CUDA\s*Version:\s*(\d+\.\d+)/g
const match = regex.exec(stdout)
if (match && match[1]) {
data.cuda.version = match[1]
}
}
console.log('[MONITORING]::Finalized cuda info update: ', data)
resolve()
})
})
}
return data
}
export const getOsInfo = (): OperatingSystemInfo => {
const platform =
SupportedPlatforms.find((p) => p === process.platform) || 'unknown'
const osInfo: OperatingSystemInfo = {
platform: platform,
arch: process.arch,
release: os.release(),
machine: os.machine(),
version: os.version(),
totalMem: os.totalmem(),
freeMem: os.freemem(),
}
return osInfo
}
export const registerLogger = ({ logEnabled, logCleaningInterval }) => {
const logger = new FileLogger(logEnabled, logCleaningInterval)
LoggerManager.instance().register(logger)
logger.cleanLogs()
}
export const unregisterLogger = () => {
LoggerManager.instance().unregister('file')
}
export const updateLogger = ({ logEnabled, logCleaningInterval }) => {
const logger = LoggerManager.instance().loggers.get('file') as FileLogger
if (logger && logEnabled !== undefined) logger.logEnabled = logEnabled
if (logger && logCleaningInterval)
logger.logCleaningInterval = logCleaningInterval
// Rerun
logger && logger.cleanLogs()
}

Some files were not shown because too many files have changed in this diff.