diff --git a/extensions/tensorrt-llm-extension/README.md b/extensions/tensorrt-llm-extension/README.md
deleted file mode 100644
index 34a670516..000000000
--- a/extensions/tensorrt-llm-extension/README.md
+++ /dev/null
@@ -1,79 +0,0 @@
-# TensorRT-LLM Extension
-
-Created using the Jan extension example.
-
-# Create a Jan Extension using TypeScript
-
-Use this template to bootstrap the creation of a TypeScript Jan extension. 🚀
-
-## Create Your Own Extension
-
-To create your own extension, you can use this repository as a template! Just follow the instructions below:
-
-1. Click the Use this template button at the top of the repository
-2. Select Create a new repository
-3. Select an owner and name for your new repository
-4. Click Create repository
-5. Clone your new repository
-
-## Initial Setup
-
-After you've cloned the repository to your local machine or codespace, you'll need to perform some initial setup steps before you can develop your extension.
-
-> [!NOTE]
->
-> You'll need to have a reasonably modern version of
-> [Node.js](https://nodejs.org) handy. If you are using a version manager like
-> [`nodenv`](https://github.com/nodenv/nodenv) or
-> [`nvm`](https://github.com/nvm-sh/nvm), you can run `nodenv install` in the
-> root of your repository to install the version specified in
-> [`package.json`](./package.json). Otherwise, 20.x or later should work!
-
-1. :hammer_and_wrench: Install the dependencies
-
-   ```bash
-   npm install
-   ```
-
-1. :building_construction: Package the TypeScript for distribution
-
-   ```bash
-   npm run bundle
-   ```
-
-1. :white_check_mark: Check your artifact
-
-   There will be a `.tgz` file in your extension directory now.
-
-## Update the Extension Metadata
-
-The [`package.json`](package.json) file defines metadata about your extension, such as the
-extension name, main entry, description, and version.
-
-When you copy this repository, update `package.json` with the name and description of your extension.
-
-## Update the Extension Code
-
-The [`src/`](./src/) directory is the heart of your extension! It contains the
-source code that will be run when your extension's functions are invoked. You can replace the
-contents of this directory with your own code.
-
-There are a few things to keep in mind when writing your extension code:
-
-- Most Jan Extension functions are processed asynchronously.
-  In `index.ts`, you will see that the extension function returns a `Promise<void>`.
-
-  ```typescript
-  import { events, MessageEvent, MessageRequest } from '@janhq/core'
-
-  function onStart(): Promise<void> {
-    return events.on(MessageEvent.OnMessageSent, (data: MessageRequest) =>
-      this.inference(data)
-    )
-  }
-  ```
-
-  For more information about the Jan Extension Core module, see the
-  [documentation](https://github.com/janhq/jan/blob/main/core/README.md).
-
-So, what are you waiting for? Go ahead and start customizing your extension!
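Editor's note: the README's `events.on` sample subscribes but never unsubscribes. A minimal companion sketch — not from this repository; `MyExtension` and its `inference` method are hypothetical placeholders — showing the matching `events.off` in an unload hook, so a reloaded extension does not handle each message twice:

```typescript
import { events, MessageEvent, MessageRequest } from '@janhq/core'

class MyExtension {
  // Keep one stable reference so on() and off() operate on the same handler
  private handler = (data: MessageRequest) => this.inference(data)

  onLoad(): void {
    events.on(MessageEvent.OnMessageSent, this.handler)
  }

  onUnload(): void {
    events.off(MessageEvent.OnMessageSent, this.handler)
  }

  private inference(data: MessageRequest): void {
    console.debug('handling message request', data)
  }
}
```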
diff --git a/extensions/tensorrt-llm-extension/jest.config.js b/extensions/tensorrt-llm-extension/jest.config.js
deleted file mode 100644
index 3e32adceb..000000000
--- a/extensions/tensorrt-llm-extension/jest.config.js
+++ /dev/null
@@ -1,9 +0,0 @@
-/** @type {import('ts-jest').JestConfigWithTsJest} */
-module.exports = {
-  preset: 'ts-jest',
-  testEnvironment: 'node',
-  transform: {
-    'node_modules/@janhq/core/.+\\.(j|t)s?$': 'ts-jest',
-  },
-  transformIgnorePatterns: ['node_modules/(?!@janhq/core/.*)'],
-}
diff --git a/extensions/tensorrt-llm-extension/package.json b/extensions/tensorrt-llm-extension/package.json
deleted file mode 100644
index 1cc098dde..000000000
--- a/extensions/tensorrt-llm-extension/package.json
+++ /dev/null
@@ -1,78 +0,0 @@
-{
-  "name": "@janhq/tensorrt-llm-extension",
-  "productName": "TensorRT-LLM Inference Engine",
-  "version": "0.0.3",
-  "description": "This extension enables Nvidia's TensorRT-LLM for the fastest GPU acceleration. See the [setup guide](https://jan.ai/guides/providers/tensorrt-llm/) for next steps.",
-  "main": "dist/index.js",
-  "node": "dist/node/index.cjs.js",
-  "author": "Jan ",
-  "license": "AGPL-3.0",
-  "config": {
-    "host": "127.0.0.1",
-    "port": "3929"
-  },
-  "compatibility": {
-    "platform": [
-      "win32"
-    ],
-    "app": [
-      "0.1.0"
-    ]
-  },
-  "tensorrtVersion": "0.1.8",
-  "provider": "nitro-tensorrt-llm",
-  "scripts": {
-    "test": "jest",
-    "build": "rolldown -c rolldown.config.mjs",
-    "build:publish": "rimraf *.tgz --glob || true && yarn build && cpx \"bin/**\" \"dist/bin\" && npm pack && cpx *.tgz ../../pre-install"
-  },
-  "exports": {
-    ".": "./dist/index.js",
-    "./main": "./dist/node/index.cjs.js"
-  },
-  "devDependencies": {
-    "@types/decompress": "4.2.7",
-    "@types/jest": "^29.5.12",
-    "@types/node": "^20.11.4",
-    "@types/os-utils": "^0.0.4",
-    "@types/tcp-port-used": "^1.0.4",
-    "cpx": "^1.5.0",
-    "download-cli": "^1.1.1",
-    "jest": "^29.7.0",
-    "jest-junit": "^16.0.0",
-    "jest-runner": "^29.7.0",
-    "rimraf": "^3.0.2",
-    "rolldown": "1.0.0-beta.1",
-    "run-script-os": "^1.1.6",
-    "ts-jest": "^29.2.5",
-    "typescript": "^5.2.2"
-  },
-  "dependencies": {
-    "@janhq/core": "../../core/package.tgz",
-    "decompress": "^4.2.1",
-    "fetch-retry": "^5.0.6",
-    "rxjs": "^7.8.1",
-    "tcp-port-used": "^1.0.2",
-    "terminate": "^2.6.1",
-    "ulidx": "^2.3.0"
-  },
-  "engines": {
-    "node": ">=18.0.0"
-  },
-  "files": [
-    "dist/*",
-    "package.json",
-    "README.md"
-  ],
-  "bundleDependencies": [
-    "tcp-port-used",
-    "fetch-retry",
-    "decompress",
-    "@janhq/core",
-    "terminate"
-  ],
-  "installConfig": {
-    "hoistingLimits": "workspaces"
-  },
-  "packageManager": "yarn@4.5.3"
-}
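Editor's note: the `config.host` and `config.port` fields above are not read at runtime; the build step (`rolldown.config.mjs`, further down in this diff) inlines them into the bundles as compile-time constants. Roughly what the `define` substitution yields in `dist/index.js` for the defaults above — illustrative only, the bundler controls the real emitted code:

```typescript
// Values derived from package.json: provider, tensorrtVersion, and the
// http://host:port default assembled in rolldown.config.mjs.
const PROVIDER = 'nitro-tensorrt-llm'
const TENSORRT_VERSION = '0.1.8'
const INFERENCE_URL = 'http://127.0.0.1:3929/v1/chat/completions'
```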
"https://catalog.jan.ai/dist/models///tensorrt-llm-v0.7.1/LlamaCorn-1.1B-Chat-fp16/special_tokens_map.json" - }, - { - "filename": "tokenizer.json", - "url": "https://catalog.jan.ai/dist/models///tensorrt-llm-v0.7.1/LlamaCorn-1.1B-Chat-fp16/tokenizer.json" - }, - { - "filename": "tokenizer_config.json", - "url": "https://catalog.jan.ai/dist/models///tensorrt-llm-v0.7.1/LlamaCorn-1.1B-Chat-fp16/tokenizer_config.json" - }, - { - "filename": "model.cache", - "url": "https://catalog.jan.ai/dist/models///tensorrt-llm-v0.7.1/LlamaCorn-1.1B-Chat-fp16/model.cache" - } - ], - "id": "llamacorn-1.1b-chat-fp16", - "object": "model", - "name": "LlamaCorn 1.1B Chat FP16", - "version": "1.0", - "description": "LlamaCorn is a refined version of TinyLlama-1.1B, optimized for conversational quality, running on consumer devices through TensorRT-LLM", - "format": "TensorRT-LLM", - "settings": { - "ctx_len": 2048, - "text_model": false - }, - "parameters": { - "max_tokens": 4096 - }, - "metadata": { - "author": "LLama", - "tags": ["TensorRT-LLM", "1B", "Finetuned"], - "size": 2151000000 - }, - "engine": "nitro-tensorrt-llm" - }, - { - "sources": [ - { - "filename": "config.json", - "url": "https://catalog.jan.ai/dist/models///tensorrt-llm-v0.7.1/TinyJensen-1.1B-Chat-fp16/config.json" - }, - { - "filename": "mistral_float16_tp1_rank0.engine", - "url": "https://catalog.jan.ai/dist/models///tensorrt-llm-v0.7.1/TinyJensen-1.1B-Chat-fp16/mistral_float16_tp1_rank0.engine" - }, - { - "filename": "tokenizer.model", - "url": "https://catalog.jan.ai/dist/models///tensorrt-llm-v0.7.1/TinyJensen-1.1B-Chat-fp16/tokenizer.model" - }, - { - "filename": "special_tokens_map.json", - "url": "https://catalog.jan.ai/dist/models///tensorrt-llm-v0.7.1/TinyJensen-1.1B-Chat-fp16/special_tokens_map.json" - }, - { - "filename": "tokenizer.json", - "url": "https://catalog.jan.ai/dist/models///tensorrt-llm-v0.7.1/TinyJensen-1.1B-Chat-fp16/tokenizer.json" - }, - { - "filename": "tokenizer_config.json", - "url": "https://catalog.jan.ai/dist/models///tensorrt-llm-v0.7.1/TinyJensen-1.1B-Chat-fp16/tokenizer_config.json" - }, - { - "filename": "model.cache", - "url": "https://catalog.jan.ai/dist/models///tensorrt-llm-v0.7.1/TinyJensen-1.1B-Chat-fp16/model.cache" - } - ], - "id": "tinyjensen-1.1b-chat-fp16", - "object": "model", - "name": "TinyJensen 1.1B Chat FP16", - "version": "1.0", - "description": "Do you want to chat with Jensen Huan? 
Here you are", - "format": "TensorRT-LLM", - "settings": { - "ctx_len": 2048, - "text_model": false - }, - "parameters": { - "max_tokens": 4096 - }, - "metadata": { - "author": "LLama", - "tags": ["TensorRT-LLM", "1B", "Finetuned"], - "size": 2151000000 - }, - "engine": "nitro-tensorrt-llm" - }, - { - "sources": [ - { - "filename": "config.json", - "url": "https://catalog.jan.ai/dist/models///tensorrt-llm-v0.7.1/Mistral-7B-Instruct-v0.1-int4/config.json" - }, - { - "filename": "mistral_float16_tp1_rank0.engine", - "url": "https://catalog.jan.ai/dist/models///tensorrt-llm-v0.7.1/Mistral-7B-Instruct-v0.1-int4/mistral_float16_tp1_rank0.engine" - }, - { - "filename": "tokenizer.model", - "url": "https://catalog.jan.ai/dist/models///tensorrt-llm-v0.7.1/Mistral-7B-Instruct-v0.1-int4/tokenizer.model" - }, - { - "filename": "special_tokens_map.json", - "url": "https://catalog.jan.ai/dist/models///tensorrt-llm-v0.7.1/Mistral-7B-Instruct-v0.1-int4/special_tokens_map.json" - }, - { - "filename": "tokenizer.json", - "url": "https://catalog.jan.ai/dist/models///tensorrt-llm-v0.7.1/Mistral-7B-Instruct-v0.1-int4/tokenizer.json" - }, - { - "filename": "tokenizer_config.json", - "url": "https://catalog.jan.ai/dist/models///tensorrt-llm-v0.7.1/Mistral-7B-Instruct-v0.1-int4/tokenizer_config.json" - }, - { - "filename": "model.cache", - "url": "https://catalog.jan.ai/dist/models///tensorrt-llm-v0.7.1/Mistral-7B-Instruct-v0.1-int4/model.cache" - } - ], - "id": "mistral-7b-instruct-int4", - "object": "model", - "name": "Mistral 7B Instruct v0.1 INT4", - "version": "1.0", - "description": "Mistral 7B Instruct v0.1 INT4", - "format": "TensorRT-LLM", - "settings": { - "ctx_len": 2048, - "text_model": false, - "prompt_template": "[INST] {prompt} [/INST]" - }, - "parameters": { - "max_tokens": 4096 - }, - "metadata": { - "author": "MistralAI", - "tags": ["TensorRT-LLM", "7B", "Finetuned"], - "size": 3840000000 - }, - "engine": "nitro-tensorrt-llm" - } -] diff --git a/extensions/tensorrt-llm-extension/rolldown.config.mjs b/extensions/tensorrt-llm-extension/rolldown.config.mjs deleted file mode 100644 index bb02c785b..000000000 --- a/extensions/tensorrt-llm-extension/rolldown.config.mjs +++ /dev/null @@ -1,59 +0,0 @@ -import { defineConfig } from 'rolldown' -import packageJson from './package.json' with { type: 'json' } -import modelsJson from './resources/models.json' with { type: 'json' } - -export default defineConfig([ - { - input: 'src/index.ts', - output: { - format: 'esm', - file: 'dist/index.js', - }, - platform: 'browser', - define: { - MODELS: JSON.stringify(modelsJson), - TENSORRT_VERSION: JSON.stringify(packageJson.tensorrtVersion), - PROVIDER: JSON.stringify(packageJson.provider), - DOWNLOAD_RUNNER_URL: - process.platform === 'win32' - ? JSON.stringify( - 'https://github.com/janhq/cortex.tensorrt-llm/releases/download/windows-v-tensorrt-llm-v0.7.1/nitro-windows-v-tensorrt-llm-v0.7.1-amd64-all-arch.tar.gz' - ) - : JSON.stringify( - 'https://github.com/janhq/cortex.tensorrt-llm/releases/download/linux-v/nitro-linux-v-amd64-tensorrt-llm-.tar.gz' - ), - NODE: JSON.stringify(`${packageJson.name}/${packageJson.node}`), - INFERENCE_URL: JSON.stringify( - process.env.INFERENCE_URL || - `${packageJson.config?.protocol ?? 
diff --git a/extensions/tensorrt-llm-extension/src/@types/global.d.ts b/extensions/tensorrt-llm-extension/src/@types/global.d.ts
deleted file mode 100644
index b550080f7..000000000
--- a/extensions/tensorrt-llm-extension/src/@types/global.d.ts
+++ /dev/null
@@ -1,11 +0,0 @@
-declare const NODE: string
-declare const INFERENCE_URL: string
-declare const LOAD_MODEL_URL: string
-declare const TERMINATE_ENGINE_URL: string
-declare const ENGINE_HOST: string
-declare const ENGINE_PORT: string
-declare const DOWNLOAD_RUNNER_URL: string
-declare const TENSORRT_VERSION: string
-declare const COMPATIBILITY: object
-declare const PROVIDER: string
-declare const MODELS: Array<any>
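Editor's note: nothing binds these identifiers at runtime — the `define`/`replace` steps above substitute literal values at build time, and the test suite below assigns them on `global`. An illustrative fragment of how `src/index.ts` consumes them, assuming `Model` is imported from `@janhq/core`:

```typescript
// After bundling, MODELS is a JSON literal and INFERENCE_URL a string literal.
const models = MODELS as unknown as Model[]
console.debug(`inference endpoint: ${INFERENCE_URL}`)
```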
diff --git a/extensions/tensorrt-llm-extension/src/index.test.ts b/extensions/tensorrt-llm-extension/src/index.test.ts
deleted file mode 100644
index 48d6e71d7..000000000
--- a/extensions/tensorrt-llm-extension/src/index.test.ts
+++ /dev/null
@@ -1,186 +0,0 @@
-import TensorRTLLMExtension from '../src/index'
-import {
-  executeOnMain,
-  systemInformation,
-  fs,
-  baseName,
-  joinPath,
-  downloadFile,
-} from '@janhq/core'
-
-jest.mock('@janhq/core', () => ({
-  ...jest.requireActual('@janhq/core/node'),
-  LocalOAIEngine: jest.fn().mockImplementation(function () {
-    // @ts-ignore
-    this.registerModels = () => {
-      return Promise.resolve()
-    }
-    // @ts-ignore
-    return this
-  }),
-  systemInformation: jest.fn(),
-  fs: {
-    existsSync: jest.fn(),
-    mkdir: jest.fn(),
-  },
-  joinPath: jest.fn(),
-  baseName: jest.fn(),
-  downloadFile: jest.fn(),
-  executeOnMain: jest.fn(),
-  showToast: jest.fn(),
-  events: {
-    emit: jest.fn(),
-    // @ts-ignore
-    on: (event, func) => {
-      func({ fileName: './' })
-    },
-    off: jest.fn(),
-  },
-}))
-
-// @ts-ignore
-global.COMPATIBILITY = {
-  platform: ['win32'],
-}
-// @ts-ignore
-global.PROVIDER = 'tensorrt-llm'
-// @ts-ignore
-global.INFERENCE_URL = 'http://localhost:5000'
-// @ts-ignore
-global.NODE = 'node'
-// @ts-ignore
-global.MODELS = []
-// @ts-ignore
-global.TENSORRT_VERSION = ''
-// @ts-ignore
-global.DOWNLOAD_RUNNER_URL = ''
-
-describe('TensorRTLLMExtension', () => {
-  let extension: TensorRTLLMExtension
-
-  beforeEach(() => {
-    // @ts-ignore
-    extension = new TensorRTLLMExtension()
-    jest.clearAllMocks()
-  })
-
-  describe('compatibility', () => {
-    it('should return the correct compatibility', () => {
-      const result = extension.compatibility()
-      expect(result).toEqual({
-        platform: ['win32'],
-      })
-    })
-  })
-
-  describe('install', () => {
-    it('should install if compatible', async () => {
-      const mockSystemInfo: any = {
-        osInfo: { platform: 'win32' },
-        gpuSetting: { gpus: [{ arch: 'ampere', name: 'NVIDIA GPU' }] },
-      }
-      ;(executeOnMain as jest.Mock).mockResolvedValue({})
-      ;(systemInformation as jest.Mock).mockResolvedValue(mockSystemInfo)
-      ;(fs.existsSync as jest.Mock).mockResolvedValue(false)
-      ;(fs.mkdir as jest.Mock).mockResolvedValue(undefined)
-      ;(baseName as jest.Mock).mockResolvedValue('./')
-      ;(joinPath as jest.Mock).mockResolvedValue('./')
-      ;(downloadFile as jest.Mock).mockResolvedValue({})
-
-      await extension.install()
-
-      expect(executeOnMain).toHaveBeenCalled()
-    })
-
-    it('should not install if not compatible', async () => {
-      const mockSystemInfo: any = {
-        osInfo: { platform: 'linux' },
-        gpuSetting: { gpus: [{ arch: 'pascal', name: 'NVIDIA GPU' }] },
-      }
-      ;(systemInformation as jest.Mock).mockResolvedValue(mockSystemInfo)
-
-      jest.spyOn(extension, 'registerModels').mockReturnValue(Promise.resolve())
-      await extension.install()
-
-      expect(executeOnMain).not.toHaveBeenCalled()
-    })
-  })
-
-  describe('installationState', () => {
-    it('should return NotCompatible if not compatible', async () => {
-      const mockSystemInfo: any = {
-        osInfo: { platform: 'linux' },
-        gpuSetting: { gpus: [{ arch: 'pascal', name: 'NVIDIA GPU' }] },
-      }
-      ;(systemInformation as jest.Mock).mockResolvedValue(mockSystemInfo)
-
-      const result = await extension.installationState()
-
-      expect(result).toBe('NotCompatible')
-    })
-
-    it('should return Installed if executable exists', async () => {
-      const mockSystemInfo: any = {
-        osInfo: { platform: 'win32' },
-        gpuSetting: { gpus: [{ arch: 'ampere', name: 'NVIDIA GPU' }] },
-      }
-      ;(systemInformation as jest.Mock).mockResolvedValue(mockSystemInfo)
-      ;(fs.existsSync as jest.Mock).mockResolvedValue(true)
-
-      const result = await extension.installationState()
-
-      expect(result).toBe('Installed')
-    })
-
-    it('should return NotInstalled if executable does not exist', async () => {
-      const mockSystemInfo: any = {
-        osInfo: { platform: 'win32' },
-        gpuSetting: { gpus: [{ arch: 'ampere', name: 'NVIDIA GPU' }] },
-      }
-      ;(systemInformation as jest.Mock).mockResolvedValue(mockSystemInfo)
-      ;(fs.existsSync as jest.Mock).mockResolvedValue(false)
-
-      const result = await extension.installationState()
-
-      expect(result).toBe('NotInstalled')
-    })
-  })
-
-  describe('isCompatible', () => {
-    it('should return true for compatible system', () => {
-      const mockInfo: any = {
-        osInfo: { platform: 'win32' },
-        gpuSetting: { gpus: [{ arch: 'ampere', name: 'NVIDIA GPU' }] },
-      }
-
-      const result = extension.isCompatible(mockInfo)
-
-      expect(result).toBe(true)
-    })
-
-    it('should return false for incompatible system', () => {
-      const mockInfo: any = {
-        osInfo: { platform: 'linux' },
-        gpuSetting: { gpus: [{ arch: 'pascal', name: 'AMD GPU' }] },
-      }
-
-      const result = extension.isCompatible(mockInfo)
-
-      expect(result).toBe(false)
-    })
-  })
-})
-
-describe('GitHub Release File URL Test', () => {
-  const url = 'https://github.com/janhq/cortex.tensorrt-llm/releases/download/windows-v0.1.8-tensorrt-llm-v0.7.1/nitro-windows-v0.1.8-tensorrt-llm-v0.7.1-amd64-all-arch.tar.gz';
-
-  it('should return a status code 200 for the release file URL', async () => {
-    const response = await fetch(url, { method: 'HEAD' });
-    expect(response.status).toBe(200);
-  });
-
-  it('should not return a 404 status', async () => {
-    const response = await fetch(url, { method: 'HEAD' });
-    expect(response.status).not.toBe(404);
-  });
-});
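Editor's note: the last two tests issue real HEAD requests to GitHub, so they fail offline or when the release is moved. One way to keep them deterministic in CI — a sketch, not part of the original suite — is to stub the global `fetch` (available in Node 18+):

```typescript
// Stub fetch for the release-URL checks so they pass without network access.
beforeAll(() => {
  jest
    .spyOn(globalThis, 'fetch')
    .mockResolvedValue(new Response(null, { status: 200 }))
})

afterAll(() => {
  jest.restoreAllMocks()
})
```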
diff --git a/extensions/tensorrt-llm-extension/src/index.ts b/extensions/tensorrt-llm-extension/src/index.ts
deleted file mode 100644
index d9c89242f..000000000
--- a/extensions/tensorrt-llm-extension/src/index.ts
+++ /dev/null
@@ -1,197 +0,0 @@
-/**
- * @module tensorrt-llm-extension/src/index
- */
-
-import {
-  Compatibility,
-  DownloadEvent,
-  DownloadRequest,
-  DownloadState,
-  InstallationState,
-  baseName,
-  downloadFile,
-  events,
-  executeOnMain,
-  joinPath,
-  showToast,
-  systemInformation,
-  LocalOAIEngine,
-  fs,
-  MessageRequest,
-  ModelEvent,
-  getJanDataFolderPath,
-  SystemInformation,
-  Model,
-} from '@janhq/core'
-
-/**
- * TensorRTLLMExtension - Implementation of LocalOAIEngine
- * @extends LocalOAIEngine
- * Provides pre-populated models for TensorRT-LLM
- */
-export default class TensorRTLLMExtension extends LocalOAIEngine {
-  /**
-   * Overrides the custom function names for loading and unloading the model,
-   * which are implemented in the node module
-   */
-  override provider = PROVIDER
-  override inferenceUrl = INFERENCE_URL
-  override nodeModule = NODE
-
-  private supportedGpuArch = ['ampere', 'ada']
-
-  override compatibility() {
-    return COMPATIBILITY as unknown as Compatibility
-  }
-
-  override async onLoad(): Promise<void> {
-    super.onLoad()
-
-    if ((await this.installationState()) === 'Installed') {
-      const models = MODELS as unknown as Model[]
-      this.registerModels(models)
-    }
-  }
-
-  override async install(): Promise<void> {
-    await this.removePopulatedModels()
-
-    const info = await systemInformation()
-
-    if (!this.isCompatible(info)) return
-
-    const janDataFolderPath = await getJanDataFolderPath()
-    const engineVersion = TENSORRT_VERSION
-
-    const executableFolderPath = await joinPath([
-      janDataFolderPath,
-      'engines',
-      this.provider,
-      engineVersion,
-      info.gpuSetting?.gpus[0].arch,
-    ])
-
-    if (!(await fs.existsSync(executableFolderPath))) {
-      await fs.mkdir(executableFolderPath)
-    }
-
-    const placeholderUrl = DOWNLOAD_RUNNER_URL
-    const tensorrtVersion = TENSORRT_VERSION
-
-    const url = placeholderUrl
-      .replace(/<version>/g, tensorrtVersion)
-      .replace(/<gpuarch>/g, info.gpuSetting!.gpus[0]!.arch!)
-
-    const tarball = await baseName(url)
-
-    const tarballFullPath = await joinPath([executableFolderPath, tarball])
-    const downloadRequest: DownloadRequest = {
-      url,
-      localPath: tarballFullPath,
-      extensionId: this.name,
-      downloadType: 'extension',
-    }
-    downloadFile(downloadRequest)
-
-    const onFileDownloadSuccess = async (state: DownloadState) => {
-      // If this event belongs to another download, ignore it
-      if (state.fileName !== tarball) return
-      events.off(DownloadEvent.onFileDownloadSuccess, onFileDownloadSuccess)
-      await executeOnMain(
-        this.nodeModule,
-        'decompressRunner',
-        tarballFullPath,
-        executableFolderPath
-      )
-      events.emit(DownloadEvent.onFileUnzipSuccess, state)
-
-      // Pre-populate models as soon as the engine is ready
-      const models = MODELS as unknown as Model[]
-      this.registerModels(models).then(() => {
-        showToast(
-          'Extension installed successfully.',
-          'New models are added to Model Hub.'
-        )
-      })
-    }
-    events.on(DownloadEvent.onFileDownloadSuccess, onFileDownloadSuccess)
-  }
-
-  private async removePopulatedModels(): Promise<void> {
-    const models = MODELS as unknown as Model[]
-    console.debug(`removePopulatedModels`, JSON.stringify(models))
-    const janDataFolderPath = await getJanDataFolderPath()
-    const modelFolderPath = await joinPath([janDataFolderPath, 'models'])
-
-    for (const model of models) {
-      const modelPath = await joinPath([modelFolderPath, model.id])
-
-      try {
-        await fs.rm(modelPath)
-      } catch (err) {
-        console.error(`Error removing model ${modelPath}`, err)
-      }
-    }
-    events.emit(ModelEvent.OnModelsUpdate, {})
-  }
-
-  override async loadModel(model: Model): Promise<void> {
-    if ((await this.installationState()) === 'Installed')
-      return super.loadModel(model)
-
-    throw new Error('EXTENSION_IS_NOT_INSTALLED::TensorRT-LLM extension')
-  }
-
-  override async installationState(): Promise<InstallationState> {
-    const info = await systemInformation()
-
-    if (!this.isCompatible(info)) return 'NotCompatible'
-    const firstGpu = info.gpuSetting?.gpus[0]
-    const janDataFolderPath = await getJanDataFolderPath()
-    const engineVersion = TENSORRT_VERSION
-
-    const enginePath = await joinPath([
-      janDataFolderPath,
-      'engines',
-      this.provider,
-      engineVersion,
-      firstGpu.arch,
-      info.osInfo.platform === 'win32' ? 'nitro.exe' : 'nitro',
-    ])
-
-    // For now, we just check for the nitro x TensorRT executable
-    return (await fs.existsSync(enginePath)) ? 'Installed' : 'NotInstalled'
-  }
-
-  override stopInference() {
-    if (!this.loadedModel) return
-    showToast(
-      'Unable to Stop Inference',
-      'The model does not support stopping inference.'
-    )
-    return Promise.resolve()
-  }
-
-  override async inference(data: MessageRequest) {
-    if (!this.loadedModel) return
-    // The TensorRT-LLM extension supports streaming only
-    if (data.model && data.model.parameters) data.model.parameters.stream = true
-    super.inference(data)
-  }
-
-  isCompatible(info: SystemInformation): info is Required<SystemInformation> & {
-    gpuSetting: { gpus: { arch: string }[] }
-  } {
-    const firstGpu = info.gpuSetting?.gpus[0]
-    return (
-      !!info.osInfo &&
-      !!info.gpuSetting &&
-      !!firstGpu &&
-      info.gpuSetting.gpus.length > 0 &&
-      this.compatibility().platform.includes(info.osInfo.platform) &&
-      !!firstGpu.arch &&
-      firstGpu.name.toLowerCase().includes('nvidia') &&
-      this.supportedGpuArch.includes(firstGpu.arch)
-    )
-  }
-}
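Editor's note: for concreteness, here is how `install()`'s placeholder substitution resolves on Windows with `tensorrtVersion` 0.1.8 — the same URL the release-file test above pins. A worked example; the `ampere` value is an assumed GPU architecture:

```typescript
const placeholderUrl =
  'https://github.com/janhq/cortex.tensorrt-llm/releases/download/windows-v<version>-tensorrt-llm-v0.7.1/nitro-windows-v<version>-tensorrt-llm-v0.7.1-amd64-all-arch.tar.gz'

const url = placeholderUrl
  .replace(/<version>/g, '0.1.8')
  .replace(/<gpuarch>/g, 'ampere') // no-op here: the Windows tarball ships all arches

// => .../windows-v0.1.8-tensorrt-llm-v0.7.1/
//      nitro-windows-v0.1.8-tensorrt-llm-v0.7.1-amd64-all-arch.tar.gz
```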
diff --git a/extensions/tensorrt-llm-extension/src/node/index.ts b/extensions/tensorrt-llm-extension/src/node/index.ts
deleted file mode 100644
index d02427170..000000000
--- a/extensions/tensorrt-llm-extension/src/node/index.ts
+++ /dev/null
@@ -1,325 +0,0 @@
-import path from 'path'
-import { ChildProcessWithoutNullStreams, spawn } from 'child_process'
-import tcpPortUsed from 'tcp-port-used'
-import fetchRT from 'fetch-retry'
-import {
-  log,
-  getJanDataFolderPath,
-  SystemInformation,
-  PromptTemplate,
-} from '@janhq/core/node'
-import decompress from 'decompress'
-import terminate from 'terminate'
-
-// Polyfill fetch with retry
-const fetchRetry = fetchRT(fetch)
-
-const supportedPlatform = (): string[] => ['win32', 'linux']
-const supportedGpuArch = (): string[] => ['ampere', 'ada']
-const PORT_CHECK_INTERVAL = 100
-
-/**
- * The settings object for the model load operation.
- */
-interface ModelLoadParams {
-  engine_path: string
-  ctx_len: number
-}
-
-// The subprocess instance for the engine
-let subprocess: ChildProcessWithoutNullStreams | undefined = undefined
-
-/**
- * Initializes an engine subprocess to load a machine learning model.
- * @param params - The model load settings.
- */
-async function loadModel(
-  params: any,
-  systemInfo?: SystemInformation
-): Promise<{ error: Error | undefined }> {
-  // modelFolder is the absolute path to the running model folder
-  // e.g. ~/jan/models/llama-2
-  let modelFolder = params.modelFolder
-
-  if (params.model.settings?.prompt_template) {
-    const promptTemplate = params.model.settings.prompt_template
-    const prompt = promptTemplateConverter(promptTemplate)
-    if (prompt?.error) {
-      return Promise.reject(prompt.error)
-    }
-    params.model.settings.system_prompt = prompt.system_prompt
-    params.model.settings.user_prompt = prompt.user_prompt
-    params.model.settings.ai_prompt = prompt.ai_prompt
-  }
-
-  const settings: ModelLoadParams = {
-    engine_path: modelFolder,
-    ctx_len: params.model.settings.ctx_len ?? 2048,
-    ...params.model.settings,
-  }
-  if (!systemInfo) {
-    throw new Error('Cannot get system info. Unable to start nitro x tensorrt.')
-  }
-  return runEngineAndLoadModel(settings, systemInfo)
-}
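Editor's note: after template conversion, `loadModel` forwards a `ModelLoadParams`-shaped payload to the engine's load endpoint. An illustrative instance — the path is a hypothetical example, not captured traffic:

```typescript
// Example payload for a model with a 2048-token context window.
const exampleSettings: ModelLoadParams = {
  engine_path: '/home/user/jan/models/mistral-7b-instruct-int4',
  ctx_len: 2048,
}
```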
-/**
- * Stops an engine subprocess.
- */
-function unloadModel(): Promise<void> {
-  const controller = new AbortController()
-  setTimeout(() => controller.abort(), 5000)
-  debugLog(`Request to kill engine`)
-
-  const killRequest = () => {
-    return fetch(TERMINATE_ENGINE_URL, {
-      method: 'DELETE',
-      signal: controller.signal,
-    })
-      .then(() => {
-        subprocess = undefined
-      })
-      .catch(() => {}) // Do nothing with this attempt
-      .then(() =>
-        tcpPortUsed.waitUntilFree(
-          parseInt(ENGINE_PORT),
-          PORT_CHECK_INTERVAL,
-          5000
-        )
-      ) // Wait for the port to become available
-      .then(() => debugLog(`Engine process is terminated`))
-      .catch((err) => {
-        debugLog(
-          `Could not kill running process on port ${ENGINE_PORT}. Might be another process running on the same port? ${err}`
-        )
-        throw 'PORT_NOT_AVAILABLE'
-      })
-  }
-
-  if (subprocess?.pid) {
-    log(`[CORTEX]:: Killing PID ${subprocess.pid}`)
-    const pid = subprocess.pid
-    return new Promise<void>((resolve, reject) => {
-      terminate(pid, function (err) {
-        if (err) {
-          return killRequest()
-        } else {
-          return tcpPortUsed
-            .waitUntilFree(parseInt(ENGINE_PORT), PORT_CHECK_INTERVAL, 5000)
-            .then(() => resolve())
-            .then(() => log(`[CORTEX]:: cortex process is terminated`))
-            .catch(() => {
-              killRequest()
-            })
-        }
-      })
-    })
-  } else {
-    return killRequest()
-  }
-}
-
-/**
- * 1. Spawn the engine process
- * 2. Load the model into the engine subprocess
- * @returns
- */
-async function runEngineAndLoadModel(
-  settings: ModelLoadParams,
-  systemInfo: SystemInformation
-) {
-  return unloadModel()
-    .then(() => runEngine(systemInfo))
-    .then(() => loadModelRequest(settings))
-    .catch((err) => {
-      // TODO: Broadcast the error so the app can display a proper error message
-      debugLog(`${err}`, 'Error')
-      return { error: err }
-    })
-}
-
-/**
- * Loads an LLM model into the engine subprocess by sending an HTTP POST request.
- */
-async function loadModelRequest(
-  settings: ModelLoadParams
-): Promise<{ error: Error | undefined }> {
-  debugLog(`Loading model with params ${JSON.stringify(settings)}`)
-  return fetchRetry(LOAD_MODEL_URL, {
-    method: 'POST',
-    headers: {
-      'Content-Type': 'application/json',
-    },
-    body: JSON.stringify(settings),
-    retries: 3,
-    retryDelay: 500,
-  })
-    .then((res) => {
-      debugLog(`Load model success with response ${JSON.stringify(res)}`)
-      return Promise.resolve({ error: undefined })
-    })
-    .catch((err) => {
-      debugLog(`Load model failed with error ${err}`, 'Error')
-      return Promise.resolve({ error: err })
-    })
-}
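Editor's note: none of these node-side functions are called directly from the renderer bundle; `src/index.ts` reaches them through `executeOnMain`, as the decompress step in `install()` shows. A sketch of the same pattern for model loading — the call site is assumed, the function name comes from this file:

```typescript
import { executeOnMain, systemInformation } from '@janhq/core'

// NODE resolves to '@janhq/tensorrt-llm-extension/dist/node/index.cjs.js'.
async function loadOnMain(params: object) {
  const info = await systemInformation()
  return executeOnMain(NODE, 'loadModel', params, info)
}
```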
-/**
- * Spawns the engine subprocess.
- */
-async function runEngine(systemInfo: SystemInformation): Promise<void> {
-  debugLog(`Spawning engine subprocess...`)
-  if (systemInfo.gpuSetting == null) {
-    return Promise.reject(
-      'No GPU information found. Please check your GPU setting.'
-    )
-  }
-
-  if (systemInfo.gpuSetting?.gpus.length === 0) {
-    return Promise.reject('No GPU found. Please check your GPU setting.')
-  }
-
-  if (systemInfo.osInfo == null) {
-    return Promise.reject(
-      'No OS information found. Please check your OS setting.'
-    )
-  }
-  const platform = systemInfo.osInfo.platform
-  if (platform == null || supportedPlatform().includes(platform) === false) {
-    return Promise.reject(
-      'Your OS platform is not supported. Please check your OS setting.'
-    )
-  }
-
-  const gpu = systemInfo.gpuSetting?.gpus[0]
-  if (gpu.name.toLowerCase().includes('nvidia') === false) {
-    return Promise.reject('No Nvidia GPU found. Please check your GPU setting.')
-  }
-  const gpuArch = gpu.arch
-  if (gpuArch == null || supportedGpuArch().includes(gpuArch) === false) {
-    return Promise.reject(
-      `Your GPU: ${gpu.name} is not supported. Only ${supportedGpuArch().join(
-        ', '
-      )} series are supported.`
-    )
-  }
-  const janDataFolderPath = await getJanDataFolderPath()
-  const tensorRtVersion = TENSORRT_VERSION
-  const provider = PROVIDER
-
-  return new Promise<void>((resolve, reject) => {
-    // Current directory by default
-
-    const executableFolderPath = path.join(
-      janDataFolderPath,
-      'engines',
-      provider,
-      tensorRtVersion,
-      gpuArch
-    )
-    const nitroExecutablePath = path.join(
-      executableFolderPath,
-      platform === 'win32' ? 'nitro.exe' : 'nitro'
-    )
-
-    const args: string[] = ['1', ENGINE_HOST, ENGINE_PORT]
-    // Execute the binary
-    debugLog(`Spawn nitro at path: ${nitroExecutablePath}, and args: ${args}`)
-    subprocess = spawn(nitroExecutablePath, args, {
-      cwd: executableFolderPath,
-      env: {
-        ...process.env,
-      },
-    })
-
-    // Handle subprocess output
-    subprocess.stdout.on('data', (data: any) => {
-      debugLog(`${data}`)
-    })
-
-    subprocess.stderr.on('data', (data: any) => {
-      debugLog(`${data}`)
-    })
-
-    subprocess.on('close', (code: any) => {
-      debugLog(`Engine exited with code: ${code}`)
-      subprocess = undefined
-      reject(`child process exited with code ${code}`)
-    })
-
-    tcpPortUsed
-      .waitUntilUsed(parseInt(ENGINE_PORT), PORT_CHECK_INTERVAL, 30000)
-      .then(() => {
-        debugLog(`Engine is ready`)
-        resolve()
-      })
-  })
-}
-
-function debugLog(message: string, level: string = 'Debug') {
-  log(`[TENSORRT_LLM_NITRO]::${level}:${message}`)
-}
-
-const decompressRunner = async (zipPath: string, output: string) => {
-  console.debug(`Decompressing ${zipPath} to ${output}...`)
-  try {
-    const files = await decompress(zipPath, output)
-    console.debug('Decompress finished!', files)
-  } catch (err) {
-    console.error(`Decompress ${zipPath} failed: ${err}`)
-  }
-}
-
-/**
- * Parses a prompt template into args settings
- * @param promptTemplate Template as string
- * @returns
- */
-function promptTemplateConverter(promptTemplate: string): PromptTemplate {
-  // Split the string using the markers
-  const systemMarker = '{system_message}'
-  const promptMarker = '{prompt}'
-
-  if (
-    promptTemplate.includes(systemMarker) &&
-    promptTemplate.includes(promptMarker)
-  ) {
-    // Find the indices of the markers
-    const systemIndex = promptTemplate.indexOf(systemMarker)
-    const promptIndex = promptTemplate.indexOf(promptMarker)
-
-    // Extract the parts of the string
-    const system_prompt = promptTemplate.substring(0, systemIndex)
-    const user_prompt = promptTemplate.substring(
-      systemIndex + systemMarker.length,
-      promptIndex
-    )
-    const ai_prompt = promptTemplate.substring(
-      promptIndex + promptMarker.length
-    )
-
-    // Return the split parts
-    return { system_prompt, user_prompt, ai_prompt }
-  } else if (promptTemplate.includes(promptMarker)) {
-    // Extract the parts of the string for the case where only promptMarker is present
-    const promptIndex = promptTemplate.indexOf(promptMarker)
-    const user_prompt = promptTemplate.substring(0, promptIndex)
-    const ai_prompt = promptTemplate.substring(
-      promptIndex + promptMarker.length
-    )
-
-    // Return the split parts
-    return { user_prompt, ai_prompt }
-  }
-
-  // Return an error if none of the conditions are met
-  return { error: 'Cannot split prompt template' }
-}
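Editor's note: applied to the Mistral template shipped in `resources/models.json`, the converter takes the prompt-only branch, since that template has no `{system_message}` marker. A worked example:

```typescript
const parts = promptTemplateConverter('[INST] {prompt} [/INST]')
// promptIndex = 7 (start of '{prompt}'), marker length = 8, so:
// => { user_prompt: '[INST] ', ai_prompt: ' [/INST]' }
```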
-
-export default {
-  supportedPlatform,
-  supportedGpuArch,
-  decompressRunner,
-  loadModel,
-  unloadModel,
-  dispose: unloadModel,
-}
diff --git a/extensions/tensorrt-llm-extension/tsconfig.json b/extensions/tensorrt-llm-extension/tsconfig.json
deleted file mode 100644
index 94465ebb6..000000000
--- a/extensions/tensorrt-llm-extension/tsconfig.json
+++ /dev/null
@@ -1,21 +0,0 @@
-{
-  "compilerOptions": {
-    "moduleResolution": "node",
-    "target": "ES2015",
-    "module": "ES2020",
-    "lib": ["es2015", "es2016", "es2017", "dom"],
-    "strict": true,
-    "sourceMap": true,
-    "declaration": true,
-    "allowSyntheticDefaultImports": true,
-    "experimentalDecorators": true,
-    "emitDecoratorMetadata": true,
-    "declarationDir": "dist/types",
-    "outDir": "dist",
-    "importHelpers": true,
-    "resolveJsonModule": true,
-    "typeRoots": ["node_modules/@types"]
-  },
-  "include": ["src"],
-  "exclude": ["**/*.test.ts"]
-}