From a221fdf09efefdfdb049710e5c904cf0d93ddcf0 Mon Sep 17 00:00:00 2001
From: Faisal Amir <urmauur@gmail.com>
Date: Mon, 4 Nov 2024 12:46:42 +0700
Subject: [PATCH 01/71] fix: slate react version (#3940)

* fix: slate react version

* fix: add slate-dom package
---
 web/package.json | 5 +++--
 1 file changed, 3 insertions(+), 2 deletions(-)

diff --git a/web/package.json b/web/package.json
index bed94aed4..af3bce5d1 100644
--- a/web/package.json
+++ b/web/package.json
@@ -52,8 +52,9 @@
     "uuid": "^9.0.1",
     "zod": "^3.22.4",
     "slate": "latest",
-    "slate-react": "latest",
-    "slate-history": "latest"
+    "slate-dom": "0.111.0",
+    "slate-react": "0.110.3",
+    "slate-history": "0.110.3"
   },
   "devDependencies": {
     "@next/eslint-plugin-next": "^14.0.1",

From a4ebf69bc99eab130880d197194a88ee444a0549 Mon Sep 17 00:00:00 2001
From: Faisal Amir <urmauur@gmail.com>
Date: Mon, 4 Nov 2024 13:27:03 +0700
Subject: [PATCH 02/71] fix: inconsistent left panel background when responsive
 (#3936)

---
 themes/joi-dark/theme.json | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/themes/joi-dark/theme.json b/themes/joi-dark/theme.json
index d389d853c..22ed778ba 100644
--- a/themes/joi-dark/theme.json
+++ b/themes/joi-dark/theme.json
@@ -52,7 +52,7 @@
     },
 
     "left-panel": {
-      "bg": "0, 0%, 13%, 0",
+      "bg": "0, 0%, 13%, 1",
       "menu": "0, 0%, 95%, 1",
       "menu-hover": "0, 0%, 28%, 0.2",
       "menu-active": "0, 0%, 100%, 1",

From 3c721cb97d0b1ec048a7fcd7847b1fbb5ccc99ba Mon Sep 17 00:00:00 2001
From: Faisal Amir <urmauur@gmail.com>
Date: Mon, 4 Nov 2024 14:07:20 +0700
Subject: [PATCH 03/71] ui: reduced aggressive blur effect on the modal
 component (#3937)

---
 joi/src/core/Modal/styles.scss | 1 -
 themes/dark-dimmed/theme.json  | 2 +-
 themes/joi-light/theme.json    | 2 +-
 themes/night-blue/theme.json   | 2 +-
 4 files changed, 3 insertions(+), 4 deletions(-)

diff --git a/joi/src/core/Modal/styles.scss b/joi/src/core/Modal/styles.scss
index 11af9418a..717ce2ac7 100644
--- a/joi/src/core/Modal/styles.scss
+++ b/joi/src/core/Modal/styles.scss
@@ -3,7 +3,6 @@ button,
 fieldset,
 .modal {
   &__overlay {
-    @apply backdrop-blur-lg;
     background-color: hsla(var(--modal-overlay));
     z-index: 200;
     position: fixed;
diff --git a/themes/dark-dimmed/theme.json b/themes/dark-dimmed/theme.json
index c96085092..062469072 100644
--- a/themes/dark-dimmed/theme.json
+++ b/themes/dark-dimmed/theme.json
@@ -114,7 +114,7 @@
     },
 
     "modal": {
-      "overlay": "0, 0%, 0%, 0.5",
+      "overlay": "0, 0%, 0%, 0.7",
       "bg": "215, 25%, 9%, 1",
       "fg": "0, 0%, 100%, 11"
     },
diff --git a/themes/joi-light/theme.json b/themes/joi-light/theme.json
index 36b7a0a50..7468ff976 100644
--- a/themes/joi-light/theme.json
+++ b/themes/joi-light/theme.json
@@ -114,7 +114,7 @@
     },
 
     "modal": {
-      "overlay": "0, 0%, 0%, 0.5",
+      "overlay": "0, 0%, 0%, 0.7",
       "bg": "0, 0%, 100%, 1",
       "fg": "0, 0%, 0%, 1"
     },
diff --git a/themes/night-blue/theme.json b/themes/night-blue/theme.json
index c09442b43..96d2e242c 100644
--- a/themes/night-blue/theme.json
+++ b/themes/night-blue/theme.json
@@ -114,7 +114,7 @@
     },
 
     "modal": {
-      "overlay": "0, 0%, 0%, 0.5",
+      "overlay": "0, 0%, 0%, 0.7",
       "bg": "222, 96%, 16%, 1",
       "fg": "0, 0%, 100%, 11"
     },

From 755b2f234b744c1d88b93dea5f62b65cf47d7c53 Mon Sep 17 00:00:00 2001
From: Faisal Amir <urmauur@gmail.com>
Date: Mon, 4 Nov 2024 14:57:09 +0700
Subject: [PATCH 04/71] ux: auto-focus the chat input box (#3941)

* ux: auto-focus the chat input box and search box when switching between pages and thread lists

* chore: remove autofocus hub
---
 .../Thread/ThreadCenterPanel/ChatInput/RichTextEditor.tsx    | 5 ++++-
 1 file changed, 4 insertions(+), 1 deletion(-)

diff --git a/web/screens/Thread/ThreadCenterPanel/ChatInput/RichTextEditor.tsx b/web/screens/Thread/ThreadCenterPanel/ChatInput/RichTextEditor.tsx
index 88ddad485..87ea4e08f 100644
--- a/web/screens/Thread/ThreadCenterPanel/ChatInput/RichTextEditor.tsx
+++ b/web/screens/Thread/ThreadCenterPanel/ChatInput/RichTextEditor.tsx
@@ -269,10 +269,13 @@ const RichTextEditor = ({
   )
 
   useEffect(() => {
+    if (!ReactEditor.isFocused(editor)) {
+      ReactEditor.focus(editor)
+    }
     if (textareaRef.current) {
       textareaRef.current.focus()
     }
-  }, [activeThreadId])
+  }, [activeThreadId, editor])
 
   useEffect(() => {
     if (textareaRef.current?.clientHeight) {

From c6481d4668cb751501d1b7fc1892565bb7889017 Mon Sep 17 00:00:00 2001
From: Faisal Amir <urmauur@gmail.com>
Date: Mon, 4 Nov 2024 14:57:22 +0700
Subject: [PATCH 05/71] ui: remove redundant animation (#3938)

---
 web/containers/Layout/RibbonPanel/index.tsx        | 11 ++---------
 .../SettingLeftPanel/SettingItem/index.tsx         | 14 +++++---------
 web/screens/Thread/ThreadLeftPanel/index.tsx       | 11 +++--------
 3 files changed, 10 insertions(+), 26 deletions(-)

diff --git a/web/containers/Layout/RibbonPanel/index.tsx b/web/containers/Layout/RibbonPanel/index.tsx
index ee00b2237..13116dc16 100644
--- a/web/containers/Layout/RibbonPanel/index.tsx
+++ b/web/containers/Layout/RibbonPanel/index.tsx
@@ -1,5 +1,4 @@
 import { Tooltip, useMediaQuery } from '@janhq/joi'
-import { motion as m } from 'framer-motion'
 import { useAtom, useAtomValue, useSetAtom } from 'jotai'
 import {
   MessageCircleIcon,
@@ -109,19 +108,13 @@ export default function RibbonPanel() {
                   <div
                     data-testid={menu.name}
                     className={twMerge(
-                      'relative flex w-full flex-shrink-0 cursor-pointer items-center justify-center text-[hsla(var(--ribbon-panel-icon))] ',
+                      'relative flex w-full flex-shrink-0 cursor-pointer items-center justify-center rounded-md p-1.5 text-[hsla(var(--ribbon-panel-icon))]',
                       isActive &&
-                        'z-10 text-[hsla(var(--ribbon-panel-icon-active))]'
+                        'z-10 bg-[hsla(var(--ribbon-panel-icon-active-bg))] text-[hsla(var(--ribbon-panel-icon-active))]'
                     )}
                   >
                     {menu.icon}
                   </div>
-                  {isActive && (
-                    <m.div
-                      className="absolute inset-0 left-0 h-full w-full rounded-md bg-[hsla(var(--ribbon-panel-icon-active-bg))]"
-                      layoutId="active-state-menu"
-                    />
-                  )}
                 </div>
               }
               content={
diff --git a/web/screens/Settings/SettingLeftPanel/SettingItem/index.tsx b/web/screens/Settings/SettingLeftPanel/SettingItem/index.tsx
index 505a2cf40..b8c1994c5 100644
--- a/web/screens/Settings/SettingLeftPanel/SettingItem/index.tsx
+++ b/web/screens/Settings/SettingLeftPanel/SettingItem/index.tsx
@@ -1,6 +1,5 @@
 import { useCallback } from 'react'
 
-import { motion as m } from 'framer-motion'
 import { useAtom } from 'jotai'
 
 import { twMerge } from 'tailwind-merge'
@@ -22,23 +21,20 @@ const SettingItem = ({ name, setting }: Props) => {
 
   return (
     <div
-      className="relative my-0.5 block cursor-pointer rounded-lg px-2 py-1.5 hover:bg-[hsla(var(--left-panel-menu-hover))]"
+      className={twMerge(
+        'relative my-0.5 block cursor-pointer rounded-lg px-2 py-1.5 hover:bg-[hsla(var(--left-panel-menu-hover))]',
+        isActive && 'rounded-lg bg-[hsla(var(--left-panel-icon-active-bg))]'
+      )}
       onClick={onSettingItemClick}
     >
       <span
         className={twMerge(
-          'font-medium capitalize text-[hsla(var(--left-panel-menu))]',
+          'p-1.5 font-medium capitalize text-[hsla(var(--left-panel-menu))]',
           isActive && 'relative z-10 text-[hsla(var(--left-panel-menu-active))]'
         )}
       >
         {name}
       </span>
-      {isActive && (
-        <m.div
-          className="absolute inset-0 -left-0.5 h-full w-[calc(100%+4px)] rounded-lg bg-[hsla(var(--left-panel-icon-active-bg))]"
-          layoutId="active-static-menu"
-        />
-      )}
     </div>
   )
 }
diff --git a/web/screens/Thread/ThreadLeftPanel/index.tsx b/web/screens/Thread/ThreadLeftPanel/index.tsx
index 67bc5db0c..aca21c8a2 100644
--- a/web/screens/Thread/ThreadLeftPanel/index.tsx
+++ b/web/screens/Thread/ThreadLeftPanel/index.tsx
@@ -3,7 +3,6 @@ import { useCallback, useEffect, useState } from 'react'
 import { Thread } from '@janhq/core'
 
 import { Button } from '@janhq/joi'
-import { motion as m } from 'framer-motion'
 import { useAtomValue, useSetAtom } from 'jotai'
 import {
   GalleryHorizontalEndIcon,
@@ -118,7 +117,9 @@ const ThreadLeftPanel = () => {
             <div
               key={thread.id}
               className={twMerge(
-                `group/message relative mb-1 flex cursor-pointer flex-col transition-all hover:rounded-lg hover:bg-[hsla(var(--left-panel-menu-hover))]`
+                `group/message relative mb-1 flex cursor-pointer flex-col transition-all hover:rounded-lg hover:bg-[hsla(var(--left-panel-menu-hover))]`,
+                activeThreadId === thread.id &&
+                  'rounded-lg bg-[hsla(var(--left-panel-icon-active-bg))]'
               )}
               onClick={() => {
                 onThreadClick(thread)
@@ -208,12 +209,6 @@ const ThreadLeftPanel = () => {
                   </div>
                 </div>
               </div>
-              {activeThreadId === thread.id && (
-                <m.div
-                  className="absolute inset-0 left-0 h-full w-full rounded-lg bg-[hsla(var(--left-panel-icon-active-bg))]"
-                  layoutId="active-thread"
-                />
-              )}
             </div>
           ))}
         </div>

From 4080dc4b6589cb5b98d5e1b6436b64b008a3f58f Mon Sep 17 00:00:00 2001
From: Louis <louis@jan.ai>
Date: Tue, 15 Oct 2024 13:06:33 +0700
Subject: [PATCH 06/71] feat: model and cortex extensions update

---
 core/src/browser/extension.ts                 |   35 +-
 .../extensions/engines/AIEngine.test.ts       |    8 +-
 .../browser/extensions/engines/AIEngine.ts    |   54 +-
 .../extensions/engines/EngineManager.ts       |   19 +-
 .../extensions/engines/LocalOAIEngine.test.ts |    7 +-
 .../extensions/engines/LocalOAIEngine.ts      |   45 +-
 .../browser/extensions/engines/OAIEngine.ts   |   17 +-
 .../browser/extensions/engines/helpers/sse.ts |    6 +-
 core/src/browser/extensions/model.ts          |   29 +-
 core/src/browser/index.test.ts                |   55 +-
 core/src/browser/index.ts                     |    6 +
 core/src/browser/models/index.ts              |    5 +
 core/src/browser/models/manager.ts            |   40 +
 core/src/types/api/index.ts                   |    8 +-
 core/src/types/model/modelEntity.ts           |   16 +-
 core/src/types/model/modelInterface.ts        |   33 +-
 docs/src/pages/docs/built-in/llama-cpp.mdx    |    2 +-
 .../assistant-extension/src/node/retrieval.ts |    2 -
 .../.gitignore                                |    0
 .../README.md                                 |    0
 .../download.bat                              |    4 +-
 .../inference-cortex-extension/download.sh    |   36 +
 .../jest.config.js                            |    0
 .../package.json                              |   13 +-
 .../resources/default_settings.json           |    0
 .../resources/models/aya-23-35b/model.json    |    2 +-
 .../resources/models/aya-23-8b/model.json     |    2 +-
 .../resources/models/bakllava-1/model.json    |    2 +-
 .../models/codeninja-1.0-7b/model.json        |    2 +-
 .../resources/models/codestral-22b/model.json |    2 +-
 .../resources/models/command-r-34b/model.json |    2 +-
 .../models/deepseek-coder-1.3b/model.json     |    2 +-
 .../models/deepseek-coder-34b/model.json      |    2 +-
 .../resources/models/gemma-1.1-2b/model.json  |    2 +-
 .../resources/models/gemma-1.1-7b/model.json  |    2 +-
 .../resources/models/gemma-2-27b/model.json   |    2 +-
 .../resources/models/gemma-2-2b/model.json    |    2 +-
 .../resources/models/gemma-2-9b/model.json    |    2 +-
 .../models/llama2-chat-70b/model.json         |    2 +-
 .../models/llama2-chat-7b/model.json          |    2 +-
 .../models/llama3-8b-instruct/model.json      |    2 +-
 .../models/llama3-hermes-8b/model.json        |    2 +-
 .../models/llama3.1-70b-instruct/model.json   |    2 +-
 .../models/llama3.1-8b-instruct/model.json    |    2 +-
 .../models/llama3.2-1b-instruct/model.json    |    2 +-
 .../models/llama3.2-3b-instruct/model.json    |    2 +-
 .../models/llamacorn-1.1b/model.json          |    2 +-
 .../resources/models/llava-13b/model.json     |    2 +-
 .../resources/models/llava-7b/model.json      |    2 +-
 .../models/mistral-ins-7b-q4/model.json       |    2 +-
 .../models/mixtral-8x7b-instruct/model.json   |    2 +-
 .../resources/models/noromaid-7b/model.json   |    2 +-
 .../models/openchat-3.5-7b/model.json         |    2 +-
 .../resources/models/phi3-3.8b/model.json     |    2 +-
 .../resources/models/phi3-medium/model.json   |    2 +-
 .../resources/models/phind-34b/model.json     |    2 +-
 .../resources/models/qwen-7b/model.json       |    2 +-
 .../resources/models/qwen2-7b/model.json      |    2 +-
 .../models/qwen2.5-14b-instruct/model.json    |    2 +-
 .../models/qwen2.5-32b-instruct/model.json    |    2 +-
 .../models/qwen2.5-72b-instruct/model.json    |    2 +-
 .../models/qwen2.5-7b-instruct/model.json     |    2 +-
 .../qwen2.5-coder-7b-instruct/model.json      |    2 +-
 .../models/stable-zephyr-3b/model.json        |    2 +-
 .../models/stealth-v1.2-7b/model.json         |    2 +-
 .../models/tinyllama-1.1b/model.json          |    2 +-
 .../models/trinity-v1.2-7b/model.json         |    2 +-
 .../resources/models/vistral-7b/model.json    |    2 +-
 .../models/wizardcoder-13b/model.json         |    2 +-
 .../resources/models/yi-34b/model.json        |    2 +-
 .../rollup.config.ts                          |   14 +-
 .../src/@types/global.d.ts                    |    4 +-
 .../src/babel.config.js                       |    0
 .../inference-cortex-extension/src/index.ts   |  111 ++
 .../src/node/execute.test.ts                  |   69 +-
 .../src/node/execute.ts                       |    8 +-
 .../src/node/index.test.ts                    |   94 ++
 .../src/node/index.ts                         |   83 ++
 .../src/node/watchdog.ts                      |   84 ++
 .../tsconfig.json                             |    0
 .../inference-nitro-extension/bin/version.txt |    1 -
 .../inference-nitro-extension/download.sh     |   41 -
 .../inference-nitro-extension/src/index.ts    |  193 ----
 .../src/node/index.test.ts                    |  465 --------
 .../src/node/index.ts                         |  501 ---------
 extensions/model-extension/package.json       |   13 +-
 .../resources/default-model.json              |   36 -
 extensions/model-extension/rollup.config.ts   |   43 +-
 .../src/@types/InvalidHostError.ts            |    6 -
 .../src/@types/NotSupportModelError.ts        |    6 -
 .../model-extension/src/@types/global.d.ts    |    1 -
 extensions/model-extension/src/cortex.ts      |  166 +++
 .../model-extension/src/helpers/path.test.ts  |   87 --
 .../model-extension/src/helpers/path.ts       |   13 -
 extensions/model-extension/src/index.test.ts  |  890 ++-------------
 extensions/model-extension/src/index.ts       | 1001 ++++-------------
 extensions/model-extension/src/node/index.ts  |   54 -
 .../model-extension/src/node/node.test.ts     |   53 -
 .../tensorrt-llm-extension/src/index.ts       |    8 +-
 .../tensorrt-llm-extension/src/node/index.ts  |    2 +-
 .../BottomPanel/DownloadingState/index.tsx    |    8 +-
 .../SystemMonitor/TableActiveModel/index.tsx  |    4 +-
 web/containers/ModalCancelDownload/index.tsx  |    2 +-
 web/containers/ModelDropdown/index.tsx        |   20 +-
 web/containers/ModelLabel/index.tsx           |    6 +-
 web/containers/Providers/EventListener.tsx    |   17 +
 web/extension/ExtensionManager.ts             |    1 +
 web/helpers/atoms/Model.atom.ts               |   28 +-
 web/hooks/useActiveModel.ts                   |   13 +-
 web/hooks/useCreateNewThread.ts               |    4 +-
 web/hooks/useDeleteModel.test.ts              |    2 +-
 web/hooks/useDeleteModel.ts                   |    8 +-
 web/hooks/useDownloadModel.ts                 |  121 +-
 web/hooks/useDownloadState.ts                 |    2 +-
 web/hooks/useGetHFRepoData.ts                 |   15 +-
 web/hooks/useImportModel.ts                   |   16 +-
 web/hooks/useModels.ts                        |   47 +-
 web/hooks/useRecommendedModel.ts              |   12 +-
 web/hooks/useSendChatMessage.ts               |   69 +-
 .../Hub/ModelList/ModelHeader/index.tsx       |   10 +-
 web/screens/Hub/ModelList/ModelItem/index.tsx |   12 +-
 web/screens/Hub/ModelList/index.tsx           |   20 +-
 web/screens/Hub/index.tsx                     |    2 +-
 .../ModelDownloadRow/index.tsx                |   57 +-
 .../Settings/MyModels/MyModelList/index.tsx   |    6 +-
 .../AssistantSetting/index.tsx                |    8 +-
 .../ChatBody/OnDeviceStarterScreen/index.tsx  |   18 +-
 .../ThreadCenterPanel/ChatInput/index.tsx     |   20 +-
 .../MessageToolbar/index.tsx                  |    2 +-
 .../Thread/ThreadCenterPanel/index.tsx        |   10 +-
 web/screens/Thread/ThreadRightPanel/index.tsx |    4 +-
 web/services/appService.test.ts               |   46 +-
 132 files changed, 1416 insertions(+), 3771 deletions(-)
 create mode 100644 core/src/browser/models/index.ts
 create mode 100644 core/src/browser/models/manager.ts
 rename extensions/{inference-nitro-extension => inference-cortex-extension}/.gitignore (100%)
 rename extensions/{inference-nitro-extension => inference-cortex-extension}/README.md (100%)
 rename extensions/{inference-nitro-extension => inference-cortex-extension}/download.bat (93%)
 create mode 100755 extensions/inference-cortex-extension/download.sh
 rename extensions/{inference-nitro-extension => inference-cortex-extension}/jest.config.js (100%)
 rename extensions/{inference-nitro-extension => inference-cortex-extension}/package.json (82%)
 rename extensions/{inference-nitro-extension => inference-cortex-extension}/resources/default_settings.json (100%)
 rename extensions/{inference-nitro-extension => inference-cortex-extension}/resources/models/aya-23-35b/model.json (97%)
 rename extensions/{inference-nitro-extension => inference-cortex-extension}/resources/models/aya-23-8b/model.json (97%)
 rename extensions/{inference-nitro-extension => inference-cortex-extension}/resources/models/bakllava-1/model.json (97%)
 rename extensions/{inference-nitro-extension => inference-cortex-extension}/resources/models/codeninja-1.0-7b/model.json (97%)
 rename extensions/{inference-nitro-extension => inference-cortex-extension}/resources/models/codestral-22b/model.json (97%)
 rename extensions/{inference-nitro-extension => inference-cortex-extension}/resources/models/command-r-34b/model.json (97%)
 rename extensions/{inference-nitro-extension => inference-cortex-extension}/resources/models/deepseek-coder-1.3b/model.json (97%)
 rename extensions/{inference-nitro-extension => inference-cortex-extension}/resources/models/deepseek-coder-34b/model.json (97%)
 rename extensions/{inference-nitro-extension => inference-cortex-extension}/resources/models/gemma-1.1-2b/model.json (97%)
 rename extensions/{inference-nitro-extension => inference-cortex-extension}/resources/models/gemma-1.1-7b/model.json (97%)
 rename extensions/{inference-nitro-extension => inference-cortex-extension}/resources/models/gemma-2-27b/model.json (97%)
 rename extensions/{inference-nitro-extension => inference-cortex-extension}/resources/models/gemma-2-2b/model.json (97%)
 rename extensions/{inference-nitro-extension => inference-cortex-extension}/resources/models/gemma-2-9b/model.json (97%)
 rename extensions/{inference-nitro-extension => inference-cortex-extension}/resources/models/llama2-chat-70b/model.json (97%)
 rename extensions/{inference-nitro-extension => inference-cortex-extension}/resources/models/llama2-chat-7b/model.json (97%)
 rename extensions/{inference-nitro-extension => inference-cortex-extension}/resources/models/llama3-8b-instruct/model.json (97%)
 rename extensions/{inference-nitro-extension => inference-cortex-extension}/resources/models/llama3-hermes-8b/model.json (97%)
 rename extensions/{inference-nitro-extension => inference-cortex-extension}/resources/models/llama3.1-70b-instruct/model.json (98%)
 rename extensions/{inference-nitro-extension => inference-cortex-extension}/resources/models/llama3.1-8b-instruct/model.json (98%)
 rename extensions/{inference-nitro-extension => inference-cortex-extension}/resources/models/llama3.2-1b-instruct/model.json (97%)
 rename extensions/{inference-nitro-extension => inference-cortex-extension}/resources/models/llama3.2-3b-instruct/model.json (97%)
 rename extensions/{inference-nitro-extension => inference-cortex-extension}/resources/models/llamacorn-1.1b/model.json (97%)
 rename extensions/{inference-nitro-extension => inference-cortex-extension}/resources/models/llava-13b/model.json (97%)
 rename extensions/{inference-nitro-extension => inference-cortex-extension}/resources/models/llava-7b/model.json (97%)
 rename extensions/{inference-nitro-extension => inference-cortex-extension}/resources/models/mistral-ins-7b-q4/model.json (97%)
 rename extensions/{inference-nitro-extension => inference-cortex-extension}/resources/models/mixtral-8x7b-instruct/model.json (97%)
 rename extensions/{inference-nitro-extension => inference-cortex-extension}/resources/models/noromaid-7b/model.json (97%)
 rename extensions/{inference-nitro-extension => inference-cortex-extension}/resources/models/openchat-3.5-7b/model.json (97%)
 rename extensions/{inference-nitro-extension => inference-cortex-extension}/resources/models/phi3-3.8b/model.json (97%)
 rename extensions/{inference-nitro-extension => inference-cortex-extension}/resources/models/phi3-medium/model.json (97%)
 rename extensions/{inference-nitro-extension => inference-cortex-extension}/resources/models/phind-34b/model.json (97%)
 rename extensions/{inference-nitro-extension => inference-cortex-extension}/resources/models/qwen-7b/model.json (97%)
 rename extensions/{inference-nitro-extension => inference-cortex-extension}/resources/models/qwen2-7b/model.json (97%)
 rename extensions/{inference-nitro-extension => inference-cortex-extension}/resources/models/qwen2.5-14b-instruct/model.json (97%)
 rename extensions/{inference-nitro-extension => inference-cortex-extension}/resources/models/qwen2.5-32b-instruct/model.json (97%)
 rename extensions/{inference-nitro-extension => inference-cortex-extension}/resources/models/qwen2.5-72b-instruct/model.json (97%)
 rename extensions/{inference-nitro-extension => inference-cortex-extension}/resources/models/qwen2.5-7b-instruct/model.json (97%)
 rename extensions/{inference-nitro-extension => inference-cortex-extension}/resources/models/qwen2.5-coder-7b-instruct/model.json (97%)
 rename extensions/{inference-nitro-extension => inference-cortex-extension}/resources/models/stable-zephyr-3b/model.json (97%)
 rename extensions/{inference-nitro-extension => inference-cortex-extension}/resources/models/stealth-v1.2-7b/model.json (97%)
 rename extensions/{inference-nitro-extension => inference-cortex-extension}/resources/models/tinyllama-1.1b/model.json (97%)
 rename extensions/{inference-nitro-extension => inference-cortex-extension}/resources/models/trinity-v1.2-7b/model.json (97%)
 rename extensions/{inference-nitro-extension => inference-cortex-extension}/resources/models/vistral-7b/model.json (97%)
 rename extensions/{inference-nitro-extension => inference-cortex-extension}/resources/models/wizardcoder-13b/model.json (97%)
 rename extensions/{inference-nitro-extension => inference-cortex-extension}/resources/models/yi-34b/model.json (97%)
 rename extensions/{inference-nitro-extension => inference-cortex-extension}/rollup.config.ts (93%)
 rename extensions/{inference-nitro-extension => inference-cortex-extension}/src/@types/global.d.ts (70%)
 rename extensions/{inference-nitro-extension => inference-cortex-extension}/src/babel.config.js (100%)
 create mode 100644 extensions/inference-cortex-extension/src/index.ts
 rename extensions/{inference-nitro-extension => inference-cortex-extension}/src/node/execute.test.ts (83%)
 rename extensions/{inference-nitro-extension => inference-cortex-extension}/src/node/execute.ts (93%)
 create mode 100644 extensions/inference-cortex-extension/src/node/index.test.ts
 create mode 100644 extensions/inference-cortex-extension/src/node/index.ts
 create mode 100644 extensions/inference-cortex-extension/src/node/watchdog.ts
 rename extensions/{inference-nitro-extension => inference-cortex-extension}/tsconfig.json (100%)
 delete mode 100644 extensions/inference-nitro-extension/bin/version.txt
 delete mode 100755 extensions/inference-nitro-extension/download.sh
 delete mode 100644 extensions/inference-nitro-extension/src/index.ts
 delete mode 100644 extensions/inference-nitro-extension/src/node/index.test.ts
 delete mode 100644 extensions/inference-nitro-extension/src/node/index.ts
 delete mode 100644 extensions/model-extension/resources/default-model.json
 delete mode 100644 extensions/model-extension/src/@types/InvalidHostError.ts
 delete mode 100644 extensions/model-extension/src/@types/NotSupportModelError.ts
 create mode 100644 extensions/model-extension/src/cortex.ts
 delete mode 100644 extensions/model-extension/src/helpers/path.test.ts
 delete mode 100644 extensions/model-extension/src/helpers/path.ts
 delete mode 100644 extensions/model-extension/src/node/index.ts
 delete mode 100644 extensions/model-extension/src/node/node.test.ts

diff --git a/core/src/browser/extension.ts b/core/src/browser/extension.ts
index 603445745..d934e1c06 100644
--- a/core/src/browser/extension.ts
+++ b/core/src/browser/extension.ts
@@ -1,6 +1,8 @@
-import { SettingComponentProps } from '../types'
+import { Model, ModelEvent, SettingComponentProps } from '../types'
 import { getJanDataFolderPath, joinPath } from './core'
+import { events } from './events'
 import { fs } from './fs'
+import { ModelManager } from './models'
 
 export enum ExtensionTypeEnum {
   Assistant = 'assistant',
@@ -103,6 +105,22 @@ export abstract class BaseExtension implements ExtensionType {
     return undefined
   }
 
+  /**
+   * Registers models with the shared in-memory ModelManager, then emits OnModelsUpdate.
+   * @param models the models to register
+   */
+  async registerModels(models: Model[]): Promise<void> {
+    for (const model of models) {
+      ModelManager.instance().register(model)
+    }
+    events.emit(ModelEvent.OnModelsUpdate, {})
+  }
+
+  /**
+   * Register settings for the extension.
+   * @param settings
+   * @returns
+   */
   async registerSettings(settings: SettingComponentProps[]): Promise<void> {
     if (!this.name) {
       console.error('Extension name is not defined')
@@ -139,6 +157,12 @@ export abstract class BaseExtension implements ExtensionType {
     }
   }
 
+  /**
+   * Get the setting value for the key.
+   * @param key
+   * @param defaultValue
+   * @returns
+   */
   async getSetting<T>(key: string, defaultValue: T) {
     const keySetting = (await this.getSettings()).find((setting) => setting.key === key)
 
@@ -168,6 +192,10 @@ export abstract class BaseExtension implements ExtensionType {
     return
   }
 
+  /**
+   * Get the settings for the extension.
+   * @returns
+   */
   async getSettings(): Promise<SettingComponentProps[]> {
     if (!this.name) return []
 
@@ -189,6 +217,11 @@ export abstract class BaseExtension implements ExtensionType {
     }
   }
 
+  /**
+   * Update the settings for the extension.
+   * @param componentProps
+   * @returns
+   */
   async updateSettings(componentProps: Partial<SettingComponentProps>[]): Promise<void> {
     if (!this.name) return
 
diff --git a/core/src/browser/extensions/engines/AIEngine.test.ts b/core/src/browser/extensions/engines/AIEngine.test.ts
index 59dad280f..ab3280e1c 100644
--- a/core/src/browser/extensions/engines/AIEngine.test.ts
+++ b/core/src/browser/extensions/engines/AIEngine.test.ts
@@ -1,8 +1,6 @@
 import { AIEngine } from './AIEngine'
 import { events } from '../../events'
-import { ModelEvent, Model, ModelFile, InferenceEngine } from '../../../types'
-import { EngineManager } from './EngineManager'
-import { fs } from '../../fs'
+import { ModelEvent, Model } from '../../../types'
 
 jest.mock('../../events')
 jest.mock('./EngineManager')
@@ -26,7 +24,7 @@ describe('AIEngine', () => {
   })
 
   it('should load model if provider matches', async () => {
-    const model: ModelFile = { id: 'model1', engine: 'test-provider' } as any
+    const model: any = { id: 'model1', engine: 'test-provider' } as any
 
     await engine.loadModel(model)
 
@@ -34,7 +32,7 @@ describe('AIEngine', () => {
   })
 
   it('should not load model if provider does not match', async () => {
-    const model: ModelFile = { id: 'model1', engine: 'other-provider' } as any
+    const model: any = { id: 'model1', engine: 'other-provider' } as any
 
     await engine.loadModel(model)
 
diff --git a/core/src/browser/extensions/engines/AIEngine.ts b/core/src/browser/extensions/engines/AIEngine.ts
index 75354de88..d0528b0ab 100644
--- a/core/src/browser/extensions/engines/AIEngine.ts
+++ b/core/src/browser/extensions/engines/AIEngine.ts
@@ -1,17 +1,14 @@
-import { getJanDataFolderPath, joinPath } from '../../core'
 import { events } from '../../events'
 import { BaseExtension } from '../../extension'
-import { fs } from '../../fs'
-import { MessageRequest, Model, ModelEvent, ModelFile } from '../../../types'
+import { MessageRequest, Model, ModelEvent } from '../../../types'
 import { EngineManager } from './EngineManager'
+import { ModelManager } from '../../models/manager'
 
 /**
  * Base AIEngine
  * Applicable to all AI Engines
  */
 export abstract class AIEngine extends BaseExtension {
-  private static modelsFolder = 'models'
-
   // The inference engine
   abstract provider: string
 
@@ -21,7 +18,7 @@ export abstract class AIEngine extends BaseExtension {
   override onLoad() {
     this.registerEngine()
 
-    events.on(ModelEvent.OnModelInit, (model: ModelFile) => this.loadModel(model))
+    events.on(ModelEvent.OnModelInit, (model: Model) => this.loadModel(model))
     events.on(ModelEvent.OnModelStop, (model: Model) => this.unloadModel(model))
   }
 
@@ -32,53 +29,10 @@ export abstract class AIEngine extends BaseExtension {
     EngineManager.instance().register(this)
   }
 
-  async registerModels(models: Model[]): Promise<void> {
-    const modelFolderPath = await joinPath([await getJanDataFolderPath(), AIEngine.modelsFolder])
-
-    let shouldNotifyModelUpdate = false
-    for (const model of models) {
-      const modelPath = await joinPath([modelFolderPath, model.id])
-      const isExist = await fs.existsSync(modelPath)
-
-      if (isExist) {
-        await this.migrateModelIfNeeded(model, modelPath)
-        continue
-      }
-
-      await fs.mkdir(modelPath)
-      await fs.writeFileSync(
-        await joinPath([modelPath, 'model.json']),
-        JSON.stringify(model, null, 2)
-      )
-      shouldNotifyModelUpdate = true
-    }
-
-    if (shouldNotifyModelUpdate) {
-      events.emit(ModelEvent.OnModelsUpdate, {})
-    }
-  }
-
-  async migrateModelIfNeeded(model: Model, modelPath: string): Promise<void> {
-    try {
-      const modelJson = await fs.readFileSync(await joinPath([modelPath, 'model.json']), 'utf-8')
-      const currentModel: Model = JSON.parse(modelJson)
-      if (currentModel.version !== model.version) {
-        await fs.writeFileSync(
-          await joinPath([modelPath, 'model.json']),
-          JSON.stringify(model, null, 2)
-        )
-
-        events.emit(ModelEvent.OnModelsUpdate, {})
-      }
-    } catch (error) {
-      console.warn('Error while try to migrating model', error)
-    }
-  }
-
   /**
    * Loads the model.
    */
-  async loadModel(model: ModelFile): Promise<any> {
+  async loadModel(model: Model): Promise<any> {
     if (model.engine.toString() !== this.provider) return Promise.resolve()
     events.emit(ModelEvent.OnModelReady, model)
     return Promise.resolve()
diff --git a/core/src/browser/extensions/engines/EngineManager.ts b/core/src/browser/extensions/engines/EngineManager.ts
index 2980c5c65..90ce75ac5 100644
--- a/core/src/browser/extensions/engines/EngineManager.ts
+++ b/core/src/browser/extensions/engines/EngineManager.ts
@@ -1,3 +1,4 @@
+import { InferenceEngine } from '../../../types'
 import { AIEngine } from './AIEngine'
 
 /**
@@ -20,6 +21,22 @@ export class EngineManager {
    * @returns The engine, if found.
    */
   get<T extends AIEngine>(provider: string): T | undefined {
+    // Backward compatibility: nitro has been migrated to cortex, so the legacy
+    // nitro name and every cortex engine flavour resolve to the cortex engine.
+    if (
+      [
+        InferenceEngine.nitro,
+        InferenceEngine.cortex,
+        InferenceEngine.cortex_llamacpp,
+        InferenceEngine.cortex_onnx,
+        InferenceEngine.cortex_tensorrtllm,
+      ]
+        .map((e) => e.toString())
+        .includes(provider)
+    ) {
+      provider = InferenceEngine.cortex
+    }
+
     return this.engines.get(provider) as T | undefined
   }
 
@@ -27,6 +44,6 @@ export class EngineManager {
    * The instance of the engine manager.
    */
   static instance(): EngineManager {
-    return window.core?.engineManager as EngineManager ?? new EngineManager()
+    return (window.core?.engineManager as EngineManager) ?? new EngineManager()
   }
 }
diff --git a/core/src/browser/extensions/engines/LocalOAIEngine.test.ts b/core/src/browser/extensions/engines/LocalOAIEngine.test.ts
index 4ae81496f..4a36f6b12 100644
--- a/core/src/browser/extensions/engines/LocalOAIEngine.test.ts
+++ b/core/src/browser/extensions/engines/LocalOAIEngine.test.ts
@@ -3,7 +3,7 @@
  */
 import { LocalOAIEngine } from './LocalOAIEngine'
 import { events } from '../../events'
-import { ModelEvent, ModelFile, Model } from '../../../types'
+import { ModelEvent, Model } from '../../../types'
 import { executeOnMain, systemInformation, dirName } from '../../core'
 
 jest.mock('../../core', () => ({
@@ -43,7 +43,7 @@ describe('LocalOAIEngine', () => {
   })
 
   it('should load model correctly', async () => {
-    const model: ModelFile = { engine: 'testProvider', file_path: 'path/to/model' } as any
+    const model: Model = { engine: 'testProvider', file_path: 'path/to/model' } as any
     const modelFolder = 'path/to'
     const systemInfo = { os: 'testOS' }
     const res = { error: null }
@@ -54,7 +54,6 @@ describe('LocalOAIEngine', () => {
 
     await engine.loadModel(model)
 
-    expect(dirName).toHaveBeenCalledWith(model.file_path)
     expect(systemInformation).toHaveBeenCalled()
     expect(executeOnMain).toHaveBeenCalledWith(
       engine.nodeModule,
@@ -66,7 +65,7 @@ describe('LocalOAIEngine', () => {
   })
 
   it('should handle load model error', async () => {
-    const model: ModelFile = { engine: 'testProvider', file_path: 'path/to/model' } as any
+    const model: any = { engine: 'testProvider', file_path: 'path/to/model' } as any
     const modelFolder = 'path/to'
     const systemInfo = { os: 'testOS' }
     const res = { error: 'load error' }
diff --git a/core/src/browser/extensions/engines/LocalOAIEngine.ts b/core/src/browser/extensions/engines/LocalOAIEngine.ts
index 123b9a593..6c70fa186 100644
--- a/core/src/browser/extensions/engines/LocalOAIEngine.ts
+++ b/core/src/browser/extensions/engines/LocalOAIEngine.ts
@@ -1,6 +1,6 @@
 import { executeOnMain, systemInformation, dirName } from '../../core'
 import { events } from '../../events'
-import { Model, ModelEvent, ModelFile } from '../../../types'
+import { Model, ModelEvent } from '../../../types'
 import { OAIEngine } from './OAIEngine'
 
 /**
@@ -22,35 +22,36 @@ export abstract class LocalOAIEngine extends OAIEngine {
   override onLoad() {
     super.onLoad()
     // These events are applicable to local inference providers
-    events.on(ModelEvent.OnModelInit, (model: ModelFile) => this.loadModel(model))
+    events.on(ModelEvent.OnModelInit, (model: Model) => this.loadModel(model))
     events.on(ModelEvent.OnModelStop, (model: Model) => this.unloadModel(model))
   }
 
   /**
    * Load the model.
    */
-  override async loadModel(model: ModelFile): Promise<void> {
+  override async loadModel(model: Model): Promise<void> {
     if (model.engine.toString() !== this.provider) return
-    const modelFolder = await dirName(model.file_path)
-    const systemInfo = await systemInformation()
-    const res = await executeOnMain(
-      this.nodeModule,
-      this.loadModelFunctionName,
-      {
-        modelFolder,
-        model,
-      },
-      systemInfo
-    )
+    // const modelFolder = await dirName(model.file_path)
+    // const systemInfo = await systemInformation()
+    // const res = await executeOnMain(
+    //   this.nodeModule,
+    //   this.loadModelFunctionName,
+    //   {
+    //     modelFolder,
+    //     model,
+    //   },
+    //   systemInfo
+    // )
 
-    if (res?.error) {
-      events.emit(ModelEvent.OnModelFail, { error: res.error })
-      return Promise.reject(res.error)
-    } else {
-      this.loadedModel = model
-      events.emit(ModelEvent.OnModelReady, model)
-      return Promise.resolve()
-    }
+    // if (res?.error) {
+    //   events.emit(ModelEvent.OnModelFail, { error: res.error })
+    //   return Promise.reject(res.error)
+    // } else {
+    //   this.loadedModel = model
+    //   events.emit(ModelEvent.OnModelReady, model)
+    //   return Promise.resolve()
+    // }
+    return Promise.resolve()
   }
   /**
    * Stops the model.
diff --git a/core/src/browser/extensions/engines/OAIEngine.ts b/core/src/browser/extensions/engines/OAIEngine.ts
index a8dde4677..694a87264 100644
--- a/core/src/browser/extensions/engines/OAIEngine.ts
+++ b/core/src/browser/extensions/engines/OAIEngine.ts
@@ -55,7 +55,21 @@ export abstract class OAIEngine extends AIEngine {
    * Inference request
    */
   override async inference(data: MessageRequest) {
-    if (data.model?.engine?.toString() !== this.provider) return
+    if (!data.model?.id) {
+      events.emit(MessageEvent.OnMessageResponse, {
+        status: MessageStatus.Error,
+        content: [
+          {
+            type: ContentType.Text,
+            text: {
+              value: 'No model ID provided',
+              annotations: [],
+            },
+          },
+        ],
+      })
+      return
+    }
 
     const timestamp = Date.now()
     const message: ThreadMessage = {
@@ -89,7 +103,6 @@ export abstract class OAIEngine extends AIEngine {
       model: model.id,
       stream: true,
       ...model.parameters,
-      ...(this.provider === 'nitro' ? { engine: 'cortex.llamacpp'} : {}),
     }
     if (this.transformPayload) {
       requestBody = this.transformPayload(requestBody)
diff --git a/core/src/browser/extensions/engines/helpers/sse.ts b/core/src/browser/extensions/engines/helpers/sse.ts
index 024ced470..9713256b3 100644
--- a/core/src/browser/extensions/engines/helpers/sse.ts
+++ b/core/src/browser/extensions/engines/helpers/sse.ts
@@ -10,7 +10,7 @@ export function requestInference(
   requestBody: any,
   model: {
     id: string
-    parameters: ModelRuntimeParams
+    parameters?: ModelRuntimeParams
   },
   controller?: AbortController,
   headers?: HeadersInit,
@@ -22,7 +22,7 @@ export function requestInference(
       headers: {
         'Content-Type': 'application/json',
         'Access-Control-Allow-Origin': '*',
-        'Accept': model.parameters.stream ? 'text/event-stream' : 'application/json',
+        'Accept': model.parameters?.stream ? 'text/event-stream' : 'application/json',
         ...headers,
       },
       body: JSON.stringify(requestBody),
@@ -45,7 +45,7 @@ export function requestInference(
           subscriber.complete()
           return
         }
-        if (model.parameters.stream === false) {
+        if (model.parameters?.stream === false) {
           const data = await response.json()
           if (transformResponse) {
             subscriber.next(transformResponse(data))
diff --git a/core/src/browser/extensions/model.ts b/core/src/browser/extensions/model.ts
index 040542927..d111c1d3a 100644
--- a/core/src/browser/extensions/model.ts
+++ b/core/src/browser/extensions/model.ts
@@ -1,13 +1,5 @@
 import { BaseExtension, ExtensionTypeEnum } from '../extension'
-import {
-  GpuSetting,
-  HuggingFaceRepoData,
-  ImportingModel,
-  Model,
-  ModelFile,
-  ModelInterface,
-  OptionType,
-} from '../../types'
+import { Model, ModelInterface, OptionType } from '../../types'
 
 /**
  * Model extension for managing models.
@@ -20,17 +12,10 @@ export abstract class ModelExtension extends BaseExtension implements ModelInter
     return ExtensionTypeEnum.Model
   }
 
-  abstract downloadModel(
-    model: Model,
-    gpuSettings?: GpuSetting,
-    network?: { proxy: string; ignoreSSL?: boolean }
-  ): Promise<void>
-  abstract cancelModelDownload(modelId: string): Promise<void>
-  abstract deleteModel(model: ModelFile): Promise<void>
-  abstract getDownloadedModels(): Promise<ModelFile[]>
-  abstract getConfiguredModels(): Promise<ModelFile[]>
-  abstract importModels(models: ImportingModel[], optionType: OptionType): Promise<void>
-  abstract updateModelInfo(modelInfo: Partial<ModelFile>): Promise<ModelFile>
-  abstract fetchHuggingFaceRepoData(repoId: string): Promise<HuggingFaceRepoData>
-  abstract getDefaultModel(): Promise<Model>
+  abstract getModels(): Promise<Model[]>
+  abstract pullModel(model: string): Promise<void>
+  abstract cancelModelPull(modelId: string): Promise<void>
+  abstract importModel(model: string, modelPath: string): Promise<void>
+  abstract updateModel(modelInfo: Partial<Model>): Promise<Model>
+  abstract deleteModel(model: string): Promise<void>
 }
diff --git a/core/src/browser/index.test.ts b/core/src/browser/index.test.ts
index 339cd9046..c8cabbb0b 100644
--- a/core/src/browser/index.test.ts
+++ b/core/src/browser/index.test.ts
@@ -1,32 +1,37 @@
-import * as Core from './core';
-import * as Events from './events';
-import * as FileSystem from './fs';
-import * as Extension from './extension';
-import * as Extensions from './extensions';
-import * as Tools from './tools';
+import * as Core from './core'
+import * as Events from './events'
+import * as FileSystem from './fs'
+import * as Extension from './extension'
+import * as Extensions from './extensions'
+import * as Tools from './tools'
+import * as Models from './models'
 
 describe('Module Tests', () => {
-    it('should export Core module', () => {
-        expect(Core).toBeDefined();
-    });
+  it('should export Core module', () => {
+    expect(Core).toBeDefined()
+  })
 
-    it('should export Event module', () => {
-        expect(Events).toBeDefined();
-    });
+  it('should export Event module', () => {
+    expect(Events).toBeDefined()
+  })
 
-    it('should export Filesystem module', () => {
-        expect(FileSystem).toBeDefined();
-    });
+  it('should export Filesystem module', () => {
+    expect(FileSystem).toBeDefined()
+  })
 
-    it('should export Extension module', () => {
-        expect(Extension).toBeDefined();
-    });
+  it('should export Extension module', () => {
+    expect(Extension).toBeDefined()
+  })
 
-    it('should export all base extensions', () => {
-        expect(Extensions).toBeDefined();
-    });
+  it('should export all base extensions', () => {
+    expect(Extensions).toBeDefined()
+  })
 
-    it('should export all base tools', () => {
-        expect(Tools).toBeDefined();
-    });
-});
\ No newline at end of file
+  it('should export all base tools', () => {
+    expect(Tools).toBeDefined()
+  })
+
+  it('should export all base models', () => {
+    expect(Models).toBeDefined()
+  })
+})
diff --git a/core/src/browser/index.ts b/core/src/browser/index.ts
index a7803c7e0..a6ce187ca 100644
--- a/core/src/browser/index.ts
+++ b/core/src/browser/index.ts
@@ -33,3 +33,9 @@ export * from './extensions'
  * @module
  */
 export * from './tools'
+
+/**
+ * Export all base models.
+ * @module
+ */
+export * from './models'
diff --git a/core/src/browser/models/index.ts b/core/src/browser/models/index.ts
new file mode 100644
index 000000000..c16479b2b
--- /dev/null
+++ b/core/src/browser/models/index.ts
@@ -0,0 +1,5 @@
+/**
+ * Export ModelManager
+ * @module
+ */
+export { ModelManager } from './manager'
diff --git a/core/src/browser/models/manager.ts b/core/src/browser/models/manager.ts
new file mode 100644
index 000000000..4853989fe
--- /dev/null
+++ b/core/src/browser/models/manager.ts
@@ -0,0 +1,40 @@
+import { Model, ModelEvent } from '../../types'
+import { events } from '../events'
+
+/**
+ * Registry of models shared across extensions; it publishes itself on `window.core.modelManager` when `window.core` exists.
+ */
+export class ModelManager {
+  public models = new Map<string, Model>()
+
+  constructor() {
+    if (typeof window !== 'undefined' && window.core) {
+      window.core.modelManager = this
+    }
+  }
+
+  /**
+   * Registers a model under its id (replacing any previous entry) and emits ModelEvent.OnModelsUpdate.
+   * @param model - The model to register.
+   */
+  register<T extends Model>(model: T) {
+    this.models.set(model.id, model)
+    events.emit(ModelEvent.OnModelsUpdate, {})
+  }
+
+  /**
+   * Retrieves a model by its id.
+   * @param id - The id of the model to retrieve.
+   * @returns The model, if found.
+   */
+  get<T extends Model>(id: string): T | undefined {
+    return this.models.get(id) as T | undefined
+  }
+
+  /**
+   * Returns the model manager stored on `window.core`, or a new ModelManager when none is stored.
+   */
+  static instance(): ModelManager {
+    return (window.core?.modelManager as ModelManager) ?? new ModelManager()
+  }
+}
diff --git a/core/src/types/api/index.ts b/core/src/types/api/index.ts
index 8f1ff70bf..c0de0f5e8 100644
--- a/core/src/types/api/index.ts
+++ b/core/src/types/api/index.ts
@@ -69,9 +69,11 @@ export enum DownloadRoute {
 }
 
 export enum DownloadEvent {
-  onFileDownloadUpdate = 'onFileDownloadUpdate',
-  onFileDownloadError = 'onFileDownloadError',
-  onFileDownloadSuccess = 'onFileDownloadSuccess',
+  onFileDownloadUpdate = 'DownloadUpdated',
+  onFileDownloadError = 'DownloadError',
+  onFileDownloadSuccess = 'DownloadSuccess',
+  onFileDownloadStopped = 'DownloadStopped',
+  onFileDownloadStarted = 'DownloadStarted',
   onFileUnzipSuccess = 'onFileUnzipSuccess',
 }
 
diff --git a/core/src/types/model/modelEntity.ts b/core/src/types/model/modelEntity.ts
index 933c698c3..ed1db94bd 100644
--- a/core/src/types/model/modelEntity.ts
+++ b/core/src/types/model/modelEntity.ts
@@ -6,8 +6,8 @@ import { FileMetadata } from '../file'
  */
 export type ModelInfo = {
   id: string
-  settings: ModelSettingParams
-  parameters: ModelRuntimeParams
+  settings?: ModelSettingParams
+  parameters?: ModelRuntimeParams
   engine?: InferenceEngine
 }
 
@@ -28,9 +28,10 @@ export enum InferenceEngine {
   nitro_tensorrt_llm = 'nitro-tensorrt-llm',
   cohere = 'cohere',
   nvidia = 'nvidia',
-  cortex_llamacpp = 'cortex.llamacpp',
-  cortex_onnx = 'cortex.onnx',
-  cortex_tensorrtllm = 'cortex.tensorrt-llm',
+  cortex = 'cortex',
+  cortex_llamacpp = 'llama-cpp',
+  cortex_onnx = 'onnxruntime',
+  cortex_tensorrtllm = 'tensorrt-llm',
 }
 
 export type ModelArtifact = {
@@ -153,8 +154,3 @@ export type ModelRuntimeParams = {
 export type ModelInitFailed = Model & {
   error: Error
 }
-
-/**
- * ModelFile is the model.json entity and it's file metadata
- */
-export type ModelFile = Model & FileMetadata
diff --git a/core/src/types/model/modelInterface.ts b/core/src/types/model/modelInterface.ts
index 08d456b7e..088118f69 100644
--- a/core/src/types/model/modelInterface.ts
+++ b/core/src/types/model/modelInterface.ts
@@ -1,5 +1,5 @@
-import { GpuSetting } from '../miscellaneous'
-import { Model, ModelFile } from './modelEntity'
+import { Model } from './modelEntity'
+import { OptionType } from './modelImport'
 
 /**
  * Model extension for managing models.
@@ -8,38 +8,41 @@ export interface ModelInterface {
   /**
    * Downloads a model.
    * @param model - The model to download.
-   * @param network - Optional object to specify proxy/whether to ignore SSL certificates.
    * @returns A Promise that resolves when the model has been downloaded.
    */
-  downloadModel(
-    model: ModelFile,
-    gpuSettings?: GpuSetting,
-    network?: { ignoreSSL?: boolean; proxy?: string }
-  ): Promise<void>
+  pullModel(model: string): Promise<void>
 
   /**
    * Cancels the download of a specific model.
    * @param {string} modelId - The ID of the model to cancel the download for.
    * @returns {Promise<void>} A promise that resolves when the download has been cancelled.
    */
-  cancelModelDownload(modelId: string): Promise<void>
+  cancelModelPull(modelId: string): Promise<void>
 
   /**
    * Deletes a model.
    * @param modelId - The ID of the model to delete.
    * @returns A Promise that resolves when the model has been deleted.
    */
-  deleteModel(model: ModelFile): Promise<void>
+  deleteModel(model: string): Promise<void>
 
   /**
-   * Gets a list of downloaded models.
+   * Gets downloaded models.
    * @returns A Promise that resolves with an array of downloaded models.
    */
-  getDownloadedModels(): Promise<ModelFile[]>
+  getModels(): Promise<Model[]>
 
   /**
-   * Gets a list of configured models.
-   * @returns A Promise that resolves with an array of configured models.
+   * Update a pulled model's metadata
+   * @param model - The model to update.
+   * @returns A Promise that resolves when the model has been updated.
    */
-  getConfiguredModels(): Promise<ModelFile[]>
+  updateModel(model: Partial<Model>): Promise<Model>
+
+  /**
+   * Import an existing model file.
+   * @param model - id of the model to import
+   * @param modelPath - path of the model file
+   */
+  importModel(model: string, modelPath: string): Promise<void>
 }
diff --git a/docs/src/pages/docs/built-in/llama-cpp.mdx b/docs/src/pages/docs/built-in/llama-cpp.mdx
index 5b7b0453a..8e2fa8498 100644
--- a/docs/src/pages/docs/built-in/llama-cpp.mdx
+++ b/docs/src/pages/docs/built-in/llama-cpp.mdx
@@ -102,7 +102,7 @@ Enable the GPU acceleration option within the Jan application by following the [
     ],
     "size": 669000000
   },
-  "engine": "nitro"
+  "engine": "llama-cpp"
 }
 ```
 ### Step 2: Modify the `model.json`
diff --git a/extensions/assistant-extension/src/node/retrieval.ts b/extensions/assistant-extension/src/node/retrieval.ts
index 28d629aa8..3386029fa 100644
--- a/extensions/assistant-extension/src/node/retrieval.ts
+++ b/extensions/assistant-extension/src/node/retrieval.ts
@@ -10,8 +10,6 @@ import { HNSWLib } from 'langchain/vectorstores/hnswlib'
 import { OpenAIEmbeddings } from 'langchain/embeddings/openai'
 import { readEmbeddingEngine } from './engine'
 
-import path from 'path'
-
 export class Retrieval {
   public chunkSize: number = 100
   public chunkOverlap?: number = 0
diff --git a/extensions/inference-nitro-extension/.gitignore b/extensions/inference-cortex-extension/.gitignore
similarity index 100%
rename from extensions/inference-nitro-extension/.gitignore
rename to extensions/inference-cortex-extension/.gitignore
diff --git a/extensions/inference-nitro-extension/README.md b/extensions/inference-cortex-extension/README.md
similarity index 100%
rename from extensions/inference-nitro-extension/README.md
rename to extensions/inference-cortex-extension/README.md
diff --git a/extensions/inference-nitro-extension/download.bat b/extensions/inference-cortex-extension/download.bat
similarity index 93%
rename from extensions/inference-nitro-extension/download.bat
rename to extensions/inference-cortex-extension/download.bat
index 7acd385d5..d764b6df8 100644
--- a/extensions/inference-nitro-extension/download.bat
+++ b/extensions/inference-cortex-extension/download.bat
@@ -4,10 +4,10 @@ set /p CORTEX_VERSION=<./bin/version.txt
 
 @REM Download cortex.llamacpp binaries
 set VERSION=v0.1.25
-set DOWNLOAD_URL=https://github.com/janhq/cortex.llamacpp/releases/download/%VERSION%/cortex.llamacpp-0.1.25-windows-amd64
+set DOWNLOAD_URL=https://github.com/janhq/cortex.llamacpp/releases/download/%VERSION%/cortex.llamacpp-0.1.34-windows-amd64
 set SUBFOLDERS=win-cuda-12-0 win-cuda-11-7 win-noavx win-avx win-avx2 win-avx512 win-vulkan
 
-call .\node_modules\.bin\download -e --strip 1 -o %BIN_PATH% https://github.com/janhq/cortex/releases/download/v%CORTEX_VERSION%/cortex-cpp-%CORTEX_VERSION%-windows-amd64.tar.gz
+call .\node_modules\.bin\download -e --strip 1 -o %BIN_PATH% https://github.com/janhq/cortex/releases/download/v%CORTEX_VERSION%/cortex-%CORTEX_VERSION%-windows-amd64.tar.gz
 call .\node_modules\.bin\download %DOWNLOAD_URL%-avx2-cuda-12-0.tar.gz -e --strip 1 -o %BIN_PATH%/win-cuda-12-0/engines/cortex.llamacpp
 call .\node_modules\.bin\download %DOWNLOAD_URL%-avx2-cuda-11-7.tar.gz -e --strip 1 -o %BIN_PATH%/win-cuda-11-7/engines/cortex.llamacpp
 call .\node_modules\.bin\download %DOWNLOAD_URL%-noavx.tar.gz -e --strip 1 -o %BIN_PATH%/win-noavx/engines/cortex.llamacpp
diff --git a/extensions/inference-cortex-extension/download.sh b/extensions/inference-cortex-extension/download.sh
new file mode 100755
index 000000000..fe1f8af9f
--- /dev/null
+++ b/extensions/inference-cortex-extension/download.sh
@@ -0,0 +1,36 @@
+#!/bin/bash
+
+# Read the cortex release version from ./bin/version.txt; the engine (cortex.llamacpp) release is pinned in the URL below
+CORTEX_VERSION=$(cat ./bin/version.txt)
+CORTEX_RELEASE_URL="https://github.com/janhq/cortex/releases/download"
+ENGINE_DOWNLOAD_URL="https://github.com/janhq/cortex.llamacpp/releases/download/v0.1.34/cortex.llamacpp-0.1.34"
+# Detect platform from `uname`; only "Linux" and "Darwin" are handled, anything else exits with status 1
+OS_TYPE=$(uname)
+
+if [ "$OS_TYPE" == "Linux" ]; then
+    # Linux: fetch the cortex release archive into ./bin and mark the binary executable
+    download "${CORTEX_RELEASE_URL}/v${CORTEX_VERSION}/cortex-${CORTEX_VERSION}-linux-amd64.tar.gz"  -e --strip 1 -o "./bin"
+    chmod +x "./bin/cortex"
+
+    # Download one engine build per Linux variant folder (NOTE(review): the trailing "1" argument looks stray — confirm `download` ignores it)
+    download "${ENGINE_DOWNLOAD_URL}-linux-amd64-noavx.tar.gz"  -e --strip 1 -o "./bin/linux-noavx/engines/cortex.llamacpp" 1
+    download "${ENGINE_DOWNLOAD_URL}-linux-amd64-avx.tar.gz"  -e --strip 1 -o "./bin/linux-avx/engines/cortex.llamacpp" 1
+    download "${ENGINE_DOWNLOAD_URL}-linux-amd64-avx2.tar.gz"  -e --strip 1 -o "./bin/linux-avx2/engines/cortex.llamacpp" 1
+    download "${ENGINE_DOWNLOAD_URL}-linux-amd64-avx512.tar.gz"  -e --strip 1 -o "./bin/linux-avx512/engines/cortex.llamacpp" 1
+    download "${ENGINE_DOWNLOAD_URL}-linux-amd64-avx2-cuda-12-0.tar.gz"  -e --strip 1 -o "./bin/linux-cuda-12-0/engines/cortex.llamacpp" 1
+    download "${ENGINE_DOWNLOAD_URL}-linux-amd64-avx2-cuda-11-7.tar.gz"  -e --strip 1 -o "./bin/linux-cuda-11-7/engines/cortex.llamacpp" 1
+    download "${ENGINE_DOWNLOAD_URL}-linux-amd64-vulkan.tar.gz"  -e --strip 1 -o "./bin/linux-vulkan/engines/cortex.llamacpp" 1
+
+elif [ "$OS_TYPE" == "Darwin" ]; then
+    # macOS: fetch the universal cortex release archive into ./bin and mark the binary executable
+    download "${CORTEX_RELEASE_URL}/v${CORTEX_VERSION}/cortex-${CORTEX_VERSION}-mac-universal.tar.gz"  -e --strip 1 -o "./bin" 1
+    chmod +x "./bin/cortex"
+
+    # Download engines for macOS (the amd64 engine archive is unpacked into the mac-x64 folder)
+    download "${ENGINE_DOWNLOAD_URL}-mac-arm64.tar.gz" -e --strip 1 -o ./bin/mac-arm64/engines/cortex.llamacpp
+    download "${ENGINE_DOWNLOAD_URL}-mac-amd64.tar.gz" -e --strip 1 -o ./bin/mac-x64/engines/cortex.llamacpp
+
+else
+    echo "Unsupported operating system: $OS_TYPE"
+    exit 1
+fi
diff --git a/extensions/inference-nitro-extension/jest.config.js b/extensions/inference-cortex-extension/jest.config.js
similarity index 100%
rename from extensions/inference-nitro-extension/jest.config.js
rename to extensions/inference-cortex-extension/jest.config.js
diff --git a/extensions/inference-nitro-extension/package.json b/extensions/inference-cortex-extension/package.json
similarity index 82%
rename from extensions/inference-nitro-extension/package.json
rename to extensions/inference-cortex-extension/package.json
index 15ceaf566..920989f3b 100644
--- a/extensions/inference-nitro-extension/package.json
+++ b/extensions/inference-cortex-extension/package.json
@@ -10,12 +10,12 @@
   "scripts": {
     "test": "jest",
     "build": "tsc --module commonjs && rollup -c rollup.config.ts",
-    "downloadnitro:linux:darwin": "./download.sh",
-    "downloadnitro:win32": "download.bat",
-    "downloadnitro": "run-script-os",
-    "build:publish:darwin": "rimraf *.tgz --glob && yarn build && npm run downloadnitro && ../../.github/scripts/auto-sign.sh && cpx \"bin/**\" \"dist/bin\" && npm pack && cpx *.tgz ../../pre-install",
-    "build:publish:win32:linux": "rimraf *.tgz --glob && yarn build && npm run downloadnitro && cpx \"bin/**\" \"dist/bin\" && npm pack && cpx *.tgz ../../pre-install",
-    "build:publish": "yarn test && run-script-os"
+    "downloadcortex:linux:darwin": "./download.sh",
+    "downloadcortex:win32": "download.bat",
+    "downloadcortex": "run-script-os",
+    "build:publish:darwin": "rimraf *.tgz --glob && yarn build && npm run downloadcortex && ../../.github/scripts/auto-sign.sh && cpx \"bin/**\" \"dist/bin\" && npm pack && cpx *.tgz ../../pre-install",
+    "build:publish:win32:linux": "rimraf *.tgz --glob && yarn build && npm run downloadcortex && cpx \"bin/**\" \"dist/bin\" && npm pack && cpx *.tgz ../../pre-install",
+    "build:publish": "run-script-os"
   },
   "exports": {
     ".": "./dist/index.js",
@@ -50,6 +50,7 @@
     "cpu-instructions": "^0.0.13",
     "decompress": "^4.2.1",
     "fetch-retry": "^5.0.6",
+    "ky": "^1.7.2",
     "rxjs": "^7.8.1",
     "tcp-port-used": "^1.0.2",
     "terminate": "2.6.1",
diff --git a/extensions/inference-nitro-extension/resources/default_settings.json b/extensions/inference-cortex-extension/resources/default_settings.json
similarity index 100%
rename from extensions/inference-nitro-extension/resources/default_settings.json
rename to extensions/inference-cortex-extension/resources/default_settings.json
diff --git a/extensions/inference-nitro-extension/resources/models/aya-23-35b/model.json b/extensions/inference-cortex-extension/resources/models/aya-23-35b/model.json
similarity index 97%
rename from extensions/inference-nitro-extension/resources/models/aya-23-35b/model.json
rename to extensions/inference-cortex-extension/resources/models/aya-23-35b/model.json
index 8c3029be0..f6e3d08e9 100644
--- a/extensions/inference-nitro-extension/resources/models/aya-23-35b/model.json
+++ b/extensions/inference-cortex-extension/resources/models/aya-23-35b/model.json
@@ -31,5 +31,5 @@
       "tags": ["34B", "Finetuned"],
       "size": 21556982144
     },
-    "engine": "nitro"
+    "engine": "llama-cpp"
 }
diff --git a/extensions/inference-nitro-extension/resources/models/aya-23-8b/model.json b/extensions/inference-cortex-extension/resources/models/aya-23-8b/model.json
similarity index 97%
rename from extensions/inference-nitro-extension/resources/models/aya-23-8b/model.json
rename to extensions/inference-cortex-extension/resources/models/aya-23-8b/model.json
index 163373014..463f7eec7 100644
--- a/extensions/inference-nitro-extension/resources/models/aya-23-8b/model.json
+++ b/extensions/inference-cortex-extension/resources/models/aya-23-8b/model.json
@@ -31,5 +31,5 @@
       "tags": ["7B", "Finetuned"],
       "size": 5056982144
     },
-    "engine": "nitro"
+    "engine": "llama-cpp"
 }
diff --git a/extensions/inference-nitro-extension/resources/models/bakllava-1/model.json b/extensions/inference-cortex-extension/resources/models/bakllava-1/model.json
similarity index 97%
rename from extensions/inference-nitro-extension/resources/models/bakllava-1/model.json
rename to extensions/inference-cortex-extension/resources/models/bakllava-1/model.json
index 93f87c7f4..391c93990 100644
--- a/extensions/inference-nitro-extension/resources/models/bakllava-1/model.json
+++ b/extensions/inference-cortex-extension/resources/models/bakllava-1/model.json
@@ -31,5 +31,5 @@
     "tags": ["Vision"],
     "size": 5750000000
   },
-  "engine": "nitro"
+  "engine": "llama-cpp"
 }
diff --git a/extensions/inference-nitro-extension/resources/models/codeninja-1.0-7b/model.json b/extensions/inference-cortex-extension/resources/models/codeninja-1.0-7b/model.json
similarity index 97%
rename from extensions/inference-nitro-extension/resources/models/codeninja-1.0-7b/model.json
rename to extensions/inference-cortex-extension/resources/models/codeninja-1.0-7b/model.json
index fb2a5f346..7bd5bf3a4 100644
--- a/extensions/inference-nitro-extension/resources/models/codeninja-1.0-7b/model.json
+++ b/extensions/inference-cortex-extension/resources/models/codeninja-1.0-7b/model.json
@@ -30,5 +30,5 @@
     "tags": ["7B", "Finetuned"],
     "size": 4370000000
   },
-  "engine": "nitro"
+  "engine": "llama-cpp"
 }
diff --git a/extensions/inference-nitro-extension/resources/models/codestral-22b/model.json b/extensions/inference-cortex-extension/resources/models/codestral-22b/model.json
similarity index 97%
rename from extensions/inference-nitro-extension/resources/models/codestral-22b/model.json
rename to extensions/inference-cortex-extension/resources/models/codestral-22b/model.json
index f90f848dd..2cce063e6 100644
--- a/extensions/inference-nitro-extension/resources/models/codestral-22b/model.json
+++ b/extensions/inference-cortex-extension/resources/models/codestral-22b/model.json
@@ -31,6 +31,6 @@
       "tags": ["22B", "Finetuned", "Featured"],
       "size": 13341237440
     },
-    "engine": "nitro"
+    "engine": "llama-cpp"
   }
   
diff --git a/extensions/inference-nitro-extension/resources/models/command-r-34b/model.json b/extensions/inference-cortex-extension/resources/models/command-r-34b/model.json
similarity index 97%
rename from extensions/inference-nitro-extension/resources/models/command-r-34b/model.json
rename to extensions/inference-cortex-extension/resources/models/command-r-34b/model.json
index 6b166eea5..13518604c 100644
--- a/extensions/inference-nitro-extension/resources/models/command-r-34b/model.json
+++ b/extensions/inference-cortex-extension/resources/models/command-r-34b/model.json
@@ -31,6 +31,6 @@
       "tags": ["34B", "Finetuned"],
       "size": 21500000000
     },
-    "engine": "nitro"
+    "engine": "llama-cpp"
   }
   
diff --git a/extensions/inference-nitro-extension/resources/models/deepseek-coder-1.3b/model.json b/extensions/inference-cortex-extension/resources/models/deepseek-coder-1.3b/model.json
similarity index 97%
rename from extensions/inference-nitro-extension/resources/models/deepseek-coder-1.3b/model.json
rename to extensions/inference-cortex-extension/resources/models/deepseek-coder-1.3b/model.json
index 4d825cfeb..6722d253d 100644
--- a/extensions/inference-nitro-extension/resources/models/deepseek-coder-1.3b/model.json
+++ b/extensions/inference-cortex-extension/resources/models/deepseek-coder-1.3b/model.json
@@ -31,5 +31,5 @@
     "tags": ["Tiny"],
     "size": 1430000000
   },
-  "engine": "nitro"
+  "engine": "llama-cpp"
 }
diff --git a/extensions/inference-nitro-extension/resources/models/deepseek-coder-34b/model.json b/extensions/inference-cortex-extension/resources/models/deepseek-coder-34b/model.json
similarity index 97%
rename from extensions/inference-nitro-extension/resources/models/deepseek-coder-34b/model.json
rename to extensions/inference-cortex-extension/resources/models/deepseek-coder-34b/model.json
index e87d6a643..8a2e271cd 100644
--- a/extensions/inference-nitro-extension/resources/models/deepseek-coder-34b/model.json
+++ b/extensions/inference-cortex-extension/resources/models/deepseek-coder-34b/model.json
@@ -31,5 +31,5 @@
     "tags": ["33B"],
     "size": 19940000000
   },
-  "engine": "nitro"
+  "engine": "llama-cpp"
 }
diff --git a/extensions/inference-nitro-extension/resources/models/gemma-1.1-2b/model.json b/extensions/inference-cortex-extension/resources/models/gemma-1.1-2b/model.json
similarity index 97%
rename from extensions/inference-nitro-extension/resources/models/gemma-1.1-2b/model.json
rename to extensions/inference-cortex-extension/resources/models/gemma-1.1-2b/model.json
index 837b10ce3..3278c9a81 100644
--- a/extensions/inference-nitro-extension/resources/models/gemma-1.1-2b/model.json
+++ b/extensions/inference-cortex-extension/resources/models/gemma-1.1-2b/model.json
@@ -31,5 +31,5 @@
     "tags": ["2B", "Finetuned", "Tiny"],
     "size": 1630000000
   },
-  "engine": "nitro"
+  "engine": "llama-cpp"
 }
diff --git a/extensions/inference-nitro-extension/resources/models/gemma-1.1-7b/model.json b/extensions/inference-cortex-extension/resources/models/gemma-1.1-7b/model.json
similarity index 97%
rename from extensions/inference-nitro-extension/resources/models/gemma-1.1-7b/model.json
rename to extensions/inference-cortex-extension/resources/models/gemma-1.1-7b/model.json
index b29043483..9a57f9b37 100644
--- a/extensions/inference-nitro-extension/resources/models/gemma-1.1-7b/model.json
+++ b/extensions/inference-cortex-extension/resources/models/gemma-1.1-7b/model.json
@@ -31,5 +31,5 @@
     "tags": ["7B", "Finetuned"],
     "size": 5330000000
   },
-  "engine": "nitro"
+  "engine": "llama-cpp"
 }
diff --git a/extensions/inference-nitro-extension/resources/models/gemma-2-27b/model.json b/extensions/inference-cortex-extension/resources/models/gemma-2-27b/model.json
similarity index 97%
rename from extensions/inference-nitro-extension/resources/models/gemma-2-27b/model.json
rename to extensions/inference-cortex-extension/resources/models/gemma-2-27b/model.json
index 4db74ac6f..66eaff7c2 100644
--- a/extensions/inference-nitro-extension/resources/models/gemma-2-27b/model.json
+++ b/extensions/inference-cortex-extension/resources/models/gemma-2-27b/model.json
@@ -37,5 +37,5 @@
     ],
     "size": 16600000000
   },
-  "engine": "nitro"
+  "engine": "llama-cpp"
 }
diff --git a/extensions/inference-nitro-extension/resources/models/gemma-2-2b/model.json b/extensions/inference-cortex-extension/resources/models/gemma-2-2b/model.json
similarity index 97%
rename from extensions/inference-nitro-extension/resources/models/gemma-2-2b/model.json
rename to extensions/inference-cortex-extension/resources/models/gemma-2-2b/model.json
index d85759f9b..60be558b8 100644
--- a/extensions/inference-nitro-extension/resources/models/gemma-2-2b/model.json
+++ b/extensions/inference-cortex-extension/resources/models/gemma-2-2b/model.json
@@ -38,5 +38,5 @@
     ],
     "size": 1710000000
   },
-  "engine": "nitro"
+  "engine": "llama-cpp"
 }
diff --git a/extensions/inference-nitro-extension/resources/models/gemma-2-9b/model.json b/extensions/inference-cortex-extension/resources/models/gemma-2-9b/model.json
similarity index 97%
rename from extensions/inference-nitro-extension/resources/models/gemma-2-9b/model.json
rename to extensions/inference-cortex-extension/resources/models/gemma-2-9b/model.json
index 8f6af15d9..67acaad09 100644
--- a/extensions/inference-nitro-extension/resources/models/gemma-2-9b/model.json
+++ b/extensions/inference-cortex-extension/resources/models/gemma-2-9b/model.json
@@ -37,5 +37,5 @@
     ],
     "size": 5760000000
   },
-  "engine": "nitro"
+  "engine": "llama-cpp"
 }
diff --git a/extensions/inference-nitro-extension/resources/models/llama2-chat-70b/model.json b/extensions/inference-cortex-extension/resources/models/llama2-chat-70b/model.json
similarity index 97%
rename from extensions/inference-nitro-extension/resources/models/llama2-chat-70b/model.json
rename to extensions/inference-cortex-extension/resources/models/llama2-chat-70b/model.json
index 0c770b189..c91a0a73b 100644
--- a/extensions/inference-nitro-extension/resources/models/llama2-chat-70b/model.json
+++ b/extensions/inference-cortex-extension/resources/models/llama2-chat-70b/model.json
@@ -31,5 +31,5 @@
     "tags": ["70B", "Foundational Model"],
     "size": 43920000000
   },
-  "engine": "nitro"
+  "engine": "llama-cpp"
 }
diff --git a/extensions/inference-nitro-extension/resources/models/llama2-chat-7b/model.json b/extensions/inference-cortex-extension/resources/models/llama2-chat-7b/model.json
similarity index 97%
rename from extensions/inference-nitro-extension/resources/models/llama2-chat-7b/model.json
rename to extensions/inference-cortex-extension/resources/models/llama2-chat-7b/model.json
index 9efd634b5..4a28f6004 100644
--- a/extensions/inference-nitro-extension/resources/models/llama2-chat-7b/model.json
+++ b/extensions/inference-cortex-extension/resources/models/llama2-chat-7b/model.json
@@ -31,5 +31,5 @@
     "tags": ["7B", "Foundational Model"],
     "size": 4080000000
   },
-  "engine": "nitro"
+  "engine": "llama-cpp"
 }
diff --git a/extensions/inference-nitro-extension/resources/models/llama3-8b-instruct/model.json b/extensions/inference-cortex-extension/resources/models/llama3-8b-instruct/model.json
similarity index 97%
rename from extensions/inference-nitro-extension/resources/models/llama3-8b-instruct/model.json
rename to extensions/inference-cortex-extension/resources/models/llama3-8b-instruct/model.json
index 4d84b9967..3456a185e 100644
--- a/extensions/inference-nitro-extension/resources/models/llama3-8b-instruct/model.json
+++ b/extensions/inference-cortex-extension/resources/models/llama3-8b-instruct/model.json
@@ -31,5 +31,5 @@
       "tags": ["8B"],
       "size": 4920000000
     },
-    "engine": "nitro"
+    "engine": "llama-cpp"
 }
diff --git a/extensions/inference-nitro-extension/resources/models/llama3-hermes-8b/model.json b/extensions/inference-cortex-extension/resources/models/llama3-hermes-8b/model.json
similarity index 97%
rename from extensions/inference-nitro-extension/resources/models/llama3-hermes-8b/model.json
rename to extensions/inference-cortex-extension/resources/models/llama3-hermes-8b/model.json
index a3601c8cd..718629fb0 100644
--- a/extensions/inference-nitro-extension/resources/models/llama3-hermes-8b/model.json
+++ b/extensions/inference-cortex-extension/resources/models/llama3-hermes-8b/model.json
@@ -34,5 +34,5 @@
       ],
       "size": 4920000000
     },
-    "engine": "nitro"
+    "engine": "llama-cpp"
   }
diff --git a/extensions/inference-nitro-extension/resources/models/llama3.1-70b-instruct/model.json b/extensions/inference-cortex-extension/resources/models/llama3.1-70b-instruct/model.json
similarity index 98%
rename from extensions/inference-nitro-extension/resources/models/llama3.1-70b-instruct/model.json
rename to extensions/inference-cortex-extension/resources/models/llama3.1-70b-instruct/model.json
index 1f4931e11..aec73719e 100644
--- a/extensions/inference-nitro-extension/resources/models/llama3.1-70b-instruct/model.json
+++ b/extensions/inference-cortex-extension/resources/models/llama3.1-70b-instruct/model.json
@@ -37,5 +37,5 @@
     ],
     "size": 42500000000
   },
-  "engine": "nitro"
+  "engine": "llama-cpp"
 }
diff --git a/extensions/inference-nitro-extension/resources/models/llama3.1-8b-instruct/model.json b/extensions/inference-cortex-extension/resources/models/llama3.1-8b-instruct/model.json
similarity index 98%
rename from extensions/inference-nitro-extension/resources/models/llama3.1-8b-instruct/model.json
rename to extensions/inference-cortex-extension/resources/models/llama3.1-8b-instruct/model.json
index 4b21534bc..ec9a0284b 100644
--- a/extensions/inference-nitro-extension/resources/models/llama3.1-8b-instruct/model.json
+++ b/extensions/inference-cortex-extension/resources/models/llama3.1-8b-instruct/model.json
@@ -37,5 +37,5 @@
     ],
     "size": 4920000000
   },
-  "engine": "nitro"
+  "engine": "llama-cpp"
 }
diff --git a/extensions/inference-nitro-extension/resources/models/llama3.2-1b-instruct/model.json b/extensions/inference-cortex-extension/resources/models/llama3.2-1b-instruct/model.json
similarity index 97%
rename from extensions/inference-nitro-extension/resources/models/llama3.2-1b-instruct/model.json
rename to extensions/inference-cortex-extension/resources/models/llama3.2-1b-instruct/model.json
index 5be08409d..0fe7d3316 100644
--- a/extensions/inference-nitro-extension/resources/models/llama3.2-1b-instruct/model.json
+++ b/extensions/inference-cortex-extension/resources/models/llama3.2-1b-instruct/model.json
@@ -31,5 +31,5 @@
     "tags": ["1B", "Featured"],
     "size": 1320000000
   },
-  "engine": "nitro"
+  "engine": "llama-cpp"
 }
diff --git a/extensions/inference-nitro-extension/resources/models/llama3.2-3b-instruct/model.json b/extensions/inference-cortex-extension/resources/models/llama3.2-3b-instruct/model.json
similarity index 97%
rename from extensions/inference-nitro-extension/resources/models/llama3.2-3b-instruct/model.json
rename to extensions/inference-cortex-extension/resources/models/llama3.2-3b-instruct/model.json
index aacb3f0f8..299362fbf 100644
--- a/extensions/inference-nitro-extension/resources/models/llama3.2-3b-instruct/model.json
+++ b/extensions/inference-cortex-extension/resources/models/llama3.2-3b-instruct/model.json
@@ -31,5 +31,5 @@
     "tags": ["3B", "Featured"],
     "size": 3420000000
   },
-  "engine": "nitro"
+  "engine": "llama-cpp"
 }
diff --git a/extensions/inference-nitro-extension/resources/models/llamacorn-1.1b/model.json b/extensions/inference-cortex-extension/resources/models/llamacorn-1.1b/model.json
similarity index 97%
rename from extensions/inference-nitro-extension/resources/models/llamacorn-1.1b/model.json
rename to extensions/inference-cortex-extension/resources/models/llamacorn-1.1b/model.json
index 94b62ec82..3230df5b0 100644
--- a/extensions/inference-nitro-extension/resources/models/llamacorn-1.1b/model.json
+++ b/extensions/inference-cortex-extension/resources/models/llamacorn-1.1b/model.json
@@ -34,5 +34,5 @@
       ],
       "size": 1170000000
     },
-    "engine": "nitro"
+    "engine": "llama-cpp"
   }
\ No newline at end of file
diff --git a/extensions/inference-nitro-extension/resources/models/llava-13b/model.json b/extensions/inference-cortex-extension/resources/models/llava-13b/model.json
similarity index 97%
rename from extensions/inference-nitro-extension/resources/models/llava-13b/model.json
rename to extensions/inference-cortex-extension/resources/models/llava-13b/model.json
index 6d94fd272..791c98749 100644
--- a/extensions/inference-nitro-extension/resources/models/llava-13b/model.json
+++ b/extensions/inference-cortex-extension/resources/models/llava-13b/model.json
@@ -32,5 +32,5 @@
     "tags": ["Vision"],
     "size": 7870000000
   },
-  "engine": "nitro"
+  "engine": "llama-cpp"
 }
diff --git a/extensions/inference-nitro-extension/resources/models/llava-7b/model.json b/extensions/inference-cortex-extension/resources/models/llava-7b/model.json
similarity index 97%
rename from extensions/inference-nitro-extension/resources/models/llava-7b/model.json
rename to extensions/inference-cortex-extension/resources/models/llava-7b/model.json
index 1fdd75247..b22899c96 100644
--- a/extensions/inference-nitro-extension/resources/models/llava-7b/model.json
+++ b/extensions/inference-cortex-extension/resources/models/llava-7b/model.json
@@ -32,5 +32,5 @@
     "tags": ["Vision"],
     "size": 4370000000
   },
-  "engine": "nitro"
+  "engine": "llama-cpp"
 }
diff --git a/extensions/inference-nitro-extension/resources/models/mistral-ins-7b-q4/model.json b/extensions/inference-cortex-extension/resources/models/mistral-ins-7b-q4/model.json
similarity index 97%
rename from extensions/inference-nitro-extension/resources/models/mistral-ins-7b-q4/model.json
rename to extensions/inference-cortex-extension/resources/models/mistral-ins-7b-q4/model.json
index 88f701466..9b568e468 100644
--- a/extensions/inference-nitro-extension/resources/models/mistral-ins-7b-q4/model.json
+++ b/extensions/inference-cortex-extension/resources/models/mistral-ins-7b-q4/model.json
@@ -32,5 +32,5 @@
     "size": 4370000000,
     "cover": "https://raw.githubusercontent.com/janhq/jan/dev/models/mistral-ins-7b-q4/cover.png"
   },
-  "engine": "nitro"
+  "engine": "llama-cpp"
 }
diff --git a/extensions/inference-nitro-extension/resources/models/mixtral-8x7b-instruct/model.json b/extensions/inference-cortex-extension/resources/models/mixtral-8x7b-instruct/model.json
similarity index 97%
rename from extensions/inference-nitro-extension/resources/models/mixtral-8x7b-instruct/model.json
rename to extensions/inference-cortex-extension/resources/models/mixtral-8x7b-instruct/model.json
index 4413b415c..c711065ff 100644
--- a/extensions/inference-nitro-extension/resources/models/mixtral-8x7b-instruct/model.json
+++ b/extensions/inference-cortex-extension/resources/models/mixtral-8x7b-instruct/model.json
@@ -30,5 +30,5 @@
     "tags": ["70B", "Foundational Model"],
     "size": 26440000000
   },
-  "engine": "nitro"
+  "engine": "llama-cpp"
 }
diff --git a/extensions/inference-nitro-extension/resources/models/noromaid-7b/model.json b/extensions/inference-cortex-extension/resources/models/noromaid-7b/model.json
similarity index 97%
rename from extensions/inference-nitro-extension/resources/models/noromaid-7b/model.json
rename to extensions/inference-cortex-extension/resources/models/noromaid-7b/model.json
index 10c17c310..1999035aa 100644
--- a/extensions/inference-nitro-extension/resources/models/noromaid-7b/model.json
+++ b/extensions/inference-cortex-extension/resources/models/noromaid-7b/model.json
@@ -31,5 +31,5 @@
     "tags": ["7B", "Finetuned"],
     "size": 4370000000
   },
-  "engine": "nitro"
+  "engine": "llama-cpp"
 }
diff --git a/extensions/inference-nitro-extension/resources/models/openchat-3.5-7b/model.json b/extensions/inference-cortex-extension/resources/models/openchat-3.5-7b/model.json
similarity index 97%
rename from extensions/inference-nitro-extension/resources/models/openchat-3.5-7b/model.json
rename to extensions/inference-cortex-extension/resources/models/openchat-3.5-7b/model.json
index e743a74c9..05371b69e 100644
--- a/extensions/inference-nitro-extension/resources/models/openchat-3.5-7b/model.json
+++ b/extensions/inference-cortex-extension/resources/models/openchat-3.5-7b/model.json
@@ -31,5 +31,5 @@
     "tags": ["Recommended", "7B", "Finetuned"],
     "size": 4370000000
   },
-  "engine": "nitro"
+  "engine": "llama-cpp"
 }
diff --git a/extensions/inference-nitro-extension/resources/models/phi3-3.8b/model.json b/extensions/inference-cortex-extension/resources/models/phi3-3.8b/model.json
similarity index 97%
rename from extensions/inference-nitro-extension/resources/models/phi3-3.8b/model.json
rename to extensions/inference-cortex-extension/resources/models/phi3-3.8b/model.json
index 6459b049d..a2197dab2 100644
--- a/extensions/inference-nitro-extension/resources/models/phi3-3.8b/model.json
+++ b/extensions/inference-cortex-extension/resources/models/phi3-3.8b/model.json
@@ -34,5 +34,5 @@
       ],
       "size": 2320000000
     },
-    "engine": "nitro"
+    "engine": "llama-cpp"
 }
\ No newline at end of file
diff --git a/extensions/inference-nitro-extension/resources/models/phi3-medium/model.json b/extensions/inference-cortex-extension/resources/models/phi3-medium/model.json
similarity index 97%
rename from extensions/inference-nitro-extension/resources/models/phi3-medium/model.json
rename to extensions/inference-cortex-extension/resources/models/phi3-medium/model.json
index 7331b2fd8..f7131ee98 100644
--- a/extensions/inference-nitro-extension/resources/models/phi3-medium/model.json
+++ b/extensions/inference-cortex-extension/resources/models/phi3-medium/model.json
@@ -34,5 +34,5 @@
       ],
       "size": 8366000000
     },
-    "engine": "nitro"
+    "engine": "llama-cpp"
   }
\ No newline at end of file
diff --git a/extensions/inference-nitro-extension/resources/models/phind-34b/model.json b/extensions/inference-cortex-extension/resources/models/phind-34b/model.json
similarity index 97%
rename from extensions/inference-nitro-extension/resources/models/phind-34b/model.json
rename to extensions/inference-cortex-extension/resources/models/phind-34b/model.json
index 14099a635..f6e302173 100644
--- a/extensions/inference-nitro-extension/resources/models/phind-34b/model.json
+++ b/extensions/inference-cortex-extension/resources/models/phind-34b/model.json
@@ -31,5 +31,5 @@
     "tags": ["34B", "Finetuned"],
     "size": 20220000000
   },
-  "engine": "nitro"
+  "engine": "llama-cpp"
 }
diff --git a/extensions/inference-nitro-extension/resources/models/qwen-7b/model.json b/extensions/inference-cortex-extension/resources/models/qwen-7b/model.json
similarity index 97%
rename from extensions/inference-nitro-extension/resources/models/qwen-7b/model.json
rename to extensions/inference-cortex-extension/resources/models/qwen-7b/model.json
index 85081a605..be37cac0d 100644
--- a/extensions/inference-nitro-extension/resources/models/qwen-7b/model.json
+++ b/extensions/inference-cortex-extension/resources/models/qwen-7b/model.json
@@ -31,5 +31,5 @@
     "tags": ["7B", "Finetuned"],
     "size": 4770000000
   },
-  "engine": "nitro"
+  "engine": "llama-cpp"
 }
diff --git a/extensions/inference-nitro-extension/resources/models/qwen2-7b/model.json b/extensions/inference-cortex-extension/resources/models/qwen2-7b/model.json
similarity index 97%
rename from extensions/inference-nitro-extension/resources/models/qwen2-7b/model.json
rename to extensions/inference-cortex-extension/resources/models/qwen2-7b/model.json
index a7613982c..210848a43 100644
--- a/extensions/inference-nitro-extension/resources/models/qwen2-7b/model.json
+++ b/extensions/inference-cortex-extension/resources/models/qwen2-7b/model.json
@@ -31,6 +31,6 @@
       "tags": ["7B", "Finetuned"],
       "size": 4680000000
     },
-    "engine": "nitro"
+    "engine": "llama-cpp"
   }
   
\ No newline at end of file
diff --git a/extensions/inference-nitro-extension/resources/models/qwen2.5-14b-instruct/model.json b/extensions/inference-cortex-extension/resources/models/qwen2.5-14b-instruct/model.json
similarity index 97%
rename from extensions/inference-nitro-extension/resources/models/qwen2.5-14b-instruct/model.json
rename to extensions/inference-cortex-extension/resources/models/qwen2.5-14b-instruct/model.json
index 04913b874..96e4d214c 100644
--- a/extensions/inference-nitro-extension/resources/models/qwen2.5-14b-instruct/model.json
+++ b/extensions/inference-cortex-extension/resources/models/qwen2.5-14b-instruct/model.json
@@ -31,6 +31,6 @@
       "tags": ["14B", "Featured"],
       "size": 8990000000
     },
-    "engine": "nitro"
+    "engine": "llama-cpp"
   }
   
\ No newline at end of file
diff --git a/extensions/inference-nitro-extension/resources/models/qwen2.5-32b-instruct/model.json b/extensions/inference-cortex-extension/resources/models/qwen2.5-32b-instruct/model.json
similarity index 97%
rename from extensions/inference-nitro-extension/resources/models/qwen2.5-32b-instruct/model.json
rename to extensions/inference-cortex-extension/resources/models/qwen2.5-32b-instruct/model.json
index 43ba30c56..20681dff4 100644
--- a/extensions/inference-nitro-extension/resources/models/qwen2.5-32b-instruct/model.json
+++ b/extensions/inference-cortex-extension/resources/models/qwen2.5-32b-instruct/model.json
@@ -31,6 +31,6 @@
       "tags": ["32B"],
       "size": 19900000000
     },
-    "engine": "nitro"
+    "engine": "llama-cpp"
   }
   
\ No newline at end of file
diff --git a/extensions/inference-nitro-extension/resources/models/qwen2.5-72b-instruct/model.json b/extensions/inference-cortex-extension/resources/models/qwen2.5-72b-instruct/model.json
similarity index 97%
rename from extensions/inference-nitro-extension/resources/models/qwen2.5-72b-instruct/model.json
rename to extensions/inference-cortex-extension/resources/models/qwen2.5-72b-instruct/model.json
index 1852a0909..b741539eb 100644
--- a/extensions/inference-nitro-extension/resources/models/qwen2.5-72b-instruct/model.json
+++ b/extensions/inference-cortex-extension/resources/models/qwen2.5-72b-instruct/model.json
@@ -31,6 +31,6 @@
       "tags": ["72B"],
       "size": 47400000000
     },
-    "engine": "nitro"
+    "engine": "llama-cpp"
   }
   
\ No newline at end of file
diff --git a/extensions/inference-nitro-extension/resources/models/qwen2.5-7b-instruct/model.json b/extensions/inference-cortex-extension/resources/models/qwen2.5-7b-instruct/model.json
similarity index 97%
rename from extensions/inference-nitro-extension/resources/models/qwen2.5-7b-instruct/model.json
rename to extensions/inference-cortex-extension/resources/models/qwen2.5-7b-instruct/model.json
index b47511f96..6741aef64 100644
--- a/extensions/inference-nitro-extension/resources/models/qwen2.5-7b-instruct/model.json
+++ b/extensions/inference-cortex-extension/resources/models/qwen2.5-7b-instruct/model.json
@@ -31,6 +31,6 @@
       "tags": ["7B", "Featured"],
       "size": 4680000000
     },
-    "engine": "nitro"
+    "engine": "llama-cpp"
   }
   
\ No newline at end of file
diff --git a/extensions/inference-nitro-extension/resources/models/qwen2.5-coder-7b-instruct/model.json b/extensions/inference-cortex-extension/resources/models/qwen2.5-coder-7b-instruct/model.json
similarity index 97%
rename from extensions/inference-nitro-extension/resources/models/qwen2.5-coder-7b-instruct/model.json
rename to extensions/inference-cortex-extension/resources/models/qwen2.5-coder-7b-instruct/model.json
index 2f1080b2c..9162c8a43 100644
--- a/extensions/inference-nitro-extension/resources/models/qwen2.5-coder-7b-instruct/model.json
+++ b/extensions/inference-cortex-extension/resources/models/qwen2.5-coder-7b-instruct/model.json
@@ -31,6 +31,6 @@
       "tags": ["7B", "Featured"],
       "size": 4680000000
     },
-    "engine": "nitro"
+    "engine": "llama-cpp"
   }
   
\ No newline at end of file
diff --git a/extensions/inference-nitro-extension/resources/models/stable-zephyr-3b/model.json b/extensions/inference-cortex-extension/resources/models/stable-zephyr-3b/model.json
similarity index 97%
rename from extensions/inference-nitro-extension/resources/models/stable-zephyr-3b/model.json
rename to extensions/inference-cortex-extension/resources/models/stable-zephyr-3b/model.json
index 938e03fb7..a6c84bd17 100644
--- a/extensions/inference-nitro-extension/resources/models/stable-zephyr-3b/model.json
+++ b/extensions/inference-cortex-extension/resources/models/stable-zephyr-3b/model.json
@@ -31,5 +31,5 @@
       "tags": ["3B", "Finetuned", "Tiny"],
       "size": 2970000000
     },
-    "engine": "nitro"
+    "engine": "llama-cpp"
   }
\ No newline at end of file
diff --git a/extensions/inference-nitro-extension/resources/models/stealth-v1.2-7b/model.json b/extensions/inference-cortex-extension/resources/models/stealth-v1.2-7b/model.json
similarity index 97%
rename from extensions/inference-nitro-extension/resources/models/stealth-v1.2-7b/model.json
rename to extensions/inference-cortex-extension/resources/models/stealth-v1.2-7b/model.json
index c17d1c35e..ffb32922e 100644
--- a/extensions/inference-nitro-extension/resources/models/stealth-v1.2-7b/model.json
+++ b/extensions/inference-cortex-extension/resources/models/stealth-v1.2-7b/model.json
@@ -30,5 +30,5 @@
     "tags": ["7B", "Finetuned"],
     "size": 4370000000
   },
-  "engine": "nitro"
+  "engine": "llama-cpp"
 }
diff --git a/extensions/inference-nitro-extension/resources/models/tinyllama-1.1b/model.json b/extensions/inference-cortex-extension/resources/models/tinyllama-1.1b/model.json
similarity index 97%
rename from extensions/inference-nitro-extension/resources/models/tinyllama-1.1b/model.json
rename to extensions/inference-cortex-extension/resources/models/tinyllama-1.1b/model.json
index a49e79073..b6aeea3e3 100644
--- a/extensions/inference-nitro-extension/resources/models/tinyllama-1.1b/model.json
+++ b/extensions/inference-cortex-extension/resources/models/tinyllama-1.1b/model.json
@@ -31,5 +31,5 @@
     "tags": ["Tiny", "Foundation Model"],
     "size": 669000000
   },
-  "engine": "nitro"
+  "engine": "llama-cpp"
 }
diff --git a/extensions/inference-nitro-extension/resources/models/trinity-v1.2-7b/model.json b/extensions/inference-cortex-extension/resources/models/trinity-v1.2-7b/model.json
similarity index 97%
rename from extensions/inference-nitro-extension/resources/models/trinity-v1.2-7b/model.json
rename to extensions/inference-cortex-extension/resources/models/trinity-v1.2-7b/model.json
index 6c9aa2b89..fae5d0ca5 100644
--- a/extensions/inference-nitro-extension/resources/models/trinity-v1.2-7b/model.json
+++ b/extensions/inference-cortex-extension/resources/models/trinity-v1.2-7b/model.json
@@ -31,5 +31,5 @@
     "size": 4370000000,
     "cover": "https://raw.githubusercontent.com/janhq/jan/dev/models/trinity-v1.2-7b/cover.png"
   },
-  "engine": "nitro"
+  "engine": "llama-cpp"
 }
diff --git a/extensions/inference-nitro-extension/resources/models/vistral-7b/model.json b/extensions/inference-cortex-extension/resources/models/vistral-7b/model.json
similarity index 97%
rename from extensions/inference-nitro-extension/resources/models/vistral-7b/model.json
rename to extensions/inference-cortex-extension/resources/models/vistral-7b/model.json
index b84f2c676..46b6999a6 100644
--- a/extensions/inference-nitro-extension/resources/models/vistral-7b/model.json
+++ b/extensions/inference-cortex-extension/resources/models/vistral-7b/model.json
@@ -31,6 +31,6 @@
       "tags": ["7B", "Finetuned"],
       "size": 4410000000
     },
-    "engine": "nitro"
+    "engine": "llama-cpp"
   }
   
\ No newline at end of file
diff --git a/extensions/inference-nitro-extension/resources/models/wizardcoder-13b/model.json b/extensions/inference-cortex-extension/resources/models/wizardcoder-13b/model.json
similarity index 97%
rename from extensions/inference-nitro-extension/resources/models/wizardcoder-13b/model.json
rename to extensions/inference-cortex-extension/resources/models/wizardcoder-13b/model.json
index 101eedfd1..cf39ad857 100644
--- a/extensions/inference-nitro-extension/resources/models/wizardcoder-13b/model.json
+++ b/extensions/inference-cortex-extension/resources/models/wizardcoder-13b/model.json
@@ -31,5 +31,5 @@
     "tags": ["Recommended", "13B", "Finetuned"],
     "size": 7870000000
   },
-  "engine": "nitro"
+  "engine": "llama-cpp"
 }
diff --git a/extensions/inference-nitro-extension/resources/models/yi-34b/model.json b/extensions/inference-cortex-extension/resources/models/yi-34b/model.json
similarity index 97%
rename from extensions/inference-nitro-extension/resources/models/yi-34b/model.json
rename to extensions/inference-cortex-extension/resources/models/yi-34b/model.json
index db7df9f2d..4f56650d7 100644
--- a/extensions/inference-nitro-extension/resources/models/yi-34b/model.json
+++ b/extensions/inference-cortex-extension/resources/models/yi-34b/model.json
@@ -31,5 +31,5 @@
     "tags": ["34B", "Foundational Model"],
     "size": 20660000000
   },
-  "engine": "nitro"
+  "engine": "llama-cpp"
 }
diff --git a/extensions/inference-nitro-extension/rollup.config.ts b/extensions/inference-cortex-extension/rollup.config.ts
similarity index 93%
rename from extensions/inference-nitro-extension/rollup.config.ts
rename to extensions/inference-cortex-extension/rollup.config.ts
index 1a8badb6f..d0e9f5fbe 100644
--- a/extensions/inference-nitro-extension/rollup.config.ts
+++ b/extensions/inference-cortex-extension/rollup.config.ts
@@ -114,19 +114,7 @@ export default [
         ]),
         NODE: JSON.stringify(`${packageJson.name}/${packageJson.node}`),
         DEFAULT_SETTINGS: JSON.stringify(defaultSettingJson),
-        INFERENCE_URL: JSON.stringify(
-          process.env.INFERENCE_URL ||
-            'http://127.0.0.1:3928/inferences/server/chat_completion'
-        ),
-        TROUBLESHOOTING_URL: JSON.stringify(
-          'https://jan.ai/guides/troubleshooting'
-        ),
-        JAN_SERVER_INFERENCE_URL: JSON.stringify(
-          'http://localhost:1337/v1/chat/completions'
-        ),
-        CUDA_DOWNLOAD_URL: JSON.stringify(
-          'https://catalog.jan.ai/dist/cuda-dependencies/<version>/<platform>/cuda.tar.gz'
-        ),
+        CORTEX_API_URL: JSON.stringify('http://127.0.0.1:39291/v1'),
       }),
       // Allow json resolution
       json(),
diff --git a/extensions/inference-nitro-extension/src/@types/global.d.ts b/extensions/inference-cortex-extension/src/@types/global.d.ts
similarity index 70%
rename from extensions/inference-nitro-extension/src/@types/global.d.ts
rename to extensions/inference-cortex-extension/src/@types/global.d.ts
index 85c9b939f..64ae5a6e7 100644
--- a/extensions/inference-nitro-extension/src/@types/global.d.ts
+++ b/extensions/inference-cortex-extension/src/@types/global.d.ts
@@ -1,7 +1,5 @@
 declare const NODE: string
-declare const INFERENCE_URL: string
-declare const TROUBLESHOOTING_URL: string
-declare const JAN_SERVER_INFERENCE_URL: string
+declare const CORTEX_API_URL: string
 declare const DEFAULT_SETTINGS: Array<any>
 declare const MODELS: Array<any>
 
diff --git a/extensions/inference-nitro-extension/src/babel.config.js b/extensions/inference-cortex-extension/src/babel.config.js
similarity index 100%
rename from extensions/inference-nitro-extension/src/babel.config.js
rename to extensions/inference-cortex-extension/src/babel.config.js
diff --git a/extensions/inference-cortex-extension/src/index.ts b/extensions/inference-cortex-extension/src/index.ts
new file mode 100644
index 000000000..93036fc4d
--- /dev/null
+++ b/extensions/inference-cortex-extension/src/index.ts
@@ -0,0 +1,111 @@
+/**
+ * @file This file exports a class that extends the LocalOAIEngine base class from the @janhq/core package.
+ * The class provides methods for loading and unloading a model through the Cortex API server.
+ * It also registers the bundled models and starts the Cortex process watchdog when the extension loads.
+ * @version 1.0.0
+ * @module inference-cortex-extension/src/index
+ */
+
+import {
+  Model,
+  executeOnMain,
+  systemInformation,
+  log,
+  joinPath,
+  dirName,
+  LocalOAIEngine,
+  InferenceEngine,
+} from '@janhq/core'
+
+import ky from 'ky'
+
+/**
+ * Jan inference extension that talks to a Cortex API server (extends LocalOAIEngine).
+ * On load it registers the bundled MODELS and invokes the 'run' handler of the NODE module;
+ * model load/unload requests are forwarded over HTTP to CORTEX_API_URL using ky.
+ */
+export default class JanInferenceCortexExtension extends LocalOAIEngine {
+  // DEPRECATED
+  nodeModule: string = 'node'
+
+  provider: string = InferenceEngine.cortex
+
+  /**
+   * Chat-completions endpoint of the Cortex API, used for making inference requests.
+   */
+  inferenceUrl = `${CORTEX_API_URL}/chat/completions`
+
+  /**
+   * Registers the bundled models, runs the base-class onLoad, then starts the process watchdog.
+   */
+  async onLoad() {
+    const models = MODELS as Model[]
+
+    this.registerModels(models)
+    // NOTE(review): super.onLoad() is not awaited here - confirm the base implementation is synchronous
+    super.onLoad()
+
+    // Run the process watchdog: invoke the NODE module's 'run' handler (result is not awaited)
+    const systemInfo = await systemInformation()
+    executeOnMain(NODE, 'run', systemInfo)
+  }
+  /** Invokes the NODE module's 'dispose' handler, then runs the base-class onUnload. */
+  onUnload(): void {
+    executeOnMain(NODE, 'dispose')
+    super.onUnload()
+  }
+  /** Starts `model` on the Cortex server; rejects with the server's JSON error body when one is available. */
+  override async loadModel(
+    model: Model & { file_path?: string }
+  ): Promise<void> {
+    // Legacy model cache (nitro engine with a recorded file_path) - should import before starting
+    if (model.engine === InferenceEngine.nitro && model.file_path) {
+      // Try importing the model; a failed import is only logged, so the start request below still runs
+      await ky
+        .post(`${CORTEX_API_URL}/models/${model.id}`, {
+          json: { model: model.id, modelPath: await this.modelPath(model) },
+        })
+        .json()
+        .catch((e) => log(e.message ?? e ?? ''))
+    }
+    // Request the model start, passing the model settings through in the request body
+    return ky
+      .post(`${CORTEX_API_URL}/models/start`, {
+        json: {
+          ...model.settings,
+          model: model.id,
+          engine:
+            model.engine === InferenceEngine.nitro // Legacy model cache: sent as cortex_llamacpp
+              ? InferenceEngine.cortex_llamacpp
+              : model.engine,
+        },
+      })
+      .json()
+      .catch(async (e) => {
+        throw (await e.response?.json()) ?? e // prefer the parsed response body, else the original error
+      })
+      .then() // NOTE(review): handler-less then(); presumably only for the Promise<void> typing - confirm
+  }
+  /** Asks the Cortex server to stop `model`; there is no catch here, so request failures reach the caller. */
+  override async unloadModel(model: Model): Promise<void> {
+    return ky
+      .post(`${CORTEX_API_URL}/models/stop`, {
+        json: { model: model.id },
+      })
+      .json()
+      .then()
+  }
+  /** Builds the model file path: the directory of file_path joined with the first known file name. */
+  private async modelPath(
+    model: Model & { file_path?: string }
+  ): Promise<string> {
+    if (!model.file_path) return model.id // no file_path recorded: the id is returned as-is
+    return await joinPath([
+      await dirName(model.file_path),
+      model.sources[0]?.filename ?? // file name candidates, first defined one wins
+        model.settings?.llama_model_path ??
+        model.sources[0]?.url.split('/').pop() ??
+        model.id,
+    ])
+  }
+}
diff --git a/extensions/inference-nitro-extension/src/node/execute.test.ts b/extensions/inference-cortex-extension/src/node/execute.test.ts
similarity index 83%
rename from extensions/inference-nitro-extension/src/node/execute.test.ts
rename to extensions/inference-cortex-extension/src/node/execute.test.ts
index dfd8b35a9..89110fbd9 100644
--- a/extensions/inference-nitro-extension/src/node/execute.test.ts
+++ b/extensions/inference-cortex-extension/src/node/execute.test.ts
@@ -1,5 +1,5 @@
 import { describe, expect, it } from '@jest/globals'
-import { executableNitroFile } from './execute'
+import { executableCortexFile } from './execute'
 import { GpuSetting } from '@janhq/core'
 import { cpuInfo } from 'cpu-instructions'
 
@@ -30,7 +30,7 @@ jest.mock('cpu-instructions', () => ({
 let mock = cpuInfo.cpuInfo as jest.Mock
 mock.mockReturnValue([])
 
-describe('test executable nitro file', () => {
+describe('test executable cortex file', () => {
   afterAll(function () {
     Object.defineProperty(process, 'platform', {
       value: originalPlatform,
@@ -44,10 +44,13 @@ describe('test executable nitro file', () => {
     Object.defineProperty(process, 'arch', {
       value: 'arm64',
     })
-    expect(executableNitroFile(testSettings)).toEqual(
+    expect(executableCortexFile(testSettings)).toEqual(
       expect.objectContaining({
         enginePath: expect.stringContaining(`mac-arm64`),
-        executablePath: originalPlatform === 'darwin' ? expect.stringContaining(`mac-arm64/cortex-cpp`) : expect.anything(),
+        executablePath:
+          originalPlatform === 'darwin'
+            ? expect.stringContaining(`/cortex`)
+            : expect.anything(),
         cudaVisibleDevices: '',
         vkVisibleDevices: '',
       })
@@ -55,10 +58,13 @@ describe('test executable nitro file', () => {
     Object.defineProperty(process, 'arch', {
       value: 'x64',
     })
-    expect(executableNitroFile(testSettings)).toEqual(
+    expect(executableCortexFile(testSettings)).toEqual(
       expect.objectContaining({
         enginePath: expect.stringContaining(`mac-x64`),
-        executablePath: originalPlatform === 'darwin' ? expect.stringContaining(`mac-x64/cortex-cpp`) : expect.anything(),
+        executablePath:
+          originalPlatform === 'darwin'
+            ? expect.stringContaining(`/cortex`)
+            : expect.anything(),
         cudaVisibleDevices: '',
         vkVisibleDevices: '',
       })
@@ -73,10 +79,10 @@ describe('test executable nitro file', () => {
       ...testSettings,
       run_mode: 'cpu',
     }
-    expect(executableNitroFile(settings)).toEqual(
+    expect(executableCortexFile(settings)).toEqual(
       expect.objectContaining({
         enginePath: expect.stringContaining(`win`),
-        executablePath: expect.stringContaining(`cortex-cpp.exe`),
+        executablePath: expect.stringContaining(`/cortex.exe`),
         cudaVisibleDevices: '',
         vkVisibleDevices: '',
       })
@@ -107,10 +113,10 @@ describe('test executable nitro file', () => {
         },
       ],
     }
-    expect(executableNitroFile(settings)).toEqual(
+    expect(executableCortexFile(settings)).toEqual(
       expect.objectContaining({
         enginePath: expect.stringContaining(`win-cuda-11-7`),
-        executablePath: expect.stringContaining(`cortex-cpp.exe`),
+        executablePath: expect.stringContaining(`/cortex.exe`),
         cudaVisibleDevices: '0',
         vkVisibleDevices: '0',
       })
@@ -141,10 +147,10 @@ describe('test executable nitro file', () => {
         },
       ],
     }
-    expect(executableNitroFile(settings)).toEqual(
+    expect(executableCortexFile(settings)).toEqual(
       expect.objectContaining({
         enginePath: expect.stringContaining(`win-cuda-12-0`),
-        executablePath: expect.stringContaining(`cortex-cpp.exe`),
+        executablePath: expect.stringContaining(`/cortex.exe`),
         cudaVisibleDevices: '0',
         vkVisibleDevices: '0',
       })
@@ -159,10 +165,10 @@ describe('test executable nitro file', () => {
       ...testSettings,
       run_mode: 'cpu',
     }
-    expect(executableNitroFile(settings)).toEqual(
+    expect(executableCortexFile(settings)).toEqual(
       expect.objectContaining({
         enginePath: expect.stringContaining(`linux`),
-        executablePath: expect.stringContaining(`cortex-cpp`),
+        executablePath: expect.stringContaining(`/cortex`),
         cudaVisibleDevices: '',
         vkVisibleDevices: '',
       })
@@ -193,10 +199,10 @@ describe('test executable nitro file', () => {
         },
       ],
     }
-    expect(executableNitroFile(settings)).toEqual(
+    expect(executableCortexFile(settings)).toEqual(
       expect.objectContaining({
         enginePath: expect.stringContaining(`linux-cuda-11-7`),
-        executablePath: expect.stringContaining(`cortex-cpp`),
+        executablePath: expect.stringContaining(`/cortex`),
         cudaVisibleDevices: '0',
         vkVisibleDevices: '0',
       })
@@ -227,10 +233,10 @@ describe('test executable nitro file', () => {
         },
       ],
     }
-    expect(executableNitroFile(settings)).toEqual(
+    expect(executableCortexFile(settings)).toEqual(
       expect.objectContaining({
         enginePath: expect.stringContaining(`linux-cuda-12-0`),
-        executablePath: expect.stringContaining(`cortex-cpp`),
+        executablePath: expect.stringContaining(`/cortex`),
         cudaVisibleDevices: '0',
         vkVisibleDevices: '0',
       })
@@ -251,10 +257,10 @@ describe('test executable nitro file', () => {
     cpuInstructions.forEach((instruction) => {
       mock.mockReturnValue([instruction])
 
-      expect(executableNitroFile(settings)).toEqual(
+      expect(executableCortexFile(settings)).toEqual(
         expect.objectContaining({
           enginePath: expect.stringContaining(`linux-${instruction}`),
-          executablePath: expect.stringContaining(`cortex-cpp`),
+          executablePath: expect.stringContaining(`/cortex`),
 
           cudaVisibleDevices: '',
           vkVisibleDevices: '',
@@ -274,10 +280,10 @@ describe('test executable nitro file', () => {
     const cpuInstructions = ['avx512', 'avx2', 'avx', 'noavx']
     cpuInstructions.forEach((instruction) => {
       mock.mockReturnValue([instruction])
-      expect(executableNitroFile(settings)).toEqual(
+      expect(executableCortexFile(settings)).toEqual(
         expect.objectContaining({
           enginePath: expect.stringContaining(`win-${instruction}`),
-          executablePath: expect.stringContaining(`cortex-cpp.exe`),
+          executablePath: expect.stringContaining(`/cortex.exe`),
           cudaVisibleDevices: '',
           vkVisibleDevices: '',
         })
@@ -313,10 +319,10 @@ describe('test executable nitro file', () => {
     const cpuInstructions = ['avx512', 'avx2', 'avx', 'noavx']
     cpuInstructions.forEach((instruction) => {
       mock.mockReturnValue([instruction])
-      expect(executableNitroFile(settings)).toEqual(
+      expect(executableCortexFile(settings)).toEqual(
         expect.objectContaining({
           enginePath: expect.stringContaining(`win-cuda-12-0`),
-          executablePath: expect.stringContaining(`cortex-cpp.exe`),
+          executablePath: expect.stringContaining(`/cortex.exe`),
           cudaVisibleDevices: '0',
           vkVisibleDevices: '0',
         })
@@ -352,10 +358,10 @@ describe('test executable nitro file', () => {
     }
     cpuInstructions.forEach((instruction) => {
       mock.mockReturnValue([instruction])
-      expect(executableNitroFile(settings)).toEqual(
+      expect(executableCortexFile(settings)).toEqual(
         expect.objectContaining({
           enginePath: expect.stringContaining(`linux-cuda-12-0`),
-          executablePath: expect.stringContaining(`cortex-cpp`),
+          executablePath: expect.stringContaining(`/cortex`),
           cudaVisibleDevices: '0',
           vkVisibleDevices: '0',
         })
@@ -392,10 +398,10 @@ describe('test executable nitro file', () => {
     }
     cpuInstructions.forEach((instruction) => {
       mock.mockReturnValue([instruction])
-      expect(executableNitroFile(settings)).toEqual(
+      expect(executableCortexFile(settings)).toEqual(
         expect.objectContaining({
           enginePath: expect.stringContaining(`linux-vulkan`),
-          executablePath: expect.stringContaining(`cortex-cpp`),
+          executablePath: expect.stringContaining(`/cortex`),
           cudaVisibleDevices: '0',
           vkVisibleDevices: '0',
         })
@@ -418,10 +424,13 @@ describe('test executable nitro file', () => {
         run_mode: 'cpu',
       }
       mock.mockReturnValue([])
-      expect(executableNitroFile(settings)).toEqual(
+      expect(executableCortexFile(settings)).toEqual(
         expect.objectContaining({
           enginePath: expect.stringContaining(`mac-x64`),
-          executablePath: originalPlatform === 'darwin' ? expect.stringContaining(`mac-x64/cortex-cpp`) : expect.anything(),
+          executablePath:
+            originalPlatform === 'darwin'
+              ? expect.stringContaining(`/cortex`)
+              : expect.anything(),
           cudaVisibleDevices: '',
           vkVisibleDevices: '',
         })
diff --git a/extensions/inference-nitro-extension/src/node/execute.ts b/extensions/inference-cortex-extension/src/node/execute.ts
similarity index 93%
rename from extensions/inference-nitro-extension/src/node/execute.ts
rename to extensions/inference-cortex-extension/src/node/execute.ts
index 595063ed4..0febe8adf 100644
--- a/extensions/inference-nitro-extension/src/node/execute.ts
+++ b/extensions/inference-cortex-extension/src/node/execute.ts
@@ -2,7 +2,7 @@ import { GpuSetting } from '@janhq/core'
 import * as path from 'path'
 import { cpuInfo } from 'cpu-instructions'
 
-export interface NitroExecutableOptions {
+export interface CortexExecutableOptions {
   enginePath: string
   executablePath: string
   cudaVisibleDevices: string
@@ -81,9 +81,9 @@ const cpuInstructions = () => {
  * Find which executable file to run based on the current platform.
  * @returns The name of the executable file to run.
  */
-export const executableNitroFile = (
+export const executableCortexFile = (
   gpuSetting?: GpuSetting
-): NitroExecutableOptions => {
+): CortexExecutableOptions => {
   let engineFolder = [
     os(),
     ...(gpuSetting?.vulkan
@@ -99,7 +99,7 @@ export const executableNitroFile = (
     .join('-')
   let cudaVisibleDevices = gpuSetting?.gpus_in_use.join(',') ?? ''
   let vkVisibleDevices = gpuSetting?.gpus_in_use.join(',') ?? ''
-  let binaryName = `${process.platform === 'darwin' ? `${os()}/` : ''}cortex-cpp${extension()}`
+  let binaryName = `cortex${extension()}`
 
   return {
     enginePath: path.join(__dirname, '..', 'bin', engineFolder),
diff --git a/extensions/inference-cortex-extension/src/node/index.test.ts b/extensions/inference-cortex-extension/src/node/index.test.ts
new file mode 100644
index 000000000..ff9d7c2fc
--- /dev/null
+++ b/extensions/inference-cortex-extension/src/node/index.test.ts
@@ -0,0 +1,94 @@
+jest.mock('@janhq/core/node', () => ({
+  ...jest.requireActual('@janhq/core/node'),
+  getJanDataFolderPath: () => '',
+  getSystemResourceInfo: () => {
+    return {
+      cpu: {
+        cores: 1,
+        logicalCores: 1,
+        threads: 1,
+        model: 'model',
+        speed: 1,
+      },
+      memory: {
+        total: 1,
+        free: 1,
+      },
+      gpu: {
+        model: 'model',
+        memory: 1,
+        cuda: {
+          version: 'version',
+          devices: 'devices',
+        },
+        vulkan: {
+          version: 'version',
+          devices: 'devices',
+        },
+      },
+    }
+  },
+}))
+
+jest.mock('fs', () => ({
+  default: {
+    readdirSync: () => [],
+  },
+}))
+
+jest.mock('child_process', () => ({
+  exec: () => {
+    return {
+      stdout: { on: jest.fn() },
+      stderr: { on: jest.fn() },
+      on: jest.fn(),
+    }
+  },
+  spawn: () => {
+    return {
+      stdout: { on: jest.fn() },
+      stderr: { on: jest.fn() },
+      on: jest.fn(),
+      pid: '111',
+    }
+  },
+}))
+
+jest.mock('./execute', () => ({
+  executableCortexFile: () => {
+    return {
+      enginePath: 'enginePath',
+      executablePath: 'executablePath',
+      cudaVisibleDevices: 'cudaVisibleDevices',
+      vkVisibleDevices: 'vkVisibleDevices',
+    }
+  },
+}))
+
+import index from './index'
+
+describe('dispose', () => {
+  it('should dispose a model successfully on Mac', async () => {
+    Object.defineProperty(process, 'platform', {
+      value: 'darwin',
+    })
+
+    // Call dispose; run() is never called here, so no watchdog exists yet
+    const result = await index.dispose()
+
+    // dispose has no return value
+    expect(result).toBeUndefined()
+  })
+
+  it('should kill the subprocess successfully on Windows', async () => {
+    Object.defineProperty(process, 'platform', {
+      value: 'win32',
+    })
+
+    // Call dispose again, this time under the win32 platform override
+    const result = await index.dispose()
+
+    // dispose has no return value
+    expect(result).toBeUndefined()
+  })
+})
diff --git a/extensions/inference-cortex-extension/src/node/index.ts b/extensions/inference-cortex-extension/src/node/index.ts
new file mode 100644
index 000000000..f1c365ade
--- /dev/null
+++ b/extensions/inference-cortex-extension/src/node/index.ts
@@ -0,0 +1,83 @@
+import path from 'path'
+import { log, SystemInformation } from '@janhq/core/node'
+import { executableCortexFile } from './execute'
+import { ProcessWatchdog } from './watchdog'
+
+// The port passed to the cortex subprocess via --port
+const LOCAL_PORT = '39291'
+let watchdog: ProcessWatchdog | undefined = undefined
+
+/**
+ * Spawns the cortex subprocess under a restart watchdog.
+ * The engine path is appended to PATH and LD_LIBRARY_PATH unless it is
+ * already listed there.
+ *
+ * @param systemInfo Optional system information; a copy of its `gpuSetting`
+ * is passed to executableCortexFile to pick the executable and engine path.
+ * @returns A promise that resolves once the watchdog has been started and
+ * rejects if resolving the executable or creating the watchdog throws.
+ */
+async function run(systemInfo?: SystemInformation): Promise<any> {
+  log(`[CORTEX]:: Spawning cortex subprocess...`)
+
+  const gpuSetting = systemInfo?.gpuSetting
+  const { enginePath, executablePath, cudaVisibleDevices, vkVisibleDevices } =
+    executableCortexFile(gpuSetting ? { ...gpuSetting } : undefined)
+
+  // Log which binary is spawned and which engine path it uses
+  log(`[CORTEX]:: Spawn cortex at path: ${executablePath}`)
+  log(`[CORTEX]::Debug: Cortex engine path: ${enginePath}`)
+
+  // Add engine path to the PATH and LD_LIBRARY_PATH. Entries already present
+  // are not repeated (run may be called again), and an unset variable does
+  // not get a leading delimiter, i.e. an empty search-path entry.
+  for (const key of ['PATH', 'LD_LIBRARY_PATH']) {
+    const current = process.env[key]
+    const entries = current ? current.split(path.delimiter) : []
+    if (!entries.includes(enginePath)) entries.push(enginePath)
+    process.env[key] = entries.join(path.delimiter)
+  }
+  log(`[CORTEX] PATH: ${process.env.PATH}`)
+
+  // Vulkan - Support 1 device at a time for now. vkVisibleDevices is a
+  // comma-joined list, so take the first whole id, not the first character.
+  const [vulkanDevice] = (vkVisibleDevices ?? '').split(',')
+
+  // Stop a watchdog left over from an earlier call before replacing it
+  watchdog?.terminate()
+  watchdog = new ProcessWatchdog(
+    executablePath,
+    ['--start-server', '--port', LOCAL_PORT.toString()],
+    {
+      cwd: enginePath,
+      env: {
+        ...process.env,
+        ENGINE_PATH: enginePath,
+        CUDA_VISIBLE_DEVICES: cudaVisibleDevices,
+        ...(vulkanDevice ? { GGML_VULKAN_DEVICE: vulkanDevice } : {}),
+      },
+    }
+  )
+  watchdog.start()
+}
+
+/**
+ * Teardown hook: called when the extension is unloaded or the app is closed.
+ * Terminates the cortex watchdog when one has been created.
+ */
+function dispose() {
+  if (!watchdog) return
+  watchdog.terminate()
+}
+
+/**
+ * Cortex process info (declared here but not referenced by run or dispose)
+ */
+export interface CortexProcessInfo {
+  isRunning: boolean
+}
+
+export default {
+  run,
+  dispose,
+}
diff --git a/extensions/inference-cortex-extension/src/node/watchdog.ts b/extensions/inference-cortex-extension/src/node/watchdog.ts
new file mode 100644
index 000000000..3e2b81d70
--- /dev/null
+++ b/extensions/inference-cortex-extension/src/node/watchdog.ts
@@ -0,0 +1,84 @@
+import { log } from '@janhq/core/node'
+import { spawn, ChildProcess } from 'child_process'
+import { EventEmitter } from 'events'
+
+interface WatchdogOptions {
+  cwd?: string
+  restartDelay?: number
+  maxRestarts?: number
+  env?: NodeJS.ProcessEnv
+}
+
+/**
+ * Runs a command as a child process and respawns it after an unexpected exit,
+ * up to `maxRestarts` times. 'error' is only emitted when a listener exists.
+ */
+export class ProcessWatchdog extends EventEmitter {
+  private process: ChildProcess | null = null
+  private readonly restartDelay: number
+  private readonly maxRestarts: number
+  private restartCount = 0
+  private isTerminating = false
+
+  constructor(
+    private readonly command: string,
+    private readonly args: string[],
+    private readonly options: WatchdogOptions = {}
+  ) {
+    super()
+    // `??` keeps an explicit 0 (no delay / no restarts) instead of the default
+    this.restartDelay = options.restartDelay ?? 5000
+    this.maxRestarts = options.maxRestarts ?? 5
+  }
+
+  /** Spawns the process for the first time. */
+  start(): void {
+    this.spawnProcess()
+  }
+
+  private spawnProcess(): void {
+    if (this.isTerminating) return
+
+    log(`Starting process: ${this.command} ${this.args.join(' ')}`)
+    const child = (this.process = spawn(this.command, this.args, this.options))
+    child.stdout?.on('data', (data: Buffer) => {
+      log(`Process output: ${data}`)
+      this.emit('output', data.toString())
+    })
+    child.stderr?.on('data', (data: Buffer) => {
+      log(`Process error: ${data}`)
+      // Emitting 'error' with no listener throws, so emit only when observed
+      if (this.listenerCount('error') > 0) this.emit('error', data.toString())
+    })
+    // A failed spawn (e.g. missing binary) emits 'error' on the child; without
+    // a listener Node rethrows it as an uncaught exception.
+    child.on('error', (err: Error) => log(`Process failed: ${err.message}`))
+    child.on('close', (code: number | null) => {
+      log(`Process exited with code ${code}`)
+      this.emit('close', code)
+      if (!this.isTerminating) this.restartProcess()
+    })
+  }
+
+  private restartProcess(): void {
+    if (this.restartCount >= this.maxRestarts) {
+      log('Max restart attempts reached. Exiting watchdog.')
+      this.emit('maxRestartsReached')
+      return
+    }
+    this.restartCount++
+    const attempt = `${this.restartCount}/${this.maxRestarts}`
+    log(`Restarting process in ${this.restartDelay}ms (Attempt ${attempt})`)
+    setTimeout(() => this.spawnProcess(), this.restartDelay)
+  }
+
+  /** Stops restarting and kills the current process, if any. */
+  terminate(): void {
+    this.isTerminating = true
+    if (this.process) {
+      log('Terminating watched process...')
+      this.process.kill()
+    }
+    this.emit('terminated')
+  }
+}
diff --git a/extensions/inference-nitro-extension/tsconfig.json b/extensions/inference-cortex-extension/tsconfig.json
similarity index 100%
rename from extensions/inference-nitro-extension/tsconfig.json
rename to extensions/inference-cortex-extension/tsconfig.json
diff --git a/extensions/inference-nitro-extension/bin/version.txt b/extensions/inference-nitro-extension/bin/version.txt
deleted file mode 100644
index 8f0916f76..000000000
--- a/extensions/inference-nitro-extension/bin/version.txt
+++ /dev/null
@@ -1 +0,0 @@
-0.5.0
diff --git a/extensions/inference-nitro-extension/download.sh b/extensions/inference-nitro-extension/download.sh
deleted file mode 100755
index 98ed8504a..000000000
--- a/extensions/inference-nitro-extension/download.sh
+++ /dev/null
@@ -1,41 +0,0 @@
-#!/bin/bash
-
-# Read CORTEX_VERSION
-CORTEX_VERSION=$(cat ./bin/version.txt)
-CORTEX_RELEASE_URL="https://github.com/janhq/cortex/releases/download"
-
-# Detect platform
-OS_TYPE=$(uname)
-
-if [ "$OS_TYPE" == "Linux" ]; then
-    # Linux downloads
-    download "${CORTEX_RELEASE_URL}/v${CORTEX_VERSION}/cortex-cpp-${CORTEX_VERSION}-linux-amd64.tar.gz"  -e --strip 1 -o "./bin"
-    chmod +x "./bin/cortex-cpp"
-
-    ENGINE_DOWNLOAD_URL="https://github.com/janhq/cortex.llamacpp/releases/download/v0.1.25/cortex.llamacpp-0.1.25-linux-amd64"
-
-    # Download engines for Linux
-    download "${ENGINE_DOWNLOAD_URL}-noavx.tar.gz"  -e --strip 1 -o "./bin/linux-noavx/engines/cortex.llamacpp" 1
-    download "${ENGINE_DOWNLOAD_URL}-avx.tar.gz"  -e --strip 1 -o "./bin/linux-avx/engines/cortex.llamacpp" 1
-    download "${ENGINE_DOWNLOAD_URL}-avx2.tar.gz"  -e --strip 1 -o "./bin/linux-avx2/engines/cortex.llamacpp" 1
-    download "${ENGINE_DOWNLOAD_URL}-avx512.tar.gz"  -e --strip 1 -o "./bin/linux-avx512/engines/cortex.llamacpp" 1
-    download "${ENGINE_DOWNLOAD_URL}-avx2-cuda-12-0.tar.gz"  -e --strip 1 -o "./bin/linux-cuda-12-0/engines/cortex.llamacpp" 1
-    download "${ENGINE_DOWNLOAD_URL}-avx2-cuda-11-7.tar.gz"  -e --strip 1 -o "./bin/linux-cuda-11-7/engines/cortex.llamacpp" 1
-    download "${ENGINE_DOWNLOAD_URL}-vulkan.tar.gz"  -e --strip 1 -o "./bin/linux-vulkan/engines/cortex.llamacpp" 1
-
-elif [ "$OS_TYPE" == "Darwin" ]; then
-    # macOS downloads
-    download "${CORTEX_RELEASE_URL}/v${CORTEX_VERSION}/cortex-cpp-${CORTEX_VERSION}-mac-arm64.tar.gz"  -e --strip 1 -o "./bin/mac-arm64" 1
-    download "${CORTEX_RELEASE_URL}/v${CORTEX_VERSION}/cortex-cpp-${CORTEX_VERSION}-mac-amd64.tar.gz"  -e --strip 1 -o "./bin/mac-x64" 1
-    chmod +x "./bin/mac-arm64/cortex-cpp"
-    chmod +x "./bin/mac-x64/cortex-cpp"
-
-    ENGINE_DOWNLOAD_URL="https://github.com/janhq/cortex.llamacpp/releases/download/v0.1.25/cortex.llamacpp-0.1.25-mac"
-    # Download engines for macOS
-    download "${ENGINE_DOWNLOAD_URL}-arm64.tar.gz" -e --strip 1 -o ./bin/mac-arm64/engines/cortex.llamacpp
-    download "${ENGINE_DOWNLOAD_URL}-amd64.tar.gz" -e --strip 1 -o ./bin/mac-x64/engines/cortex.llamacpp
-
-else
-    echo "Unsupported operating system: $OS_TYPE"
-    exit 1
-fi
diff --git a/extensions/inference-nitro-extension/src/index.ts b/extensions/inference-nitro-extension/src/index.ts
deleted file mode 100644
index 6e825e8fd..000000000
--- a/extensions/inference-nitro-extension/src/index.ts
+++ /dev/null
@@ -1,193 +0,0 @@
-/**
- * @file This file exports a class that implements the InferenceExtension interface from the @janhq/core package.
- * The class provides methods for initializing and stopping a model, and for making inference requests.
- * It also subscribes to events emitted by the @janhq/core package and handles new message requests.
- * @version 1.0.0
- * @module inference-extension/src/index
- */
-
-import {
-  events,
-  executeOnMain,
-  Model,
-  ModelEvent,
-  LocalOAIEngine,
-  InstallationState,
-  systemInformation,
-  fs,
-  getJanDataFolderPath,
-  joinPath,
-  DownloadRequest,
-  baseName,
-  downloadFile,
-  DownloadState,
-  DownloadEvent,
-  ModelFile,
-} from '@janhq/core'
-
-declare const CUDA_DOWNLOAD_URL: string
-/**
- * A class that implements the InferenceExtension interface from the @janhq/core package.
- * The class provides methods for initializing and stopping a model, and for making inference requests.
- * It also subscribes to events emitted by the @janhq/core package and handles new message requests.
- */
-export default class JanInferenceNitroExtension extends LocalOAIEngine {
-  nodeModule: string = NODE
-  provider: string = 'nitro'
-
-  /**
-   * Checking the health for Nitro's process each 5 secs.
-   */
-  private static readonly _intervalHealthCheck = 5 * 1000
-
-  /**
-   * The interval id for the health check. Used to stop the health check.
-   */
-  private getNitroProcessHealthIntervalId: NodeJS.Timeout | undefined = undefined
-
-  /**
-   * Tracking the current state of nitro process.
-   */
-  private nitroProcessInfo: any = undefined
-
-  /**
-   * The URL for making inference requests.
-   */
-  inferenceUrl = ''
-
-  /**
-   * Subscribes to events emitted by the @janhq/core package.
-   */
-  async onLoad() {
-    this.inferenceUrl = INFERENCE_URL
-
-    // If the extension is running in the browser, use the base API URL from the core package.
-    if (!('electronAPI' in window)) {
-      this.inferenceUrl = `${window.core?.api?.baseApiUrl}/v1/chat/completions`
-    }
-
-    this.getNitroProcessHealthIntervalId = setInterval(
-      () => this.periodicallyGetNitroHealth(),
-      JanInferenceNitroExtension._intervalHealthCheck
-    )
-    const models = MODELS as unknown as Model[]
-    this.registerModels(models)
-    super.onLoad()
-
-    // Add additional dependencies PATH to the env
-    executeOnMain(NODE, 'addAdditionalDependencies', {
-      name: this.name,
-      version: this.version,
-    })
-  }
-
-  /**
-   * Periodically check for nitro process's health.
-   */
-  private async periodicallyGetNitroHealth(): Promise<void> {
-    const health = await executeOnMain(NODE, 'getCurrentNitroProcessInfo')
-
-    const isRunning = this.nitroProcessInfo?.isRunning ?? false
-    if (isRunning && health.isRunning === false) {
-      console.debug('Nitro process is stopped')
-      events.emit(ModelEvent.OnModelStopped, {})
-    }
-    this.nitroProcessInfo = health
-  }
-
-  override loadModel(model: ModelFile): Promise<void> {
-    if (model.engine !== this.provider) return Promise.resolve()
-    this.getNitroProcessHealthIntervalId = setInterval(
-      () => this.periodicallyGetNitroHealth(),
-      JanInferenceNitroExtension._intervalHealthCheck
-    )
-    return super.loadModel(model)
-  }
-
-  override async unloadModel(model?: Model): Promise<void> {
-    if (model?.engine && model.engine !== this.provider) return
-
-    // stop the periocally health check
-    if (this.getNitroProcessHealthIntervalId) {
-      clearInterval(this.getNitroProcessHealthIntervalId)
-      this.getNitroProcessHealthIntervalId = undefined
-    }
-    return super.unloadModel(model)
-  }
-
-  override async install(): Promise<void> {
-    const info = await systemInformation()
-
-    const platform = info.osInfo?.platform === 'win32' ? 'windows' : 'linux'
-    const downloadUrl = CUDA_DOWNLOAD_URL
-
-    const url = downloadUrl
-      .replace('<version>', info.gpuSetting?.cuda?.version ?? '12.4')
-      .replace('<platform>', platform)
-
-    console.debug('Downloading Cuda Toolkit Dependency: ', url)
-
-    const janDataFolderPath = await getJanDataFolderPath()
-
-    const executableFolderPath = await joinPath([
-      janDataFolderPath,
-      'engines',
-      this.name ?? 'cortex-cpp',
-      this.version ?? '1.0.0',
-    ])
-
-    if (!(await fs.existsSync(executableFolderPath))) {
-      await fs.mkdir(executableFolderPath)
-    }
-
-    const tarball = await baseName(url)
-    const tarballFullPath = await joinPath([executableFolderPath, tarball])
-
-    const downloadRequest: DownloadRequest = {
-      url,
-      localPath: tarballFullPath,
-      extensionId: this.name,
-      downloadType: 'extension',
-    }
-    downloadFile(downloadRequest)
-
-    const onFileDownloadSuccess = async (state: DownloadState) => {
-      console.log(state)
-      // if other download, ignore
-      if (state.fileName !== tarball) return
-      events.off(DownloadEvent.onFileDownloadSuccess, onFileDownloadSuccess)
-      await executeOnMain(
-        NODE,
-        'decompressRunner',
-        tarballFullPath,
-        executableFolderPath
-      )
-      events.emit(DownloadEvent.onFileUnzipSuccess, state)
-    }
-    events.on(DownloadEvent.onFileDownloadSuccess, onFileDownloadSuccess)
-  }
-
-  override async installationState(): Promise<InstallationState> {
-    const info = await systemInformation()
-    if (
-      info.gpuSetting?.run_mode === 'gpu' &&
-      !info.gpuSetting?.vulkan &&
-      info.osInfo &&
-      info.osInfo.platform !== 'darwin' &&
-      !info.gpuSetting?.cuda?.exist
-    ) {
-      const janDataFolderPath = await getJanDataFolderPath()
-
-      const executableFolderPath = await joinPath([
-        janDataFolderPath,
-        'engines',
-        this.name ?? 'cortex-cpp',
-        this.version ?? '1.0.0',
-      ])
-
-      if (!(await fs.existsSync(executableFolderPath))) return 'NotInstalled'
-      return 'Installed'
-    }
-    return 'NotRequired'
-  }
-}
diff --git a/extensions/inference-nitro-extension/src/node/index.test.ts b/extensions/inference-nitro-extension/src/node/index.test.ts
deleted file mode 100644
index 6e64b4a06..000000000
--- a/extensions/inference-nitro-extension/src/node/index.test.ts
+++ /dev/null
@@ -1,465 +0,0 @@
-jest.mock('fetch-retry', () => ({
-  default: () => () => {
-    return Promise.resolve({
-      ok: true,
-      status: 200,
-      json: () =>
-        Promise.resolve({
-          model_loaded: true,
-        }),
-      text: () => Promise.resolve(''),
-    })
-  },
-}))
-
-jest.mock('path', () => ({
-  default: {
-    isAbsolute: jest.fn(),
-    join: jest.fn(),
-    parse: () => {
-      return { dir: 'dir' }
-    },
-    delimiter: { concat: () => '' },
-  },
-}))
-
-jest.mock('decompress', () => ({
-  default: () => {
-    return Promise.resolve()
-  },
-}))
-
-jest.mock('@janhq/core/node', () => ({
-  ...jest.requireActual('@janhq/core/node'),
-  getJanDataFolderPath: () => '',
-  getSystemResourceInfo: () => {
-    return {
-      cpu: {
-        cores: 1,
-        logicalCores: 1,
-        threads: 1,
-        model: 'model',
-        speed: 1,
-      },
-      memory: {
-        total: 1,
-        free: 1,
-      },
-      gpu: {
-        model: 'model',
-        memory: 1,
-        cuda: {
-          version: 'version',
-          devices: 'devices',
-        },
-        vulkan: {
-          version: 'version',
-          devices: 'devices',
-        },
-      },
-    }
-  },
-}))
-
-jest.mock('fs', () => ({
-  default: {
-    readdirSync: () => [],
-  },
-}))
-
-jest.mock('child_process', () => ({
-  exec: () => {
-    return {
-      stdout: { on: jest.fn() },
-      stderr: { on: jest.fn() },
-      on: jest.fn(),
-    }
-  },
-  spawn: () => {
-    return {
-      stdout: { on: jest.fn() },
-      stderr: { on: jest.fn() },
-      on: jest.fn(),
-      pid: '111',
-    }
-  },
-}))
-
-jest.mock('tcp-port-used', () => ({
-  default: {
-    waitUntilFree: () => Promise.resolve(true),
-    waitUntilUsed: () => Promise.resolve(true),
-  },
-}))
-
-jest.mock('./execute', () => ({
-  executableNitroFile: () => {
-    return {
-      enginePath: 'enginePath',
-      executablePath: 'executablePath',
-      cudaVisibleDevices: 'cudaVisibleDevices',
-      vkVisibleDevices: 'vkVisibleDevices',
-    }
-  },
-}))
-
-jest.mock('terminate', () => ({
-  default: (id: String, func: Function) => {
-    console.log(id)
-    func()
-  },
-}))
-
-import * as execute from './execute'
-import index from './index'
-
-let executeMock = execute
-
-const modelInitOptions: any = {
-  modelFolder: '/path/to/model',
-  model: {
-    id: 'test',
-    name: 'test',
-    engine: 'nitro',
-    version: '0.0',
-    format: 'GGUF',
-    object: 'model',
-    sources: [],
-    created: 0,
-    description: 'test',
-    parameters: {},
-    metadata: {
-      author: '',
-      tags: [],
-      size: 0,
-    },
-    settings: {
-      prompt_template: '{prompt}',
-      llama_model_path: 'model.gguf',
-    },
-  },
-}
-
-describe('loadModel', () => {
-  it('should load a model successfully', async () => {
-    // Mock the necessary parameters and system information
-
-    const systemInfo = {
-      // Mock the system information if needed
-    }
-
-    // Call the loadModel function
-    const result = await index.loadModel(modelInitOptions, systemInfo)
-
-    // Assert that the result is as expected
-    expect(result).toBeUndefined()
-  })
-
-  it('should reject with an error message if the model is not a nitro model', async () => {
-    // Mock the necessary parameters and system information
-
-    const systemInfo = {
-      // Mock the system information if needed
-    }
-    modelInitOptions.model.engine = 'not-nitro'
-    // Call the loadModel function
-    try {
-      await index.loadModel(modelInitOptions, systemInfo)
-    } catch (error) {
-      // Assert that the error message is as expected
-      expect(error).toBe('Not a cortex model')
-    }
-    modelInitOptions.model.engine = 'nitro'
-  })
-
-  it('should reject if model load failed with an error message', async () => {
-    // Mock the necessary parameters and system information
-
-    const systemInfo = {
-      // Mock the system information if needed
-    }
-    // Mock the fetch-retry module to return a failed response
-    jest.mock('fetch-retry', () => ({
-      default: () => () => {
-        return Promise.resolve({
-          ok: false,
-          status: 500,
-          json: () =>
-            Promise.resolve({
-              model_loaded: false,
-            }),
-          text: () => Promise.resolve('Failed to load model'),
-        })
-      },
-    }))
-
-    // Call the loadModel function
-    try {
-      await index.loadModel(modelInitOptions, systemInfo)
-    } catch (error) {
-      // Assert that the error message is as expected
-      expect(error).toBe('Failed to load model')
-    }
-  })
-
-  it('should reject if port not available', async () => {
-    // Mock the necessary parameters and system information
-
-    const systemInfo = {
-      // Mock the system information if needed
-    }
-
-    // Mock the tcp-port-used module to return false
-    jest.mock('tcp-port-used', () => ({
-      default: {
-        waitUntilFree: () => Promise.resolve(false),
-        waitUntilUsed: () => Promise.resolve(false),
-      },
-    }))
-
-    // Call the loadModel function
-    try {
-      await index.loadModel(modelInitOptions, systemInfo)
-    } catch (error) {
-      // Assert that the error message is as expected
-      expect(error).toBe('Port not available')
-    }
-  })
-
-  it('should run on GPU model if ngl is set', async () => {
-    const systemInfo: any = {
-      gpuSetting: {
-        run_mode: 'gpu',
-      },
-    }
-    // Spy executableNitroFile
-    jest.spyOn(executeMock, 'executableNitroFile').mockReturnValue({
-      enginePath: '',
-      executablePath: '',
-      cudaVisibleDevices: '',
-      vkVisibleDevices: '',
-    })
-
-    Object.defineProperty(process, 'platform', { value: 'win32' })
-    await index.loadModel(
-      {
-        ...modelInitOptions,
-        model: {
-          ...modelInitOptions.model,
-          settings: {
-            ...modelInitOptions.model.settings,
-            ngl: 40,
-          },
-        },
-      },
-      systemInfo
-    )
-    expect(executeMock.executableNitroFile).toHaveBeenCalledWith({
-      run_mode: 'gpu',
-    })
-  })
-
-  it('should run on correct CPU instructions if ngl is not set', async () => {
-    const systemInfo: any = {
-      gpuSetting: {
-        run_mode: 'gpu',
-      },
-    }
-    // Spy executableNitroFile
-    jest.spyOn(executeMock, 'executableNitroFile').mockReturnValue({
-      enginePath: '',
-      executablePath: '',
-      cudaVisibleDevices: '',
-      vkVisibleDevices: '',
-    })
-
-    Object.defineProperty(process, 'platform', { value: 'win32' })
-    await index.loadModel(
-      {
-        ...modelInitOptions,
-        model: {
-          ...modelInitOptions.model,
-          settings: {
-            ...modelInitOptions.model.settings,
-            ngl: undefined,
-          },
-        },
-      },
-      systemInfo
-    )
-    expect(executeMock.executableNitroFile).toHaveBeenCalledWith({
-      run_mode: 'cpu',
-    })
-  })
-
-  it('should run on correct CPU instructions if ngl is 0', async () => {
-    const systemInfo: any = {
-      gpuSetting: {
-        run_mode: 'gpu',
-      },
-    }
-    // Spy executableNitroFile
-    jest.spyOn(executeMock, 'executableNitroFile').mockReturnValue({
-      enginePath: '',
-      executablePath: '',
-      cudaVisibleDevices: '',
-      vkVisibleDevices: '',
-    })
-
-    Object.defineProperty(process, 'platform', { value: 'win32' })
-    await index.loadModel(
-      {
-        ...modelInitOptions,
-        model: {
-          ...modelInitOptions.model,
-          settings: {
-            ...modelInitOptions.model.settings,
-            ngl: 0,
-          },
-        },
-      },
-      systemInfo
-    )
-    expect(executeMock.executableNitroFile).toHaveBeenCalledWith({
-      run_mode: 'cpu',
-    })
-  })
-})
-
-describe('unloadModel', () => {
-  it('should unload a model successfully', async () => {
-    // Call the unloadModel function
-    const result = await index.unloadModel()
-
-    // Assert that the result is as expected
-    expect(result).toBeUndefined()
-  })
-
-  it('should reject with an error message if the model is not a nitro model', async () => {
-    // Call the unloadModel function
-    try {
-      await index.unloadModel()
-    } catch (error) {
-      // Assert that the error message is as expected
-      expect(error).toBe('Not a cortex model')
-    }
-  })
-
-  it('should reject if model unload failed with an error message', async () => {
-    // Mock the fetch-retry module to return a failed response
-    jest.mock('fetch-retry', () => ({
-      default: () => () => {
-        return Promise.resolve({
-          ok: false,
-          status: 500,
-          json: () =>
-            Promise.resolve({
-              model_unloaded: false,
-            }),
-          text: () => Promise.resolve('Failed to unload model'),
-        })
-      },
-    }))
-
-    // Call the unloadModel function
-    try {
-      await index.unloadModel()
-    } catch (error) {
-      // Assert that the error message is as expected
-      expect(error).toBe('Failed to unload model')
-    }
-  })
-
-  it('should reject if port not available', async () => {
-    // Mock the tcp-port-used module to return false
-    jest.mock('tcp-port-used', () => ({
-      default: {
-        waitUntilFree: () => Promise.resolve(false),
-        waitUntilUsed: () => Promise.resolve(false),
-      },
-    }))
-
-    // Call the unloadModel function
-    try {
-      await index.unloadModel()
-    } catch (error) {
-      // Assert that the error message is as expected
-      expect(error).toBe('Port not available')
-    }
-  })
-})
-describe('dispose', () => {
-  it('should dispose a model successfully on Mac', async () => {
-    Object.defineProperty(process, 'platform', {
-      value: 'darwin',
-    })
-
-    // Call the dispose function
-    const result = await index.dispose()
-
-    // Assert that the result is as expected
-    expect(result).toBeUndefined()
-  })
-
-  it('should kill the subprocess successfully on Windows', async () => {
-    Object.defineProperty(process, 'platform', {
-      value: 'win32',
-    })
-
-    // Call the killSubprocess function
-    const result = await index.dispose()
-
-    // Assert that the result is as expected
-    expect(result).toBeUndefined()
-  })
-})
-
-describe('getCurrentNitroProcessInfo', () => {
-  it('should return the current nitro process info', async () => {
-    // Call the getCurrentNitroProcessInfo function
-    const result = await index.getCurrentNitroProcessInfo()
-
-    // Assert that the result is as expected
-    expect(result).toEqual({
-      isRunning: true,
-    })
-  })
-})
-
-describe('decompressRunner', () => {
-  it('should decompress the runner successfully', async () => {
-    jest.mock('decompress', () => ({
-      default: () => {
-        return Promise.resolve()
-      },
-    }))
-    // Call the decompressRunner function
-    const result = await index.decompressRunner('', '')
-
-    // Assert that the result is as expected
-    expect(result).toBeUndefined()
-  })
-  it('should not reject if decompression failed', async () => {
-    jest.mock('decompress', () => ({
-      default: () => {
-        return Promise.reject('Failed to decompress')
-      },
-    }))
-    // Call the decompressRunner function
-    const result = await index.decompressRunner('', '')
-    expect(result).toBeUndefined()
-  })
-})
-
-describe('addAdditionalDependencies', () => {
-  it('should add additional dependencies successfully', async () => {
-    // Call the addAdditionalDependencies function
-    const result = await index.addAdditionalDependencies({
-      name: 'name',
-      version: 'version',
-    })
-
-    // Assert that the result is as expected
-    expect(result).toBeUndefined()
-  })
-})
diff --git a/extensions/inference-nitro-extension/src/node/index.ts b/extensions/inference-nitro-extension/src/node/index.ts
deleted file mode 100644
index 98ca4572f..000000000
--- a/extensions/inference-nitro-extension/src/node/index.ts
+++ /dev/null
@@ -1,501 +0,0 @@
-import fs from 'fs'
-import path from 'path'
-import { ChildProcessWithoutNullStreams, spawn } from 'child_process'
-import tcpPortUsed from 'tcp-port-used'
-import fetchRT from 'fetch-retry'
-import {
-  log,
-  getSystemResourceInfo,
-  InferenceEngine,
-  ModelSettingParams,
-  PromptTemplate,
-  SystemInformation,
-  getJanDataFolderPath,
-  ModelFile,
-} from '@janhq/core/node'
-import { executableNitroFile } from './execute'
-import terminate from 'terminate'
-import decompress from 'decompress'
-
-// Polyfill fetch with retry
-const fetchRetry = fetchRT(fetch)
-
-/**
- * The response object for model init operation.
- */
-interface ModelInitOptions {
-  modelFolder: string
-  model: ModelFile
-}
-// The PORT to use for the Nitro subprocess
-const PORT = 3928
-// The HOST address to use for the Nitro subprocess
-const LOCAL_HOST = '127.0.0.1'
-// The URL for the Nitro subprocess
-const NITRO_HTTP_SERVER_URL = `http://${LOCAL_HOST}:${PORT}`
-// The URL for the Nitro subprocess to load a model
-const NITRO_HTTP_LOAD_MODEL_URL = `${NITRO_HTTP_SERVER_URL}/inferences/server/loadmodel`
-// The URL for the Nitro subprocess to validate a model
-const NITRO_HTTP_VALIDATE_MODEL_URL = `${NITRO_HTTP_SERVER_URL}/inferences/server/modelstatus`
-// The URL for the Nitro subprocess to kill itself
-const NITRO_HTTP_KILL_URL = `${NITRO_HTTP_SERVER_URL}/processmanager/destroy`
-
-const NITRO_PORT_FREE_CHECK_INTERVAL = 100
-
-// The supported model format
-// TODO: Should be an array to support more models
-const SUPPORTED_MODEL_FORMAT = '.gguf'
-
-// The subprocess instance for Nitro
-let subprocess: ChildProcessWithoutNullStreams | undefined = undefined
-
-// The current model settings
-let currentSettings: (ModelSettingParams & { model?: string }) | undefined =
-  undefined
-
-/**
- * Stops a Nitro subprocess.
- * @param wrapper - The model wrapper.
- * @returns A Promise that resolves when the subprocess is terminated successfully, or rejects with an error message if the subprocess fails to terminate.
- */
-function unloadModel(): Promise<void> {
-  return killSubprocess()
-}
-
-/**
- * Initializes a Nitro subprocess to load a machine learning model.
- * @param wrapper - The model wrapper.
- * @returns A Promise that resolves when the model is loaded successfully, or rejects with an error message if the model is not found or fails to load.
- * TODO: Should pass absolute of the model file instead of just the name - So we can modurize the module.ts to npm package
- */
-async function loadModel(
-  params: ModelInitOptions,
-  systemInfo?: SystemInformation
-): Promise<ModelOperationResponse | void> {
-  if (params.model.engine !== InferenceEngine.nitro) {
-    // Not a nitro model
-    return Promise.resolve()
-  }
-
-  if (params.model.engine !== InferenceEngine.nitro) {
-    return Promise.reject('Not a cortex model')
-  } else {
-    const nitroResourceProbe = await getSystemResourceInfo()
-    // Convert settings.prompt_template to system_prompt, user_prompt, ai_prompt
-    if (params.model.settings.prompt_template) {
-      const promptTemplate = params.model.settings.prompt_template
-      const prompt = promptTemplateConverter(promptTemplate)
-      if (prompt?.error) {
-        return Promise.reject(prompt.error)
-      }
-      params.model.settings.system_prompt = prompt.system_prompt
-      params.model.settings.user_prompt = prompt.user_prompt
-      params.model.settings.ai_prompt = prompt.ai_prompt
-    }
-
-    // modelFolder is the absolute path to the running model folder
-    // e.g. ~/jan/models/llama-2
-    let modelFolder = params.modelFolder
-
-    let llama_model_path = params.model.settings.llama_model_path
-
-    // Absolute model path support
-    if (
-      params.model?.sources.length &&
-      params.model.sources.every((e) => fs.existsSync(e.url))
-    ) {
-      llama_model_path =
-        params.model.sources.length === 1
-          ? params.model.sources[0].url
-          : params.model.sources.find((e) =>
-              e.url.includes(llama_model_path ?? params.model.id)
-            )?.url
-    }
-
-    if (!llama_model_path || !path.isAbsolute(llama_model_path)) {
-      // Look for GGUF model file
-      const modelFiles: string[] = fs.readdirSync(modelFolder)
-      const ggufBinFile = modelFiles.find(
-        (file) =>
-          // 1. Prioritize llama_model_path (predefined)
-          (llama_model_path && file === llama_model_path) ||
-          // 2. Prioritize GGUF File (manual import)
-          file.toLowerCase().includes(SUPPORTED_MODEL_FORMAT) ||
-          // 3. Fallback Model ID (for backward compatibility)
-          file === params.model.id
-      )
-      if (ggufBinFile) llama_model_path = path.join(modelFolder, ggufBinFile)
-    }
-
-    // Look for absolute source path for single model
-
-    if (!llama_model_path) return Promise.reject('No GGUF model file found')
-
-    currentSettings = {
-      cpu_threads: Math.max(1, nitroResourceProbe.numCpuPhysicalCore),
-      // model.settings can override the default settings
-      ...params.model.settings,
-      llama_model_path,
-      model: params.model.id,
-      // This is critical and requires real CPU physical core count (or performance core)
-      ...(params.model.settings.mmproj && {
-        mmproj: path.isAbsolute(params.model.settings.mmproj)
-          ? params.model.settings.mmproj
-          : path.join(modelFolder, params.model.settings.mmproj),
-      }),
-    }
-    return runNitroAndLoadModel(params.model.id, systemInfo)
-  }
-}
-
-/**
- * 1. Spawn Nitro process
- * 2. Load model into Nitro subprocess
- * 3. Validate model status
- * @returns
- */
-async function runNitroAndLoadModel(
-  modelId: string,
-  systemInfo?: SystemInformation
-) {
-  // Gather system information for CPU physical cores and memory
-  return killSubprocess()
-    .then(() =>
-      tcpPortUsed.waitUntilFree(PORT, NITRO_PORT_FREE_CHECK_INTERVAL, 5000)
-    )
-    .then(() => spawnNitroProcess(systemInfo))
-    .then(() => loadLLMModel(currentSettings))
-    .then(() => validateModelStatus(modelId))
-    .catch((err) => {
-      // TODO: Broadcast error so app could display proper error message
-      log(`[CORTEX]::Error: ${err}`)
-      return { error: err }
-    })
-}
-
-/**
- * Parse prompt template into agrs settings
- * @param promptTemplate Template as string
- * @returns
- */
-function promptTemplateConverter(promptTemplate: string): PromptTemplate {
-  // Split the string using the markers
-  const systemMarker = '{system_message}'
-  const promptMarker = '{prompt}'
-
-  if (
-    promptTemplate.includes(systemMarker) &&
-    promptTemplate.includes(promptMarker)
-  ) {
-    // Find the indices of the markers
-    const systemIndex = promptTemplate.indexOf(systemMarker)
-    const promptIndex = promptTemplate.indexOf(promptMarker)
-
-    // Extract the parts of the string
-    const system_prompt = promptTemplate.substring(0, systemIndex)
-    const user_prompt = promptTemplate.substring(
-      systemIndex + systemMarker.length,
-      promptIndex
-    )
-    const ai_prompt = promptTemplate.substring(
-      promptIndex + promptMarker.length
-    )
-
-    // Return the split parts
-    return { system_prompt, user_prompt, ai_prompt }
-  } else if (promptTemplate.includes(promptMarker)) {
-    // Extract the parts of the string for the case where only promptMarker is present
-    const promptIndex = promptTemplate.indexOf(promptMarker)
-    const user_prompt = promptTemplate.substring(0, promptIndex)
-    const ai_prompt = promptTemplate.substring(
-      promptIndex + promptMarker.length
-    )
-
-    // Return the split parts
-    return { user_prompt, ai_prompt }
-  }
-
-  // Return an error if none of the conditions are met
-  return { error: 'Cannot split prompt template' }
-}
-
-/**
- * Loads a LLM model into the Nitro subprocess by sending a HTTP POST request.
- * @returns A Promise that resolves when the model is loaded successfully, or rejects with an error message if the model is not found or fails to load.
- */
-function loadLLMModel(settings: any): Promise<Response> {
-  if (!settings?.ngl) {
-    settings.ngl = 100
-  }
-  log(`[CORTEX]:: Loading model with params ${JSON.stringify(settings)}`)
-  return fetchRetry(NITRO_HTTP_LOAD_MODEL_URL, {
-    method: 'POST',
-    headers: {
-      'Content-Type': 'application/json',
-    },
-    body: JSON.stringify(settings),
-    retries: 3,
-    retryDelay: 300,
-  })
-    .then((res) => {
-      log(
-        `[CORTEX]:: Load model success with response ${JSON.stringify(
-          res
-        )}`
-      )
-      return Promise.resolve(res)
-    })
-    .catch((err) => {
-      log(`[CORTEX]::Error: Load model failed with error ${err}`)
-      return Promise.reject(err)
-    })
-}
-
-/**
- * Validates the status of a model.
- * @returns {Promise<ModelOperationResponse>} A promise that resolves to an object.
- * If the model is loaded successfully, the object is empty.
- * If the model is not loaded successfully, the object contains an error message.
- */
-async function validateModelStatus(modelId: string): Promise<void> {
-  // Send a GET request to the validation URL.
-  // Retry the request up to 3 times if it fails, with a delay of 500 milliseconds between retries.
-  log(`[CORTEX]:: Validating model ${modelId}`)
-  return fetchRetry(NITRO_HTTP_VALIDATE_MODEL_URL, {
-    method: 'POST',
-    body: JSON.stringify({
-      model: modelId,
-      // TODO: force to use cortex llamacpp by default
-      engine: 'cortex.llamacpp',
-    }),
-    headers: {
-      'Content-Type': 'application/json',
-    },
-    retries: 5,
-    retryDelay: 300,
-  }).then(async (res: Response) => {
-    log(
-      `[CORTEX]:: Validate model state with response ${JSON.stringify(
-        res.status
-      )}`
-    )
-    // If the response is OK, check model_loaded status.
-    if (res.ok) {
-      const body = await res.json()
-      // If the model is loaded, return an empty object.
-      // Otherwise, return an object with an error message.
-      if (body.model_loaded) {
-        log(
-          `[CORTEX]:: Validate model state success with response ${JSON.stringify(
-            body
-          )}`
-        )
-        return Promise.resolve()
-      }
-    }
-    const errorBody = await res.text()
-    log(
-      `[CORTEX]:: Validate model state failed with response ${errorBody} and status is ${JSON.stringify(
-        res.statusText
-      )}`
-    )
-    return Promise.reject('Validate model status failed')
-  })
-}
-
-/**
- * Terminates the Nitro subprocess.
- * @returns A Promise that resolves when the subprocess is terminated successfully, or rejects with an error message if the subprocess fails to terminate.
- */
-async function killSubprocess(): Promise<void> {
-  const controller = new AbortController()
-  setTimeout(() => controller.abort(), 5000)
-  log(`[CORTEX]:: Request to kill cortex`)
-
-  const killRequest = () => {
-    return fetch(NITRO_HTTP_KILL_URL, {
-      method: 'DELETE',
-      signal: controller.signal,
-    })
-      .catch(() => {}) // Do nothing with this attempt
-      .then(() =>
-        tcpPortUsed.waitUntilFree(PORT, NITRO_PORT_FREE_CHECK_INTERVAL, 5000)
-      )
-      .then(() => log(`[CORTEX]:: cortex process is terminated`))
-      .catch((err) => {
-        log(
-          `[CORTEX]:: Could not kill running process on port ${PORT}. Might be another process running on the same port? ${err}`
-        )
-        throw 'PORT_NOT_AVAILABLE'
-      })
-  }
-
-  if (subprocess?.pid && process.platform !== 'darwin') {
-    log(`[CORTEX]:: Killing PID ${subprocess.pid}`)
-    const pid = subprocess.pid
-    return new Promise((resolve, reject) => {
-      terminate(pid, function (err) {
-        if (err) {
-          log('[CORTEX]::Failed to kill PID - sending request to kill')
-          killRequest().then(resolve).catch(reject)
-        } else {
-          tcpPortUsed
-            .waitUntilFree(PORT, NITRO_PORT_FREE_CHECK_INTERVAL, 5000)
-            .then(() => log(`[CORTEX]:: cortex process is terminated`))
-            .then(() => resolve())
-            .catch(() => {
-              log(
-                '[CORTEX]::Failed to kill PID (Port check timeout) - sending request to kill'
-              )
-              killRequest().then(resolve).catch(reject)
-            })
-        }
-      })
-    })
-  } else {
-    return killRequest()
-  }
-}
-
-/**
- * Spawns a Nitro subprocess.
- * @returns A promise that resolves when the Nitro subprocess is started.
- */
-function spawnNitroProcess(systemInfo?: SystemInformation): Promise<any> {
-  log(`[CORTEX]:: Spawning cortex subprocess...`)
-
-  return new Promise<void>(async (resolve, reject) => {
-    let executableOptions = executableNitroFile(
-      // If ngl is not set or equal to 0, run on CPU with correct instructions
-      systemInfo?.gpuSetting
-        ? {
-            ...systemInfo.gpuSetting,
-            run_mode:
-              currentSettings?.ngl === undefined || currentSettings.ngl === 0
-                ? 'cpu'
-                : systemInfo.gpuSetting.run_mode,
-          }
-        : undefined
-    )
-
-    const args: string[] = ['1', LOCAL_HOST, PORT.toString()]
-    // Execute the binary
-    log(
-      `[CORTEX]:: Spawn cortex at path: ${executableOptions.executablePath}, and args: ${args}`
-    )
-    log(`[CORTEX]::Debug: Cortex engine path: ${executableOptions.enginePath}`)
-
-    // Add engine path to the PATH and LD_LIBRARY_PATH
-    process.env.PATH = (process.env.PATH || '').concat(
-      path.delimiter,
-      executableOptions.enginePath
-    )
-    log(`[CORTEX] PATH: ${process.env.PATH}`)
-    process.env.LD_LIBRARY_PATH = (process.env.LD_LIBRARY_PATH || '').concat(
-      path.delimiter,
-      executableOptions.enginePath
-    )
-
-    subprocess = spawn(
-      executableOptions.executablePath,
-      ['1', LOCAL_HOST, PORT.toString()],
-      {
-        cwd: path.join(path.parse(executableOptions.executablePath).dir),
-        env: {
-          ...process.env,
-          ENGINE_PATH: executableOptions.enginePath,
-          CUDA_VISIBLE_DEVICES: executableOptions.cudaVisibleDevices,
-          // Vulkan - Support 1 device at a time for now
-          ...(executableOptions.vkVisibleDevices?.length > 0 && {
-            GGML_VULKAN_DEVICE: executableOptions.vkVisibleDevices[0],
-          }),
-        },
-      }
-    )
-
-    // Handle subprocess output
-    subprocess.stdout.on('data', (data: any) => {
-      log(`[CORTEX]:: ${data}`)
-    })
-
-    subprocess.stderr.on('data', (data: any) => {
-      log(`[CORTEX]::Error: ${data}`)
-    })
-
-    subprocess.on('close', (code: any) => {
-      log(`[CORTEX]:: cortex exited with code: ${code}`)
-      subprocess = undefined
-      reject(`child process exited with code ${code}`)
-    })
-
-    tcpPortUsed
-      .waitUntilUsed(PORT, NITRO_PORT_FREE_CHECK_INTERVAL, 30000)
-      .then(() => {
-        log(`[CORTEX]:: cortex is ready`)
-        resolve()
-      })
-  })
-}
-
-/**
- * Every module should have a dispose function
- * This will be called when the extension is unloaded and should clean up any resources
- * Also called when app is closed
- */
-function dispose() {
-  // clean other registered resources here
-  killSubprocess()
-}
-
-/**
- * Nitro process info
- */
-export interface NitroProcessInfo {
-  isRunning: boolean
-}
-
-/**
- * Retrieve current nitro process
- */
-const getCurrentNitroProcessInfo = (): NitroProcessInfo => {
-  return {
-    isRunning: subprocess != null,
-  }
-}
-
-const addAdditionalDependencies = (data: { name: string; version: string }) => {
-  log(
-    `[CORTEX]::Debug: Adding additional dependencies for ${data.name} ${data.version}`
-  )
-  const additionalPath = path.delimiter.concat(
-    path.join(getJanDataFolderPath(), 'engines', data.name, data.version)
-  )
-  // Set the updated PATH
-  process.env.PATH = (process.env.PATH || '').concat(
-    path.delimiter,
-    additionalPath
-  )
-  process.env.LD_LIBRARY_PATH = (process.env.LD_LIBRARY_PATH || '').concat(
-    path.delimiter,
-    additionalPath
-  )
-}
-
-const decompressRunner = async (zipPath: string, output: string) => {
-  console.debug(`Decompressing ${zipPath} to ${output}...`)
-  try {
-    const files = await decompress(zipPath, output)
-    console.debug('Decompress finished!', files)
-  } catch (err) {
-    console.error(`Decompress ${zipPath} failed: ${err}`)
-  }
-}
-
-export default {
-  loadModel,
-  unloadModel,
-  dispose,
-  getCurrentNitroProcessInfo,
-  addAdditionalDependencies,
-  decompressRunner,
-}
diff --git a/extensions/model-extension/package.json b/extensions/model-extension/package.json
index 3a694e5a0..bd834454a 100644
--- a/extensions/model-extension/package.json
+++ b/extensions/model-extension/package.json
@@ -4,7 +4,6 @@
   "version": "1.0.34",
   "description": "Model Management Extension provides model exploration and seamless downloads",
   "main": "dist/index.js",
-  "node": "dist/node/index.cjs.js",
   "author": "Jan <service@jan.ai>",
   "license": "AGPL-3.0",
   "scripts": {
@@ -36,15 +35,9 @@
     "README.md"
   ],
   "dependencies": {
-    "@huggingface/gguf": "^0.0.11",
-    "@huggingface/jinja": "^0.3.0",
     "@janhq/core": "file:../../core",
-    "hyllama": "^0.2.2",
-    "python-shell": "^5.0.0"
+    "ky": "^1.7.2",
+    "p-queue": "^8.0.1"
   },
-  "bundleDependencies": [
-    "hyllama",
-    "@huggingface/gguf",
-    "@huggingface/jinja"
-  ]
+  "bundleDependencies": []
 }
diff --git a/extensions/model-extension/resources/default-model.json b/extensions/model-extension/resources/default-model.json
deleted file mode 100644
index c02008cd6..000000000
--- a/extensions/model-extension/resources/default-model.json
+++ /dev/null
@@ -1,36 +0,0 @@
-{
-  "object": "model",
-  "version": "1.0",
-  "format": "gguf",
-  "sources": [
-    {
-      "url": "N/A",
-      "filename": "N/A"
-    }
-  ],
-  "id": "N/A",
-  "name": "N/A",
-  "created": 0,
-  "description": "User self import model",
-  "settings": {
-    "ctx_len": 2048,
-    "embedding": false,
-    "prompt_template": "{system_message}\n### Instruction: {prompt}\n### Response:",
-    "llama_model_path": "N/A"
-  },
-  "parameters": {
-    "temperature": 0.7,
-    "top_p": 0.95,
-    "stream": true,
-    "max_tokens": 2048,
-    "stop": ["<|END_OF_TURN_TOKEN|>", "<end_of_turn>", "[/INST]", "<|end_of_text|>", "<|eot_id|>", "<|im_end|>", "<|end|>"],
-    "frequency_penalty": 0,
-    "presence_penalty": 0
-  },
-  "metadata": {
-    "author": "User",
-    "tags": [],
-    "size": 0
-  },
-  "engine": "nitro"
-}
diff --git a/extensions/model-extension/rollup.config.ts b/extensions/model-extension/rollup.config.ts
index d36d8ffac..6e506140f 100644
--- a/extensions/model-extension/rollup.config.ts
+++ b/extensions/model-extension/rollup.config.ts
@@ -6,7 +6,6 @@ import replace from '@rollup/plugin-replace'
 import commonjs from '@rollup/plugin-commonjs'
 const settingJson = require('./resources/settings.json')
 const packageJson = require('./package.json')
-const defaultModelJson = require('./resources/default-model.json')
 
 export default [
   {
@@ -20,17 +19,18 @@ export default [
     plugins: [
       replace({
         preventAssignment: true,
-        DEFAULT_MODEL: JSON.stringify(defaultModelJson),
         SETTINGS: JSON.stringify(settingJson),
-        NODE: JSON.stringify(`${packageJson.name}/${packageJson.node}`),
       }),
       // Allow json resolution
       json(),
       //     Compile TypeScript files
-      typescript({ useTsconfigDeclarationDir: true, exclude: ['**/__tests__', '**/*.test.ts'], }),
+      typescript({
+        useTsconfigDeclarationDir: true,
+        exclude: ['**/__tests__', '**/*.test.ts'],
+      }),
       // Compile TypeScript files
       // Allow bundling cjs modules (unlike webpack, rollup doesn't understand cjs)
-      // commonjs(),
+      commonjs(),
       // Allow node_modules resolution, so you can use 'external' to control
       // which external modules to include in the bundle
       // https://github.com/rollup/rollup-plugin-node-resolve#usage
@@ -39,39 +39,6 @@ export default [
         browser: true,
       }),
 
-      // Resolve source maps to the original source
-      sourceMaps(),
-    ],
-  },
-  {
-    input: `src/node/index.ts`,
-    output: [
-      {
-        file: 'dist/node/index.cjs.js',
-        format: 'cjs',
-        sourcemap: true,
-        inlineDynamicImports: true,
-      },
-    ],
-    // Indicate here external modules you don't wanna include in your bundle (i.e.: 'lodash')
-    external: ['@janhq/core/node'],
-    watch: {
-      include: 'src/node/**',
-    },
-    plugins: [
-      // Allow json resolution
-      json(),
-      // Compile TypeScript files
-      typescript({ useTsconfigDeclarationDir: true, exclude: ['**/__tests__', '**/*.test.ts'], }),
-      // Allow bundling cjs modules (unlike webpack, rollup doesn't understand cjs)
-      commonjs(),
-      // Allow node_modules resolution, so you can use 'external' to control
-      // which external modules to include in the bundle
-      // https://github.com/rollup/rollup-plugin-node-resolve#usage
-      resolve({
-        extensions: ['.ts', '.js', '.json'],
-      }),
-
       // Resolve source maps to the original source
       sourceMaps(),
     ],
diff --git a/extensions/model-extension/src/@types/InvalidHostError.ts b/extensions/model-extension/src/@types/InvalidHostError.ts
deleted file mode 100644
index 47262206e..000000000
--- a/extensions/model-extension/src/@types/InvalidHostError.ts
+++ /dev/null
@@ -1,6 +0,0 @@
-export class InvalidHostError extends Error {
-  constructor(message: string) {
-    super(message)
-    this.name = 'InvalidHostError'
-  }
-}
diff --git a/extensions/model-extension/src/@types/NotSupportModelError.ts b/extensions/model-extension/src/@types/NotSupportModelError.ts
deleted file mode 100644
index 0a1946176..000000000
--- a/extensions/model-extension/src/@types/NotSupportModelError.ts
+++ /dev/null
@@ -1,6 +0,0 @@
-export class NotSupportedModelError extends Error {
-  constructor(message: string) {
-    super(message)
-    this.name = 'NotSupportedModelError'
-  }
-}
diff --git a/extensions/model-extension/src/@types/global.d.ts b/extensions/model-extension/src/@types/global.d.ts
index 3878d4bf2..01bd272f2 100644
--- a/extensions/model-extension/src/@types/global.d.ts
+++ b/extensions/model-extension/src/@types/global.d.ts
@@ -1,6 +1,5 @@
 export {}
 declare global {
-  declare const DEFAULT_MODEL: object
   declare const NODE: string
 
   interface Core {
diff --git a/extensions/model-extension/src/cortex.ts b/extensions/model-extension/src/cortex.ts
new file mode 100644
index 000000000..685bf3b9f
--- /dev/null
+++ b/extensions/model-extension/src/cortex.ts
@@ -0,0 +1,166 @@
+import PQueue from 'p-queue'
+import ky from 'ky'
+import {
+  DownloadEvent,
+  events,
+  Model,
+  ModelRuntimeParams,
+  ModelSettingParams,
+} from '@janhq/core'
+/**
+ * cortex.cpp Model APIs interface
+ */
+interface ICortexAPI {
+  getModel(model: string): Promise<Model>
+  getModels(): Promise<Model[]>
+  pullModel(model: string): Promise<void>
+  importModel(path: string, modelPath: string): Promise<void>
+  deleteModel(model: string): Promise<void>
+  updateModel(model: object): Promise<void>
+  cancelModelPull(model: string): Promise<void>
+}
+/**
+ * Simple CortexAPI service
+ * It could be replaced by cortex client sdk later on
+ */
+const API_URL = 'http://127.0.0.1:39291'
+const SOCKET_URL = 'ws://127.0.0.1:39291'
+
+type ModelList = {
+  data: any[]
+}
+
+export class CortexAPI implements ICortexAPI {
+  queue = new PQueue({ concurrency: 1 })
+  socket?: WebSocket = undefined
+
+  constructor() {
+    this.queue.add(() => this.healthz())
+    this.subscribeToEvents()
+  }
+
+  getModel(model: string): Promise<any> {
+    return this.queue.add(() =>
+      ky
+        .get(`${API_URL}/v1/models/${model}`)
+        .json()
+        .then((e) => this.transformModel(e))
+    )
+  }
+
+  getModels(): Promise<Model[]> {
+    return this.queue
+      .add(() => ky.get(`${API_URL}/models`).json<ModelList>())
+      .then((e) =>
+        typeof e === 'object' ? e.data.map((e) => this.transformModel(e)) : []
+      )
+  }
+
+  pullModel(model: string): Promise<void> {
+    return this.queue.add(() =>
+      ky
+        .post(`${API_URL}/v1/models/pull`, { json: { model } })
+        .json()
+        .catch(async (e) => {
+          throw (await e.response?.json()) ?? e
+        })
+        .then()
+    )
+  }
+
+  importModel(model: string, modelPath: string): Promise<void> {
+    return this.queue.add(() =>
+      ky
+        .post(`${API_URL}/v1/models/import`, { json: { model, modelPath } })
+        .json()
+        .catch((e) => console.debug(e)) // Ignore error
+        .then()
+    )
+  }
+
+  deleteModel(model: string): Promise<void> {
+    return this.queue.add(() =>
+      ky.delete(`${API_URL}/models/${model}`).json().then()
+    )
+  }
+
+  updateModel(model: object): Promise<void> {
+    return this.queue.add(() =>
+      ky
+        .patch(`${API_URL}/v1/models/${model}`, { json: { model } })
+        .json()
+        .then()
+    )
+  }
+  cancelModelPull(model: string): Promise<void> {
+    return this.queue.add(() =>
+      ky
+        .delete(`${API_URL}/models/pull`, { json: { taskId: model } })
+        .json()
+        .then()
+    )
+  }
+
+  healthz(): Promise<void> {
+    return ky
+      .get(`${API_URL}/healthz`, {
+        retry: {
+          limit: 10,
+          methods: ['get'],
+        },
+      })
+      .then(() => {})
+  }
+
+  subscribeToEvents() {
+    this.queue.add(
+      () =>
+        new Promise<void>((resolve) => {
+          this.socket = new WebSocket(`${SOCKET_URL}/events`)
+          console.log('Socket connected')
+
+          this.socket.addEventListener('message', (event) => {
+            const data = JSON.parse(event.data)
+            const transferred = data.task.items.reduce(
+              (accumulator, currentValue) =>
+                accumulator + currentValue.downloadedBytes,
+              0
+            )
+            const total = data.task.items.reduce(
+              (accumulator, currentValue) => accumulator + currentValue.bytes,
+              0
+            )
+            const percent = ((transferred ?? 1) / (total ?? 1)) * 100
+
+            events.emit(data.type, {
+              modelId: data.task.id,
+              percent: percent,
+              size: {
+                transferred: transferred,
+                total: total,
+              },
+            })
+          })
+          resolve()
+        })
+    )
+  }
+
+  private transformModel(model: any) {
+    model.parameters = setParameters<ModelRuntimeParams>(model)
+    model.settings = setParameters<ModelSettingParams>(model)
+    model.metadata = {
+      tags: [],
+    }
+    return model as Model
+  }
+}
+
+type FilteredParams<T> = {
+  [K in keyof T]: T[K]
+}
+
+function setParameters<T>(params: T): T {
+  const filteredParams: FilteredParams<T> = { ...params }
+  return filteredParams
+}
diff --git a/extensions/model-extension/src/helpers/path.test.ts b/extensions/model-extension/src/helpers/path.test.ts
deleted file mode 100644
index 64ca65d8a..000000000
--- a/extensions/model-extension/src/helpers/path.test.ts
+++ /dev/null
@@ -1,87 +0,0 @@
-import { extractFileName } from './path';
-
-describe('extractFileName Function', () => {
-  it('should correctly extract the file name with the provided file extension', () => {
-    const url = 'http://example.com/some/path/to/file.ext';
-    const fileExtension = '.ext';
-    const fileName = extractFileName(url, fileExtension);
-    expect(fileName).toBe('file.ext');
-  });
-
-  it('should correctly append the file extension if it does not already exist in the file name', () => {
-    const url = 'http://example.com/some/path/to/file';
-    const fileExtension = '.txt';
-    const fileName = extractFileName(url, fileExtension);
-    expect(fileName).toBe('file.txt');
-  });
-
-  it('should handle cases where the URL does not have a file extension correctly', () => {
-    const url = 'http://example.com/some/path/to/file';
-    const fileExtension = '.jpg';
-    const fileName = extractFileName(url, fileExtension);
-    expect(fileName).toBe('file.jpg');
-  });
-
-  it('should correctly handle URLs without a trailing slash', () => {
-    const url = 'http://example.com/some/path/tofile';
-    const fileExtension = '.txt';
-    const fileName = extractFileName(url, fileExtension);
-    expect(fileName).toBe('tofile.txt');
-  });
-
-  it('should correctly handle URLs with multiple file extensions', () => {
-    const url = 'http://example.com/some/path/tofile.tar.gz';
-    const fileExtension = '.gz';
-    const fileName = extractFileName(url, fileExtension);
-    expect(fileName).toBe('tofile.tar.gz');
-  });
-
-  it('should correctly handle URLs with special characters', () => {
-    const url = 'http://example.com/some/path/tófílë.extë';
-    const fileExtension = '.extë';
-    const fileName = extractFileName(url, fileExtension);
-    expect(fileName).toBe('tófílë.extë');
-  });
-
-  it('should correctly handle URLs that are just a file with no path', () => {
-    const url = 'http://example.com/file.txt';
-    const fileExtension = '.txt';
-    const fileName = extractFileName(url, fileExtension);
-    expect(fileName).toBe('file.txt');
-  });
-
-  it('should correctly handle URLs that have special query parameters', () => {
-    const url = 'http://example.com/some/path/tofile.ext?query=1';
-    const fileExtension = '.ext';
-    const fileName = extractFileName(url.split('?')[0], fileExtension);
-    expect(fileName).toBe('tofile.ext');
-  });
-
-  it('should correctly handle URLs that have uppercase characters', () => {
-    const url = 'http://EXAMPLE.COM/PATH/TO/FILE.EXT';
-    const fileExtension = '.ext';
-    const fileName = extractFileName(url, fileExtension);
-    expect(fileName).toBe('FILE.EXT');
-  });
-
-  it('should correctly handle invalid URLs', () => {
-    const url = 'invalid-url';
-    const fileExtension = '.txt';
-    const fileName = extractFileName(url, fileExtension);
-    expect(fileName).toBe('invalid-url.txt');
-  });
-
-  it('should correctly handle empty URLs', () => {
-    const url = '';
-    const fileExtension = '.txt';
-    const fileName = extractFileName(url, fileExtension);
-    expect(fileName).toBe('.txt');
-  });
-
-  it('should correctly handle undefined URLs', () => {
-    const url = undefined;
-    const fileExtension = '.txt';
-    const fileName = extractFileName(url as any, fileExtension);
-    expect(fileName).toBe('.txt');
-  });
-});
diff --git a/extensions/model-extension/src/helpers/path.ts b/extensions/model-extension/src/helpers/path.ts
deleted file mode 100644
index 6091005b8..000000000
--- a/extensions/model-extension/src/helpers/path.ts
+++ /dev/null
@@ -1,13 +0,0 @@
-/**
- *  try to retrieve the download file name from the source url
- */
-
-export function extractFileName(url: string, fileExtension: string): string {
-  if(!url) return fileExtension
-
-  const extractedFileName = url.split('/').pop()
-  const fileName = extractedFileName.toLowerCase().endsWith(fileExtension)
-    ? extractedFileName
-    : extractedFileName + fileExtension
-  return fileName
-}
diff --git a/extensions/model-extension/src/index.test.ts b/extensions/model-extension/src/index.test.ts
index 3f804b6d6..05598c30d 100644
--- a/extensions/model-extension/src/index.test.ts
+++ b/extensions/model-extension/src/index.test.ts
@@ -1,846 +1,90 @@
-/**
- * @jest-environment jsdom
- */
-const readDirSyncMock = jest.fn()
-const existMock = jest.fn()
-const readFileSyncMock = jest.fn()
-const downloadMock = jest.fn()
-const mkdirMock = jest.fn()
-const writeFileSyncMock = jest.fn()
-const copyFileMock = jest.fn()
-const dirNameMock = jest.fn()
-const executeMock = jest.fn()
+import JanModelExtension from './index'
+import { Model } from '@janhq/core'
+
+let SETTINGS = []
+// @ts-ignore
+global.SETTINGS = SETTINGS
 
 jest.mock('@janhq/core', () => ({
   ...jest.requireActual('@janhq/core/node'),
   events: {
     emit: jest.fn(),
   },
-  fs: {
-    existsSync: existMock,
-    readdirSync: readDirSyncMock,
-    readFileSync: readFileSyncMock,
-    writeFileSync: writeFileSyncMock,
-    mkdir: mkdirMock,
-    copyFile: copyFileMock,
-    fileStat: () => ({
-      isDirectory: false,
-    }),
-  },
-  dirName: dirNameMock,
   joinPath: (paths) => paths.join('/'),
-  ModelExtension: jest.fn(),
-  downloadFile: downloadMock,
-  executeOnMain: executeMock,
+  ModelExtension: jest.fn().mockImplementation(function () {
+    // @ts-ignore
+    this.registerSettings = () => {
+      return Promise.resolve()
+    }
+    // @ts-ignore
+    return this
+  }),
 }))
 
-jest.mock('@huggingface/gguf')
-
-global.fetch = jest.fn(() =>
-  Promise.resolve({
-    json: () => Promise.resolve({ test: 100 }),
-    arrayBuffer: jest.fn(),
-  })
-) as jest.Mock
-
-import JanModelExtension from '.'
-import { fs, dirName } from '@janhq/core'
-import { gguf } from '@huggingface/gguf'
-
 describe('JanModelExtension', () => {
-  let sut: JanModelExtension
-
-  beforeAll(() => {
-    // @ts-ignore
-    sut = new JanModelExtension()
-  })
+  let extension: JanModelExtension
+  let mockCortexAPI: any
 
   beforeEach(() => {
-    jest.clearAllMocks()
-  })
-
-  describe('getConfiguredModels', () => {
-    describe("when there's no models are pre-populated", () => {
-      it('should return empty array', async () => {
-        // Mock configured models data
-        const configuredModels = []
-        existMock.mockReturnValue(true)
-        readDirSyncMock.mockReturnValue([])
-
-        const result = await sut.getConfiguredModels()
-        expect(result).toEqual([])
-      })
-    })
-
-    describe("when there's are pre-populated models - all flattened", () => {
-      it('returns configured models data - flatten folder - with correct file_path and model id', async () => {
-        // Mock configured models data
-        const configuredModels = [
-          {
-            id: '1',
-            name: 'Model 1',
-            version: '1.0.0',
-            description: 'Model 1 description',
-            object: {
-              type: 'model',
-              uri: 'http://localhost:5000/models/model1',
-            },
-            format: 'onnx',
-            sources: [],
-            created: new Date(),
-            updated: new Date(),
-            parameters: {},
-            settings: {},
-            metadata: {},
-            engine: 'test',
-          } as any,
-          {
-            id: '2',
-            name: 'Model 2',
-            version: '2.0.0',
-            description: 'Model 2 description',
-            object: {
-              type: 'model',
-              uri: 'http://localhost:5000/models/model2',
-            },
-            format: 'onnx',
-            sources: [],
-            parameters: {},
-            settings: {},
-            metadata: {},
-            engine: 'test',
-          } as any,
-        ]
-        existMock.mockReturnValue(true)
-
-        readDirSyncMock.mockImplementation((path) => {
-          if (path === 'file://models') return ['model1', 'model2']
-          else return ['model.json']
-        })
-
-        readFileSyncMock.mockImplementation((path) => {
-          if (path.includes('model1'))
-            return JSON.stringify(configuredModels[0])
-          else return JSON.stringify(configuredModels[1])
-        })
-
-        const result = await sut.getConfiguredModels()
-        expect(result).toEqual(
-          expect.arrayContaining([
-            expect.objectContaining({
-              file_path: 'file://models/model1/model.json',
-              id: '1',
-            }),
-            expect.objectContaining({
-              file_path: 'file://models/model2/model.json',
-              id: '2',
-            }),
-          ])
-        )
-      })
-    })
-
-    describe("when there's are pre-populated models - there are nested folders", () => {
-      it('returns configured models data - flatten folder - with correct file_path and model id', async () => {
-        // Mock configured models data
-        const configuredModels = [
-          {
-            id: '1',
-            name: 'Model 1',
-            version: '1.0.0',
-            description: 'Model 1 description',
-            object: {
-              type: 'model',
-              uri: 'http://localhost:5000/models/model1',
-            },
-            format: 'onnx',
-            sources: [],
-            created: new Date(),
-            updated: new Date(),
-            parameters: {},
-            settings: {},
-            metadata: {},
-            engine: 'test',
-          } as any,
-          {
-            id: '2',
-            name: 'Model 2',
-            version: '2.0.0',
-            description: 'Model 2 description',
-            object: {
-              type: 'model',
-              uri: 'http://localhost:5000/models/model2',
-            },
-            format: 'onnx',
-            sources: [],
-            parameters: {},
-            settings: {},
-            metadata: {},
-            engine: 'test',
-          } as any,
-        ]
-        existMock.mockReturnValue(true)
-
-        readDirSyncMock.mockImplementation((path) => {
-          if (path === 'file://models') return ['model1', 'model2/model2-1']
-          else return ['model.json']
-        })
-
-        readFileSyncMock.mockImplementation((path) => {
-          if (path.includes('model1'))
-            return JSON.stringify(configuredModels[0])
-          else if (path.includes('model2/model2-1'))
-            return JSON.stringify(configuredModels[1])
-        })
-
-        const result = await sut.getConfiguredModels()
-        expect(result).toEqual(
-          expect.arrayContaining([
-            expect.objectContaining({
-              file_path: 'file://models/model1/model.json',
-              id: '1',
-            }),
-            expect.objectContaining({
-              file_path: 'file://models/model2/model2-1/model.json',
-              id: '2',
-            }),
-          ])
-        )
-      })
-    })
-  })
-
-  describe('getDownloadedModels', () => {
-    describe('no models downloaded', () => {
-      it('should return empty array', async () => {
-        // Mock downloaded models data
-        existMock.mockReturnValue(true)
-        readDirSyncMock.mockReturnValue([])
-
-        const result = await sut.getDownloadedModels()
-        expect(result).toEqual([])
-      })
-    })
-    describe('only one model is downloaded', () => {
-      describe('flatten folder', () => {
-        it('returns downloaded models - with correct file_path and model id', async () => {
-          // Mock configured models data
-          const configuredModels = [
-            {
-              id: '1',
-              name: 'Model 1',
-              version: '1.0.0',
-              description: 'Model 1 description',
-              object: {
-                type: 'model',
-                uri: 'http://localhost:5000/models/model1',
-              },
-              format: 'onnx',
-              sources: [],
-              created: new Date(),
-              updated: new Date(),
-              parameters: {},
-              settings: {},
-              metadata: {},
-              engine: 'test',
-            } as any,
-            {
-              id: '2',
-              name: 'Model 2',
-              version: '2.0.0',
-              description: 'Model 2 description',
-              object: {
-                type: 'model',
-                uri: 'http://localhost:5000/models/model2',
-              },
-              format: 'onnx',
-              sources: [],
-              parameters: {},
-              settings: {},
-              metadata: {},
-              engine: 'test',
-            } as any,
-          ]
-          existMock.mockReturnValue(true)
-
-          readDirSyncMock.mockImplementation((path) => {
-            if (path === 'file://models') return ['model1', 'model2']
-            else if (path === 'file://models/model1')
-              return ['model.json', 'test.gguf']
-            else return ['model.json']
-          })
-
-          readFileSyncMock.mockImplementation((path) => {
-            if (path.includes('model1'))
-              return JSON.stringify(configuredModels[0])
-            else return JSON.stringify(configuredModels[1])
-          })
-
-          const result = await sut.getDownloadedModels()
-          expect(result).toEqual(
-            expect.arrayContaining([
-              expect.objectContaining({
-                file_path: 'file://models/model1/model.json',
-                id: '1',
-              }),
-            ])
-          )
-        })
-      })
-    })
-
-    describe('all models are downloaded', () => {
-      describe('nested folders', () => {
-        it('returns downloaded models - with correct file_path and model id', async () => {
-          // Mock configured models data
-          const configuredModels = [
-            {
-              id: '1',
-              name: 'Model 1',
-              version: '1.0.0',
-              description: 'Model 1 description',
-              object: {
-                type: 'model',
-                uri: 'http://localhost:5000/models/model1',
-              },
-              format: 'onnx',
-              sources: [],
-              created: new Date(),
-              updated: new Date(),
-              parameters: {},
-              settings: {},
-              metadata: {},
-              engine: 'test',
-            } as any,
-            {
-              id: '2',
-              name: 'Model 2',
-              version: '2.0.0',
-              description: 'Model 2 description',
-              object: {
-                type: 'model',
-                uri: 'http://localhost:5000/models/model2',
-              },
-              format: 'onnx',
-              sources: [],
-              parameters: {},
-              settings: {},
-              metadata: {},
-              engine: 'test',
-            } as any,
-          ]
-          existMock.mockReturnValue(true)
-
-          readDirSyncMock.mockImplementation((path) => {
-            if (path === 'file://models') return ['model1', 'model2/model2-1']
-            else return ['model.json', 'test.gguf']
-          })
-
-          readFileSyncMock.mockImplementation((path) => {
-            if (path.includes('model1'))
-              return JSON.stringify(configuredModels[0])
-            else return JSON.stringify(configuredModels[1])
-          })
-
-          const result = await sut.getDownloadedModels()
-          expect(result).toEqual(
-            expect.arrayContaining([
-              expect.objectContaining({
-                file_path: 'file://models/model1/model.json',
-                id: '1',
-              }),
-              expect.objectContaining({
-                file_path: 'file://models/model2/model2-1/model.json',
-                id: '2',
-              }),
-            ])
-          )
-        })
-      })
-    })
-
-    describe('all models are downloaded with uppercased GGUF files', () => {
-      it('returns downloaded models - with correct file_path and model id', async () => {
-        // Mock configured models data
-        const configuredModels = [
-          {
-            id: '1',
-            name: 'Model 1',
-            version: '1.0.0',
-            description: 'Model 1 description',
-            object: {
-              type: 'model',
-              uri: 'http://localhost:5000/models/model1',
-            },
-            format: 'onnx',
-            sources: [],
-            created: new Date(),
-            updated: new Date(),
-            parameters: {},
-            settings: {},
-            metadata: {},
-            engine: 'test',
-          } as any,
-          {
-            id: '2',
-            name: 'Model 2',
-            version: '2.0.0',
-            description: 'Model 2 description',
-            object: {
-              type: 'model',
-              uri: 'http://localhost:5000/models/model2',
-            },
-            format: 'onnx',
-            sources: [],
-            parameters: {},
-            settings: {},
-            metadata: {},
-            engine: 'test',
-          } as any,
-        ]
-        existMock.mockReturnValue(true)
-
-        readDirSyncMock.mockImplementation((path) => {
-          if (path === 'file://models') return ['model1', 'model2/model2-1']
-          else if (path === 'file://models/model1')
-            return ['model.json', 'test.GGUF']
-          else return ['model.json', 'test.gguf']
-        })
-
-        readFileSyncMock.mockImplementation((path) => {
-          if (path.includes('model1'))
-            return JSON.stringify(configuredModels[0])
-          else return JSON.stringify(configuredModels[1])
-        })
-
-        const result = await sut.getDownloadedModels()
-        expect(result).toEqual(
-          expect.arrayContaining([
-            expect.objectContaining({
-              file_path: 'file://models/model1/model.json',
-              id: '1',
-            }),
-            expect.objectContaining({
-              file_path: 'file://models/model2/model2-1/model.json',
-              id: '2',
-            }),
-          ])
-        )
-      })
-    })
-
-    describe('all models are downloaded - GGUF & Tensort RT', () => {
-      it('returns downloaded models - with correct file_path and model id', async () => {
-        // Mock configured models data
-        const configuredModels = [
-          {
-            id: '1',
-            name: 'Model 1',
-            version: '1.0.0',
-            description: 'Model 1 description',
-            object: {
-              type: 'model',
-              uri: 'http://localhost:5000/models/model1',
-            },
-            format: 'onnx',
-            sources: [],
-            created: new Date(),
-            updated: new Date(),
-            parameters: {},
-            settings: {},
-            metadata: {},
-            engine: 'test',
-          } as any,
-          {
-            id: '2',
-            name: 'Model 2',
-            version: '2.0.0',
-            description: 'Model 2 description',
-            object: {
-              type: 'model',
-              uri: 'http://localhost:5000/models/model2',
-            },
-            format: 'onnx',
-            sources: [],
-            parameters: {},
-            settings: {},
-            metadata: {},
-            engine: 'test',
-          } as any,
-        ]
-        existMock.mockReturnValue(true)
-
-        readDirSyncMock.mockImplementation((path) => {
-          if (path === 'file://models') return ['model1', 'model2/model2-1']
-          else if (path === 'file://models/model1')
-            return ['model.json', 'test.gguf']
-          else return ['model.json', 'test.engine']
-        })
-
-        readFileSyncMock.mockImplementation((path) => {
-          if (path.includes('model1'))
-            return JSON.stringify(configuredModels[0])
-          else return JSON.stringify(configuredModels[1])
-        })
-
-        const result = await sut.getDownloadedModels()
-        expect(result).toEqual(
-          expect.arrayContaining([
-            expect.objectContaining({
-              file_path: 'file://models/model1/model.json',
-              id: '1',
-            }),
-            expect.objectContaining({
-              file_path: 'file://models/model2/model2-1/model.json',
-              id: '2',
-            }),
-          ])
-        )
-      })
-    })
-  })
-
-  describe('deleteModel', () => {
-    describe('model is a GGUF model', () => {
-      it('should delete the GGUF file', async () => {
-        fs.unlinkSync = jest.fn()
-        const dirMock = dirName as jest.Mock
-        dirMock.mockReturnValue('file://models/model1')
-
-        fs.readFileSync = jest.fn().mockReturnValue(JSON.stringify({}))
-
-        readDirSyncMock.mockImplementation((path) => {
-          return ['model.json', 'test.gguf']
-        })
-
-        existMock.mockReturnValue(true)
-
-        await sut.deleteModel({
-          file_path: 'file://models/model1/model.json',
-        } as any)
-
-        expect(fs.unlinkSync).toHaveBeenCalledWith(
-          'file://models/model1/test.gguf'
-        )
-      })
-
-      it('no gguf file presented', async () => {
-        fs.unlinkSync = jest.fn()
-        const dirMock = dirName as jest.Mock
-        dirMock.mockReturnValue('file://models/model1')
-
-        fs.readFileSync = jest.fn().mockReturnValue(JSON.stringify({}))
-
-        readDirSyncMock.mockReturnValue(['model.json'])
-
-        existMock.mockReturnValue(true)
-
-        await sut.deleteModel({
-          file_path: 'file://models/model1/model.json',
-        } as any)
-
-        expect(fs.unlinkSync).toHaveBeenCalledTimes(0)
-      })
-
-      it('delete an imported model', async () => {
-        fs.rm = jest.fn()
-        const dirMock = dirName as jest.Mock
-        dirMock.mockReturnValue('file://models/model1')
-
-        readDirSyncMock.mockReturnValue(['model.json', 'test.gguf'])
-
-        // MARK: This is a tricky logic implement?
-        // I will just add test for now but will align on the legacy implementation
-        fs.readFileSync = jest.fn().mockReturnValue(
-          JSON.stringify({
-            metadata: {
-              author: 'user',
-            },
-          })
-        )
-
-        existMock.mockReturnValue(true)
-
-        await sut.deleteModel({
-          file_path: 'file://models/model1/model.json',
-        } as any)
-
-        expect(fs.rm).toHaveBeenCalledWith('file://models/model1')
-      })
-
-      it('delete tensorrt-models', async () => {
-        fs.rm = jest.fn()
-        const dirMock = dirName as jest.Mock
-        dirMock.mockReturnValue('file://models/model1')
-
-        readDirSyncMock.mockReturnValue(['model.json', 'test.engine'])
-
-        fs.readFileSync = jest.fn().mockReturnValue(JSON.stringify({}))
-
-        existMock.mockReturnValue(true)
-
-        await sut.deleteModel({
-          file_path: 'file://models/model1/model.json',
-        } as any)
-
-        expect(fs.unlinkSync).toHaveBeenCalledWith(
-          'file://models/model1/test.engine'
-        )
-      })
-    })
-  })
-
-  describe('downloadModel', () => {
-    const model: any = {
-      id: 'model-id',
-      name: 'Test Model',
-      sources: [
-        { url: 'http://example.com/model.gguf', filename: 'model.gguf' },
-      ],
-      engine: 'test-engine',
+    mockCortexAPI = {
+      getModels: jest.fn().mockResolvedValue([]),
+      pullModel: jest.fn().mockResolvedValue(undefined),
+      importModel: jest.fn().mockResolvedValue(undefined),
+      deleteModel: jest.fn().mockResolvedValue(undefined),
+      updateModel: jest.fn().mockResolvedValue({}),
+      cancelModelPull: jest.fn().mockResolvedValue(undefined),
     }
 
-    const network = {
-      ignoreSSL: true,
-      proxy: 'http://proxy.example.com',
-    }
+    // @ts-ignore
+    extension = new JanModelExtension()
+    extension.cortexAPI = mockCortexAPI
+  })
 
-    const gpuSettings: any = {
-      gpus: [{ name: 'nvidia-rtx-3080', arch: 'ampere' }],
-    }
+  it('should register settings on load', async () => {
+    // @ts-ignore
+    const registerSettingsSpy = jest.spyOn(extension, 'registerSettings')
+    await extension.onLoad()
+    expect(registerSettingsSpy).toHaveBeenCalledWith(SETTINGS)
+  })
 
-    it('should reject with invalid gguf metadata', async () => {
-      existMock.mockImplementation(() => false)
+  it('should pull a model', async () => {
+    const model = 'test-model'
+    await extension.pullModel(model)
+    expect(mockCortexAPI.pullModel).toHaveBeenCalledWith(model)
+  })
 
-      expect(
-        sut.downloadModel(model, gpuSettings, network)
-      ).rejects.toBeTruthy()
-    })
+  it('should cancel model download', async () => {
+    const model = 'test-model'
+    await extension.cancelModelPull(model)
+    expect(mockCortexAPI.cancelModelPull).toHaveBeenCalledWith(model)
+  })
 
-    it('should download corresponding ID', async () => {
-      existMock.mockImplementation(() => true)
-      dirNameMock.mockImplementation(() => 'file://models/model1')
-      downloadMock.mockImplementation(() => {
-        return Promise.resolve({})
-      })
+  it('should delete a model', async () => {
+    const model = 'test-model'
+    await extension.deleteModel(model)
+    expect(mockCortexAPI.deleteModel).toHaveBeenCalledWith(model)
+  })
 
-      expect(
-        await sut.downloadModel(
-          { ...model, file_path: 'file://models/model1/model.json' },
-          gpuSettings,
-          network
-        )
-      ).toBeUndefined()
+  it('should get all models', async () => {
+    const models = await extension.getModels()
+    expect(models).toEqual([])
+    expect(mockCortexAPI.getModels).toHaveBeenCalled()
+  })
 
-      expect(downloadMock).toHaveBeenCalledWith(
-        {
-          localPath: 'file://models/model1/model.gguf',
-          modelId: 'model-id',
-          url: 'http://example.com/model.gguf',
-        },
-        { ignoreSSL: true, proxy: 'http://proxy.example.com' }
-      )
-    })
+  it('should update a model', async () => {
+    const model: Partial<Model> = { id: 'test-model' }
+    const updatedModel = await extension.updateModel(model)
+    expect(updatedModel).toEqual({})
+    expect(mockCortexAPI.updateModel).toHaveBeenCalledWith(model)
+  })
 
-    it('should handle invalid model file', async () => {
-      executeMock.mockResolvedValue({})
-
-      fs.readFileSync = jest.fn(() => {
-        return JSON.stringify({ metadata: { author: 'user' } })
-      })
-
-      expect(
-        sut.downloadModel(
-          { ...model, file_path: 'file://models/model1/model.json' },
-          gpuSettings,
-          network
-        )
-      ).resolves.not.toThrow()
-
-      expect(downloadMock).not.toHaveBeenCalled()
-    })
-    it('should handle model file with no sources', async () => {
-      executeMock.mockResolvedValue({})
-      const modelWithoutSources = { ...model, sources: [] }
-
-      expect(
-        sut.downloadModel(
-          {
-            ...modelWithoutSources,
-            file_path: 'file://models/model1/model.json',
-          },
-          gpuSettings,
-          network
-        )
-      ).resolves.toBe(undefined)
-
-      expect(downloadMock).not.toHaveBeenCalled()
-    })
-
-    it('should handle model file with multiple sources', async () => {
-      const modelWithMultipleSources = {
-        ...model,
-        sources: [
-          { url: 'http://example.com/model1.gguf', filename: 'model1.gguf' },
-          { url: 'http://example.com/model2.gguf', filename: 'model2.gguf' },
-        ],
-      }
-
-      executeMock.mockResolvedValue({
-        metadata: { 'tokenizer.ggml.eos_token_id': 0 },
-      })
-      ;(gguf as jest.Mock).mockResolvedValue({
-        metadata: { 'tokenizer.ggml.eos_token_id': 0 },
-      })
-      // @ts-ignore
-      global.NODE = 'node'
-      // @ts-ignore
-      global.DEFAULT_MODEL = {
-        parameters: { stop: [] },
-      }
-      downloadMock.mockImplementation(() => {
-        return Promise.resolve({})
-      })
-
-      expect(
-        await sut.downloadModel(
-          {
-            ...modelWithMultipleSources,
-            file_path: 'file://models/model1/model.json',
-          },
-          gpuSettings,
-          network
-        )
-      ).toBeUndefined()
-
-      expect(downloadMock).toHaveBeenCalledWith(
-        {
-          localPath: 'file://models/model1/model1.gguf',
-          modelId: 'model-id',
-          url: 'http://example.com/model1.gguf',
-        },
-        { ignoreSSL: true, proxy: 'http://proxy.example.com' }
-      )
-
-      expect(downloadMock).toHaveBeenCalledWith(
-        {
-          localPath: 'file://models/model1/model2.gguf',
-          modelId: 'model-id',
-          url: 'http://example.com/model2.gguf',
-        },
-        { ignoreSSL: true, proxy: 'http://proxy.example.com' }
-      )
-    })
-
-    it('should handle model file with no file_path', async () => {
-      executeMock.mockResolvedValue({
-        metadata: { 'tokenizer.ggml.eos_token_id': 0 },
-      })
-      ;(gguf as jest.Mock).mockResolvedValue({
-        metadata: { 'tokenizer.ggml.eos_token_id': 0 },
-      })
-      // @ts-ignore
-      global.NODE = 'node'
-      // @ts-ignore
-      global.DEFAULT_MODEL = {
-        parameters: { stop: [] },
-      }
-      const modelWithoutFilepath = { ...model, file_path: undefined }
-
-      await sut.downloadModel(modelWithoutFilepath, gpuSettings, network)
-
-      expect(downloadMock).toHaveBeenCalledWith(
-        expect.objectContaining({
-          localPath: 'file://models/model-id/model.gguf',
-        }),
-        expect.anything()
-      )
-    })
-
-    it('should handle model file with invalid file_path', async () => {
-      executeMock.mockResolvedValue({
-        metadata: { 'tokenizer.ggml.eos_token_id': 0 },
-      })
-      ;(gguf as jest.Mock).mockResolvedValue({
-        metadata: { 'tokenizer.ggml.eos_token_id': 0 },
-      })
-      // @ts-ignore
-      global.NODE = 'node'
-      // @ts-ignore
-      global.DEFAULT_MODEL = {
-        parameters: { stop: [] },
-      }
-      const modelWithInvalidFilepath = {
-        ...model,
-        file_path: 'file://models/invalid-model.json',
-      }
-
-      await sut.downloadModel(modelWithInvalidFilepath, gpuSettings, network)
-
-      expect(downloadMock).toHaveBeenCalledWith(
-        expect.objectContaining({
-          localPath: 'file://models/model1/model.gguf',
-        }),
-        expect.anything()
-      )
-    })
-
-    it('should handle model with valid chat_template', async () => {
-      executeMock.mockResolvedValue('{prompt}')
-      ;(gguf as jest.Mock).mockResolvedValue({
-        metadata: {},
-      })
-      // @ts-ignore
-      global.NODE = 'node'
-      // @ts-ignore
-      global.DEFAULT_MODEL = {
-        parameters: { stop: [] },
-        settings: {
-          prompt_template: '<|im-start|>{prompt}<|im-end|>',
-        },
-      }
-
-      const result = await sut.retrieveGGUFMetadata({})
-
-      expect(result).toEqual({
-        parameters: {
-          stop: [],
-        },
-        settings: {
-          ctx_len: 4096,
-          ngl: 33,
-          prompt_template: '{prompt}',
-        },
-      })
-    })
-
-    it('should handle model without chat_template', async () => {
-      executeMock.mockRejectedValue({})
-      ;(gguf as jest.Mock).mockResolvedValue({
-        metadata: {},
-      })
-      // @ts-ignore
-      global.NODE = 'node'
-      // @ts-ignore
-      global.DEFAULT_MODEL = {
-        parameters: { stop: [] },
-        settings: {
-          prompt_template: '<|im-start|>{prompt}<|im-end|>',
-        },
-      }
-
-      const result = await sut.retrieveGGUFMetadata({})
-
-      expect(result).toEqual({
-        parameters: {
-          stop: [],
-        },
-        settings: {
-          ctx_len: 4096,
-          ngl: 33,
-          prompt_template: '<|im-start|>{prompt}<|im-end|>',
-        },
-      })
-    })
+  it('should import a model', async () => {
+    const model: any = { path: 'test-path' }
+    const optionType: any = 'test-option'
+    await extension.importModel(model, optionType)
+    expect(mockCortexAPI.importModel).toHaveBeenCalledWith(
+      model.path,
+      optionType
+    )
   })
 })
diff --git a/extensions/model-extension/src/index.ts b/extensions/model-extension/src/index.ts
index 7e7c12469..b879e0bb9 100644
--- a/extensions/model-extension/src/index.ts
+++ b/extensions/model-extension/src/index.ts
@@ -1,66 +1,47 @@
 import {
-  fs,
-  downloadFile,
-  abortDownload,
-  InferenceEngine,
-  joinPath,
   ModelExtension,
   Model,
-  getJanDataFolderPath,
-  events,
-  DownloadEvent,
-  DownloadRoute,
-  DownloadState,
-  OptionType,
-  ImportingModel,
-  LocalImportModelEvent,
-  baseName,
-  GpuSetting,
-  DownloadRequest,
-  executeOnMain,
-  HuggingFaceRepoData,
-  getFileSize,
-  AllQuantizations,
-  ModelEvent,
-  ModelFile,
+  InferenceEngine,
+  fs,
+  joinPath,
   dirName,
 } from '@janhq/core'
-
-import { extractFileName } from './helpers/path'
-import { GGUFMetadata, gguf } from '@huggingface/gguf'
-import { NotSupportedModelError } from './@types/NotSupportModelError'
-import { InvalidHostError } from './@types/InvalidHostError'
+import { CortexAPI } from './cortex'
 
 declare const SETTINGS: Array<any>
+
+/**
+ * TODO: Set env for HF access token? or via API request?
+ */
 enum Settings {
   huggingFaceAccessToken = 'hugging-face-access-token',
 }
 
+/**
+ * Keys this extension reads from localStorage
+ */
+enum ExtensionEnum {
+  downloadedModels = 'downloadedModels',
+}
+
 /**
  * A extension for models
  */
 export default class JanModelExtension extends ModelExtension {
   private static readonly _homeDir = 'file://models'
-  private static readonly _modelMetadataFileName = 'model.json'
-  private static readonly _supportedModelFormat = '.gguf'
-  private static readonly _incompletedModelFileName = '.download'
-  private static readonly _offlineInferenceEngine = [
-    InferenceEngine.nitro,
-    InferenceEngine.nitro_tensorrt_llm,
-  ]
-  private static readonly _tensorRtEngineFormat = '.engine'
-  private static readonly _supportedGpuArch = ['ampere', 'ada']
-
-  interrupted = false
+  cortexAPI: CortexAPI = new CortexAPI()
 
   /**
    * Called when the extension is loaded.
    * @override
    */
   async onLoad() {
-    // Handle Desktop Events
     this.registerSettings(SETTINGS)
-    this.handleDesktopEvents()
+
+    // Try get models from cortex.cpp; log failures instead of leaving the rejection unhandled
+    this.getModels()
+      .then((models) => this.registerModels(models))
+      .catch((e) => console.error(e))
   }
 
   /**
@@ -72,384 +53,145 @@ export default class JanModelExtension extends ModelExtension {
   /**
    * Downloads a machine learning model.
    * @param model - The model to download.
-   * @param network - Optional object to specify proxy/whether to ignore SSL certificates.
    * @returns A Promise that resolves when the model is downloaded.
    */
-  async downloadModel(
-    model: ModelFile,
-    gpuSettings?: GpuSetting,
-    network?: { ignoreSSL?: boolean; proxy?: string }
-  ): Promise<void> {
-    // Create corresponding directory
-    const modelDirPath = await joinPath([JanModelExtension._homeDir, model.id])
-    if (!(await fs.existsSync(modelDirPath))) await fs.mkdir(modelDirPath)
-    const modelJsonPath =
-      model.file_path ?? (await joinPath([modelDirPath, 'model.json']))
-
-    // Download HF model - model.json not exist
-    if (!(await fs.existsSync(modelJsonPath))) {
-      // It supports only one source for HF download
-      const metadata = await this.fetchModelMetadata(model.sources[0].url)
-      const updatedModel = await this.retrieveGGUFMetadata(metadata)
-      if (updatedModel) {
-        // Update model settings
-        model.settings = {
-          ...model.settings,
-          ...updatedModel.settings,
-        }
-        model.parameters = {
-          ...model.parameters,
-          ...updatedModel.parameters,
-        }
-      }
-      await fs.writeFileSync(modelJsonPath, JSON.stringify(model, null, 2))
-      events.emit(ModelEvent.OnModelsUpdate, {})
-    }
-    if (model.engine === InferenceEngine.nitro_tensorrt_llm) {
-      if (!gpuSettings || gpuSettings.gpus.length === 0) {
-        console.error('No GPU found. Please check your GPU setting.')
-        return
-      }
-      const firstGpu = gpuSettings.gpus[0]
-      if (!firstGpu.name.toLowerCase().includes('nvidia')) {
-        console.error('No Nvidia GPU found. Please check your GPU setting.')
-        return
-      }
-      const gpuArch = firstGpu.arch
-      if (gpuArch === undefined) {
-        console.error(
-          'No GPU architecture found. Please check your GPU setting.'
-        )
-        return
-      }
-
-      if (!JanModelExtension._supportedGpuArch.includes(gpuArch)) {
-        console.debug(
-          `Your GPU: ${JSON.stringify(firstGpu)} is not supported. Only 30xx, 40xx series are supported.`
-        )
-        return
-      }
-
-      const os = 'windows' // TODO: remove this hard coded value
-
-      const newSources = model.sources.map((source) => {
-        const newSource = { ...source }
-        newSource.url = newSource.url
-          .replace(/<os>/g, os)
-          .replace(/<gpuarch>/g, gpuArch)
-        return newSource
-      })
-      model.sources = newSources
-    }
-
-    console.debug(`Download sources: ${JSON.stringify(model.sources)}`)
-
-    if (model.sources.length > 1) {
-      // path to model binaries
-      for (const source of model.sources) {
-        let path = extractFileName(
-          source.url,
-          JanModelExtension._supportedModelFormat
-        )
-        if (source.filename) {
-          path = model.file_path
-            ? await joinPath([await dirName(model.file_path), source.filename])
-            : await joinPath([modelDirPath, source.filename])
-        }
-
-        const downloadRequest: DownloadRequest = {
-          url: source.url,
-          localPath: path,
-          modelId: model.id,
-        }
-        downloadFile(downloadRequest, network)
-      }
-      // TODO: handle multiple binaries for web later
-    } else {
-      const fileName = extractFileName(
-        model.sources[0]?.url,
-        JanModelExtension._supportedModelFormat
-      )
-      const path = model.file_path
-        ? await joinPath([await dirName(model.file_path), fileName])
-        : await joinPath([modelDirPath, fileName])
-      const downloadRequest: DownloadRequest = {
-        url: model.sources[0]?.url,
-        localPath: path,
-        modelId: model.id,
-      }
-      downloadFile(downloadRequest, network)
-
-      if (window && window.core?.api && window.core.api.baseApiUrl) {
-        this.startPollingDownloadProgress(model.id)
-      }
-    }
-  }
-
-  private toHuggingFaceUrl(repoId: string): string {
-    try {
-      const url = new URL(repoId)
-      if (url.host !== 'huggingface.co') {
-        throw new InvalidHostError(`Invalid Hugging Face repo URL: ${repoId}`)
-      }
-
-      const paths = url.pathname.split('/').filter((e) => e.trim().length > 0)
-      if (paths.length < 2) {
-        throw new InvalidHostError(`Invalid Hugging Face repo URL: ${repoId}`)
-      }
-
-      return `${url.origin}/api/models/${paths[0]}/${paths[1]}`
-    } catch (err) {
-      if (err instanceof InvalidHostError) {
-        throw err
-      }
-
-      if (repoId.startsWith('https')) {
-        throw new Error(`Cannot parse url: ${repoId}`)
-      }
-
-      return `https://huggingface.co/api/models/${repoId}`
-    }
-  }
-
-  async fetchHuggingFaceRepoData(repoId: string): Promise<HuggingFaceRepoData> {
-    const sanitizedUrl = this.toHuggingFaceUrl(repoId)
-    console.debug('sanitizedUrl', sanitizedUrl)
-
-    const huggingFaceAccessToken = (
-      await this.getSetting<string>(Settings.huggingFaceAccessToken, '')
-    ).trim()
-
-    const headers = {
-      Accept: 'application/json',
-    }
-
-    if (huggingFaceAccessToken.length > 0) {
-      headers['Authorization'] = `Bearer ${huggingFaceAccessToken}`
-    }
-
-    const res = await fetch(sanitizedUrl, {
-      headers: headers,
-    })
-    const response = await res.json()
-    if (response['error'] != null) {
-      throw new Error(response['error'])
-    }
-
-    const data = response as HuggingFaceRepoData
-
-    if (data.tags.indexOf('gguf') === -1) {
-      throw new NotSupportedModelError(
-        `${repoId} is not supported. Only GGUF models are supported.`
-      )
-    }
-
-    const promises: Promise<number>[] = []
-
-    // fetching file sizes
-    const url = new URL(sanitizedUrl)
-    const paths = url.pathname.split('/').filter((e) => e.trim().length > 0)
-
-    for (const sibling of data.siblings) {
-      const downloadUrl = `https://huggingface.co/${paths[2]}/${paths[3]}/resolve/main/${sibling.rfilename}`
-      sibling.downloadUrl = downloadUrl
-      promises.push(getFileSize(downloadUrl))
-    }
-
-    const result = await Promise.all(promises)
-    for (let i = 0; i < data.siblings.length; i++) {
-      data.siblings[i].fileSize = result[i]
-    }
-
-    AllQuantizations.forEach((quantization) => {
-      data.siblings.forEach((sibling) => {
-        if (!sibling.quantization && sibling.rfilename.includes(quantization)) {
-          sibling.quantization = quantization
-        }
-      })
-    })
-
-    data.modelUrl = `https://huggingface.co/${paths[2]}/${paths[3]}`
-    return data
-  }
-
-  async fetchModelMetadata(url: string): Promise<GGUFMetadata> {
-    const { metadata } = await gguf(url)
-    return metadata
-  }
-
-  /**
-   * Specifically for Jan server.
-   */
-  private async startPollingDownloadProgress(modelId: string): Promise<void> {
-    // wait for some seconds before polling
-    await new Promise((resolve) => setTimeout(resolve, 3000))
-
-    return new Promise((resolve) => {
-      const interval = setInterval(async () => {
-        fetch(
-          `${window.core.api.baseApiUrl}/v1/download/${DownloadRoute.getDownloadProgress}/${modelId}`,
-          {
-            method: 'GET',
-            headers: { contentType: 'application/json' },
-          }
-        ).then(async (res) => {
-          const state: DownloadState = await res.json()
-          if (state.downloadState === 'end') {
-            events.emit(DownloadEvent.onFileDownloadSuccess, state)
-            clearInterval(interval)
-            resolve()
-            return
-          }
-
-          if (state.downloadState === 'error') {
-            events.emit(DownloadEvent.onFileDownloadError, state)
-            clearInterval(interval)
-            resolve()
-            return
-          }
-
-          events.emit(DownloadEvent.onFileDownloadUpdate, state)
-        })
-      }, 1000)
-    })
+  async pullModel(model: string): Promise<void> {
+    /**
+     * Sending POST to /models/pull/{id} endpoint to pull the model
+     */
+    return this.cortexAPI?.pullModel(model)
   }
 
   /**
    * Cancels the download of a specific machine learning model.
    *
-   * @param {string} modelId - The ID of the model whose download is to be cancelled.
+   * @param {string} model - The ID of the model whose download is to be cancelled.
    * @returns {Promise<void>} A promise that resolves when the download has been cancelled.
    */
-  async cancelModelDownload(modelId: string): Promise<void> {
-    const path = await joinPath([JanModelExtension._homeDir, modelId, modelId])
-    try {
-      await abortDownload(path)
-      await fs.unlinkSync(path)
-    } catch (e) {
-      console.error(e)
-    }
+  async cancelModelPull(model: string): Promise<void> {
+    /**
+     * Sending DELETE to /models/pull/{id} endpoint to cancel a model pull;
+     * awaited so the caller only resolves once the request settled and sees failures
+     */
+    await this.cortexAPI?.cancelModelPull(model)
   }
 
   /**
-   * Deletes a machine learning model.
-   * @param filePath - The path to the model file to delete.
+   * Deletes a pulled model
+   * @param model - The model to delete
    * @returns A Promise that resolves when the model is deleted.
    */
-  async deleteModel(model: ModelFile): Promise<void> {
-    try {
-      const dirPath = await dirName(model.file_path)
-      const jsonFilePath = await joinPath([
-        dirPath,
-        JanModelExtension._modelMetadataFileName,
-      ])
-      const modelInfo = JSON.parse(
-        await this.readModelMetadata(jsonFilePath)
-      ) as Model
-
-      // TODO: This is so tricky?
-      // Should depend on sources?
-      const isUserImportModel =
-        modelInfo.metadata?.author?.toLowerCase() === 'user'
-      if (isUserImportModel) {
-        // just delete the folder
-        return fs.rm(dirPath)
-      }
-
-      // remove all files under dirPath except model.json
-      const files = await fs.readdirSync(dirPath)
-      const deletePromises = files.map(async (fileName: string) => {
-        if (fileName !== JanModelExtension._modelMetadataFileName) {
-          return fs.unlinkSync(await joinPath([dirPath, fileName]))
-        }
-      })
-      await Promise.allSettled(deletePromises)
-    } catch (err) {
-      console.error(err)
-    }
+  async deleteModel(model: string): Promise<void> {
+    return this.cortexAPI?.deleteModel(model)
   }
 
   /**
-   * Gets all downloaded models.
+   * Gets all pulled models
    * @returns A Promise that resolves with an array of all models.
    */
-  async getDownloadedModels(): Promise<ModelFile[]> {
-    return await this.getModelsMetadata(
-      async (modelDir: string, model: Model) => {
-        if (!JanModelExtension._offlineInferenceEngine.includes(model.engine))
-          return true
+  async getModels(): Promise<Model[]> {
+    /**
+     * Returning an empty array right away here would reset the app
+     * cache and the app would not function properly, so the cached
+     * models are compared with cortex.cpp and the missing ones imported
+     */
 
-        // model binaries (sources) are absolute path & exist
-        const existFiles = await Promise.all(
-          model.sources.map(
-            (source) =>
-              // Supposed to be a local file url
-              !source.url.startsWith(`http://`) &&
-              !source.url.startsWith(`https://`)
+    if (!localStorage.getItem(ExtensionEnum.downloadedModels)) {
+      // Updated from an older version than 0.5.5
+      // Scan through the models folder and import them (Legacy flow)
+      // Return models immediately
+      return this.scanModelsFolder().then((models) => {
+        return models ?? []
+      })
+    }
+
+    let currentModels: Model[] = []
+
+    try {
+      currentModels = JSON.parse(
+        localStorage.getItem(ExtensionEnum.downloadedModels)
+      ) as Model[]
+    } catch (e) {
+      currentModels = []
+      console.error(e)
+    }
+
+    /**
+     * Keep only the cached models that use the nitro engine; the ones
+     * cortex.cpp does not know yet (filtered below) are imported
+     */
+    var toImportModels = currentModels.filter(
+      (e) => e.engine === InferenceEngine.nitro
+    )
+
+    await this.cortexAPI?.getModels().then((models) => {
+      const existingIds = models.map((e) => e.id)
+      toImportModels = toImportModels.filter(
+        (e: Model) => !existingIds.includes(e.id)
+      )
+    })
+
+    console.log('To import models:', toImportModels.length)
+    /**
+     * There are models to import
+     * do not return models from cortex.cpp yet
+     * otherwise it will reset the app cache
+     * */
+    if (toImportModels.length > 0) {
+      // Import models
+      await Promise.all(
+        toImportModels.map(async (model: Model & { file_path: string }) =>
+          this.importModel(
+            model.id,
+            await joinPath([
+              await dirName(model.file_path),
+              model.sources[0]?.filename ??
+                model.settings?.llama_model_path ??
+                model.sources[0]?.url.split('/').pop() ??
+                model.id,
+            ])
           )
         )
-        if (existFiles.every((exist) => exist)) return true
+      )
 
-        const result = await fs
-          .readdirSync(await joinPath([JanModelExtension._homeDir, modelDir]))
-          .then((files: string[]) => {
-            // Model binary exists in the directory
-            // Model binary name can match model ID or be a .gguf file and not be an incompleted model file
-            return (
-              files.includes(modelDir) ||
-              files.filter((file) => {
-                if (
-                  file.endsWith(JanModelExtension._incompletedModelFileName)
-                ) {
-                  return false
-                }
-                return (
-                  file
-                    .toLowerCase()
-                    .includes(JanModelExtension._supportedModelFormat) ||
-                  file
-                    .toLowerCase()
-                    .includes(JanModelExtension._tensorRtEngineFormat)
-                )
-                // Check if the number of matched files equals the number of sources
-              })?.length >= model.sources.length
-            )
-          })
+      return currentModels
+    }
 
-        return result
-      }
+    /**
+     * All models are imported successfully before
+     * just return models from cortex.cpp
+     */
+    return (
+      this.cortexAPI?.getModels().then((models) => {
+        return models
+      }) ?? Promise.resolve([])
     )
   }
 
-  private async getModelJsonPath(
-    folderFullPath: string
-  ): Promise<string | undefined> {
-    // try to find model.json recursively inside each folder
-    if (!(await fs.existsSync(folderFullPath))) return undefined
-
-    const files: string[] = await fs.readdirSync(folderFullPath)
-    if (files.length === 0) return undefined
-
-    if (files.includes(JanModelExtension._modelMetadataFileName)) {
-      return joinPath([
-        folderFullPath,
-        JanModelExtension._modelMetadataFileName,
-      ])
-    }
-    // continue recursive
-    for (const file of files) {
-      const path = await joinPath([folderFullPath, file])
-      const fileStats = await fs.fileStat(path)
-      if (fileStats.isDirectory) {
-        const result = await this.getModelJsonPath(path)
-        if (result) return result
-      }
-    }
+  /**
+   * Update a pulled model metadata
+   * @param model - The metadata of the model
+   */
+  async updateModel(model: Partial<Model>): Promise<Model> {
+    return this.cortexAPI
+      ?.updateModel(model)
+      .then(() => this.cortexAPI!.getModel(model.id))
   }
 
-  private async getModelsMetadata(
-    selector?: (path: string, model: Model) => Promise<boolean>
-  ): Promise<ModelFile[]> {
+  /**
+   * Import an existing model file
+   * @param model - The ID to register the imported model under
+   * @param modelPath - The path to the model file to import
+   */
+  async importModel(model: string, modelPath: string): Promise<void> {
+    return this.cortexAPI?.importModel(model, modelPath)
+  }
+
+  //// LEGACY MODEL FOLDER ////
+  /**
+   * Scan through models folder and return downloaded models
+   * @returns
+   */
+  private async scanModelsFolder(): Promise<Model[]> {
     try {
       if (!(await fs.existsSync(JanModelExtension._homeDir))) {
         console.debug('Model folder not found')
@@ -459,10 +201,14 @@ export default class JanModelExtension extends ModelExtension {
       const files: string[] = await fs.readdirSync(JanModelExtension._homeDir)
 
       const allDirectories: string[] = []
-      for (const file of files) {
-        if (file === '.DS_Store') continue
-        if (file === 'config') continue
-        allDirectories.push(file)
+
+      for (const modelFolder of files) {
+        const fullModelFolderPath = await joinPath([
+          JanModelExtension._homeDir,
+          modelFolder,
+        ])
+        if (!(await fs.fileStat(fullModelFolderPath)).isDirectory) continue
+        allDirectories.push(modelFolder)
       }
 
       const readJsonPromises = allDirectories.map(async (dirName) => {
@@ -477,7 +223,7 @@ export default class JanModelExtension extends ModelExtension {
 
         if (await fs.existsSync(jsonPath)) {
           // if we have the model.json file, read it
-          let model = await this.readModelMetadata(jsonPath)
+          let model = await fs.readFileSync(jsonPath, 'utf-8')
 
           model = typeof model === 'object' ? model : JSON.parse(model)
 
@@ -491,420 +237,89 @@ export default class JanModelExtension extends ModelExtension {
             ]
           }
           model.file_path = jsonPath
-          model.file_name = JanModelExtension._modelMetadataFileName
+          model.file_name = 'model.json'
 
-          if (selector && !(await selector?.(dirName, model))) {
-            return
-          }
-          return model
-        } else {
-          // otherwise, we generate our own model file
-          // TODO: we might have more than one binary file here. This will be addressed with new version of Model file
-          //  which is the PR from Hiro on branch Jan can see
-          return this.generateModelMetadata(dirName)
+          // Models whose sources are all non-http(s) are assumed to be existing local
+          // files (symlinked): return the model itself, the result is consumed as a Model
+          const existFiles = await Promise.all(
+            model.sources.map(
+              (source) =>
+                // Supposed to be a local file url
+                !source.url.startsWith(`http://`) &&
+                !source.url.startsWith(`https://`)
+            )
+          )
+          if (existFiles.every((exist) => exist)) return model
+
+          const result = await fs
+            .readdirSync(await joinPath([JanModelExtension._homeDir, dirName]))
+            .then((files: string[]) => {
+              // Model binary exists in the directory
+              // Model binary name can match the model ID or be a .gguf / .engine file
+              return (
+                files.includes(dirName) || // Legacy model GGUF without extension
+                files.filter((file) => {
+                  return (
+                    file.toLowerCase().endsWith('.gguf') || // GGUF
+                    file.toLowerCase().endsWith('.engine') // TensorRT-LLM
+                  )
+                })?.length > 0 // TODO: find better way (can use basename to check the file name with source url)
+              )
+            })
+
+          if (result) return model
+          else return undefined
         }
       })
       const results = await Promise.allSettled(readJsonPromises)
-      const modelData = results.map((result) => {
-        if (result.status === 'fulfilled' && result.value) {
-          try {
-            const model =
-              typeof result.value === 'object'
-                ? result.value
-                : JSON.parse(result.value)
-            return model as ModelFile
-          } catch {
-            console.debug(`Unable to parse model metadata: ${result.value}`)
+      const modelData = results
+        .map((result) => {
+          if (result.status === 'fulfilled' && result.value) {
+            try {
+              const model =
+                typeof result.value === 'object'
+                  ? result.value
+                  : JSON.parse(result.value)
+              return model as Model
+            } catch {
+              console.debug(`Unable to parse model metadata: ${result.value}`)
+            }
           }
-        }
-        return undefined
-      })
+          return undefined
+        })
+        .filter((e) => !!e)
 
-      return modelData.filter((e) => !!e)
+      return modelData
     } catch (err) {
       console.error(err)
       return []
     }
   }
 
-  private readModelMetadata(path: string) {
-    return fs.readFileSync(path, 'utf-8')
-  }
-
   /**
-   * Handle the case where we have the model directory but we don't have the corresponding
-   * model.json file associated with it.
-   *
-   * This function will create a model.json file for the model.
-   * It works only with single binary file model.
-   *
-   * @param dirName the director which reside in ~/jan/models but does not have model.json file.
-   */
-  private async generateModelMetadata(dirName: string): Promise<Model> {
-    const files: string[] = await fs.readdirSync(
-      await joinPath([JanModelExtension._homeDir, dirName])
-    )
-
-    // sort files by name
-    files.sort()
-
-    // find the first file which is not a directory
-    let binaryFileName: string | undefined = undefined
-    let binaryFileSize: number | undefined = undefined
-
-    for (const file of files) {
-      if (file.endsWith(JanModelExtension._supportedModelFormat)) {
-        const path = await joinPath([JanModelExtension._homeDir, dirName, file])
-        const fileStats = await fs.fileStat(path)
-        if (fileStats.isDirectory) continue
-        binaryFileSize = fileStats.size
-        binaryFileName = file
-        break
-      }
-    }
-
-    if (!binaryFileName) {
-      console.warn(`Unable to find binary file for model ${dirName}`)
-      return
-    }
-
-    const defaultModel = (await this.getDefaultModel()) as Model
-    const metadata = await executeOnMain(
-      NODE,
-      'retrieveGGUFMetadata',
-      await joinPath([
-        await getJanDataFolderPath(),
-        'models',
-        dirName,
-        binaryFileName,
-      ])
-    ).catch(() => undefined)
-
-    const updatedModel = await this.retrieveGGUFMetadata(metadata)
-
-    if (!defaultModel) {
-      console.error('Unable to find default model')
-      return
-    }
-
-    const model: Model = {
-      ...defaultModel,
-      // Overwrite default N/A fields
-      id: dirName,
-      name: dirName,
-      sources: [
-        {
-          url: binaryFileName,
-          filename: binaryFileName,
-        },
-      ],
-      parameters: {
-        ...defaultModel.parameters,
-        ...updatedModel.parameters,
-      },
-      settings: {
-        ...defaultModel.settings,
-        ...updatedModel.settings,
-        llama_model_path: binaryFileName,
-      },
-      created: Date.now(),
-      description: '',
-      metadata: {
-        size: binaryFileSize,
-        author: 'User',
-        tags: [],
-      },
-    }
-
-    const modelFilePath = await joinPath([
-      JanModelExtension._homeDir,
-      dirName,
-      JanModelExtension._modelMetadataFileName,
-    ])
-
-    await fs.writeFileSync(modelFilePath, JSON.stringify(model, null, 2))
-
-    return model
-  }
-
-  override async getDefaultModel(): Promise<Model> {
-    const defaultModel = DEFAULT_MODEL as Model
-    return defaultModel
-  }
-
-  /**
-   * Gets all available models.
-   * @returns A Promise that resolves with an array of all models.
-   */
-  async getConfiguredModels(): Promise<ModelFile[]> {
-    return this.getModelsMetadata()
-  }
-
-  handleDesktopEvents() {
-    if (window && window.electronAPI) {
-      window.electronAPI.onFileDownloadUpdate(
-        async (_event: string, state: DownloadState | undefined) => {
-          if (!state) return
-          state.downloadState = 'downloading'
-          events.emit(DownloadEvent.onFileDownloadUpdate, state)
-        }
-      )
-      window.electronAPI.onFileDownloadError(
-        async (_event: string, state: DownloadState) => {
-          state.downloadState = 'error'
-          events.emit(DownloadEvent.onFileDownloadError, state)
-        }
-      )
-      window.electronAPI.onFileDownloadSuccess(
-        async (_event: string, state: DownloadState) => {
-          state.downloadState = 'end'
-          events.emit(DownloadEvent.onFileDownloadSuccess, state)
-        }
-      )
-    }
-  }
-
-  private async importModelSymlink(
-    modelBinaryPath: string,
-    modelFolderName: string,
-    modelFolderPath: string
-  ): Promise<ModelFile> {
-    const fileStats = await fs.fileStat(modelBinaryPath, true)
-    const binaryFileSize = fileStats.size
-
-    // Just need to generate model.json there
-    const defaultModel = (await this.getDefaultModel()) as Model
-    if (!defaultModel) {
-      console.error('Unable to find default model')
-      return
-    }
-
-    const metadata = await executeOnMain(
-      NODE,
-      'retrieveGGUFMetadata',
-      modelBinaryPath
-    )
-
-    const binaryFileName = await baseName(modelBinaryPath)
-    const updatedModel = await this.retrieveGGUFMetadata(metadata)
-
-    const model: Model = {
-      ...defaultModel,
-      id: modelFolderName,
-      name: modelFolderName,
-      sources: [
-        {
-          url: modelBinaryPath,
-          filename: binaryFileName,
-        },
-      ],
-      parameters: {
-        ...defaultModel.parameters,
-        ...updatedModel.parameters,
-      },
-
-      settings: {
-        ...defaultModel.settings,
-        ...updatedModel.settings,
-        llama_model_path: binaryFileName,
-      },
-      created: Date.now(),
-      description: '',
-      metadata: {
-        size: binaryFileSize,
-        author: 'User',
-        tags: [],
-      },
-    }
-
-    const modelFilePath = await joinPath([
-      modelFolderPath,
-      JanModelExtension._modelMetadataFileName,
-    ])
-
-    await fs.writeFileSync(modelFilePath, JSON.stringify(model, null, 2))
-
-    return {
-      ...model,
-      file_path: modelFilePath,
-      file_name: JanModelExtension._modelMetadataFileName,
-    }
-  }
-
-  async updateModelInfo(modelInfo: Partial<ModelFile>): Promise<ModelFile> {
-    if (modelInfo.id == null) throw new Error('Model ID is required')
-
-    const model = JSON.parse(
-      await this.readModelMetadata(modelInfo.file_path)
-    ) as ModelFile
-
-    const updatedModel: ModelFile = {
-      ...model,
-      ...modelInfo,
-      parameters: {
-        ...model.parameters,
-        ...modelInfo.parameters,
-      },
-      settings: {
-        ...model.settings,
-        ...modelInfo.settings,
-      },
-      metadata: {
-        ...model.metadata,
-        ...modelInfo.metadata,
-      },
-      // Should not persist file_path & file_name
-      file_path: undefined,
-      file_name: undefined,
-    }
-
-    await fs.writeFileSync(
-      modelInfo.file_path,
-      JSON.stringify(updatedModel, null, 2)
-    )
-    return updatedModel
-  }
-
-  private async importModel(
-    model: ImportingModel,
-    optionType: OptionType
-  ): Promise<Model> {
-    const binaryName = (await baseName(model.path)).replace(/\s/g, '')
-
-    let modelFolderName = binaryName
-    if (binaryName.endsWith(JanModelExtension._supportedModelFormat)) {
-      modelFolderName = binaryName.replace(
-        JanModelExtension._supportedModelFormat,
-        ''
-      )
-    }
-
-    const modelFolderPath = await this.getModelFolderName(modelFolderName)
-    await fs.mkdir(modelFolderPath)
-
-    const uniqueFolderName = await baseName(modelFolderPath)
-    const modelBinaryFile = binaryName.endsWith(
-      JanModelExtension._supportedModelFormat
-    )
-      ? binaryName
-      : `${binaryName}${JanModelExtension._supportedModelFormat}`
-
-    const binaryPath = await joinPath([modelFolderPath, modelBinaryFile])
-
-    if (optionType === 'SYMLINK') {
-      return this.importModelSymlink(
-        model.path,
-        uniqueFolderName,
-        modelFolderPath
-      )
-    }
-
-    const srcStat = await fs.fileStat(model.path, true)
-
-    // interval getting the file size to calculate the percentage
-    const interval = setInterval(async () => {
-      const destStats = await fs.fileStat(binaryPath, true)
-      const percentage = destStats.size / srcStat.size
-      events.emit(LocalImportModelEvent.onLocalImportModelUpdate, {
-        ...model,
-        percentage,
-      })
-    }, 1000)
-
-    await fs.copyFile(model.path, binaryPath)
-
-    clearInterval(interval)
-
-    // generate model json
-    return this.generateModelMetadata(uniqueFolderName)
-  }
-
-  private async getModelFolderName(
-    modelFolderName: string,
-    count?: number
-  ): Promise<string> {
-    const newModelFolderName = count
-      ? `${modelFolderName}-${count}`
-      : modelFolderName
-
-    const janDataFolderPath = await getJanDataFolderPath()
-    const modelFolderPath = await joinPath([
-      janDataFolderPath,
-      'models',
-      newModelFolderName,
-    ])
-
-    const isFolderExist = await fs.existsSync(modelFolderPath)
-    if (!isFolderExist) {
-      return modelFolderPath
-    } else {
-      const newCount = (count ?? 0) + 1
-      return this.getModelFolderName(modelFolderName, newCount)
-    }
-  }
-
-  async importModels(
-    models: ImportingModel[],
-    optionType: OptionType
-  ): Promise<void> {
-    const importedModels: Model[] = []
-
-    for (const model of models) {
-      events.emit(LocalImportModelEvent.onLocalImportModelUpdate, model)
-      try {
-        const importedModel = await this.importModel(model, optionType)
-        events.emit(LocalImportModelEvent.onLocalImportModelSuccess, {
-          ...model,
-          modelId: importedModel.id,
-        })
-        importedModels.push(importedModel)
-      } catch (err) {
-        events.emit(LocalImportModelEvent.onLocalImportModelFailed, {
-          ...model,
-          error: err,
-        })
-      }
-    }
-
-    events.emit(
-      LocalImportModelEvent.onLocalImportModelFinished,
-      importedModels
-    )
-  }
-
-  /**
-   * Retrieve Model Settings from GGUF Metadata
-   * @param metadata
+   * Recursively search a folder for a model.json file and return its path
+   * @param folderFullPath path of the folder to search, including its sub-folders
    * @returns
    */
-  async retrieveGGUFMetadata(metadata: any): Promise<Partial<Model>> {
-    const defaultModel = DEFAULT_MODEL as Model
-    var template = await executeOnMain(
-      NODE,
-      'renderJinjaTemplate',
-      metadata
-    ).catch(() => undefined)
-
-    const eos_id = metadata['tokenizer.ggml.eos_token_id']
-    const architecture = metadata['general.architecture']
-
-    return {
-      settings: {
-        prompt_template: template ?? defaultModel.settings.prompt_template,
-        ctx_len:
-          metadata[`${architecture}.context_length`] ??
-          metadata['llama.context_length'] ??
-          4096,
-        ngl:
-          (metadata[`${architecture}.block_count`] ??
-            metadata['llama.block_count'] ??
-            32) + 1,
-      },
-      parameters: {
-        stop: eos_id
-          ? [metadata?.['tokenizer.ggml.tokens'][eos_id] ?? '']
-          : defaultModel.parameters.stop,
-      },
+  private async getModelJsonPath(
+    folderFullPath: string
+  ): Promise<string | undefined> {
+    // Look for model.json directly in this folder; a missing or empty folder yields undefined
+    if (!(await fs.existsSync(folderFullPath))) return undefined
+    const files: string[] = await fs.readdirSync(folderFullPath)
+    if (files.length === 0) return undefined
+    if (files.includes('model.json')) {
+      return joinPath([folderFullPath, 'model.json'])
+    }
+    // Not found at this level: descend into each sub-directory, first match wins
+    for (const file of files) {
+      const path = await joinPath([folderFullPath, file])
+      const fileStats = await fs.fileStat(path)
+      if (fileStats.isDirectory) {
+        const result = await this.getModelJsonPath(path)
+        if (result) return result
+      }
     }
   }
+  //// END LEGACY MODEL FOLDER ////
 }
diff --git a/extensions/model-extension/src/node/index.ts b/extensions/model-extension/src/node/index.ts
deleted file mode 100644
index 2acf6ec4a..000000000
--- a/extensions/model-extension/src/node/index.ts
+++ /dev/null
@@ -1,54 +0,0 @@
-import { closeSync, openSync, readSync } from 'fs'
-import { Template } from '@huggingface/jinja'
-/**
- * This is to retrieve the metadata from a GGUF file
- * It uses hyllama and jinja from @huggingface module
- */
-export const retrieveGGUFMetadata = async (ggufPath: string) => {
-  try {
-    const { ggufMetadata } = await import('hyllama')
-    // Read first 10mb of gguf file
-    const fd = openSync(ggufPath, 'r')
-    const buffer = new Uint8Array(10_000_000)
-    readSync(fd, buffer, 0, 10_000_000, 0)
-    closeSync(fd)
-
-    // Parse metadata and tensor info
-    const { metadata } = ggufMetadata(buffer.buffer)
-
-    return metadata
-  } catch (e) {
-    console.log('[MODEL_EXT]', e)
-  }
-}
-
-/**
- * Convert metadata to jinja template
- * @param metadata
- */
-export const renderJinjaTemplate = (metadata: any): string => {
-  const template = new Template(metadata['tokenizer.chat_template'])
-  const eos_id = metadata['tokenizer.ggml.eos_token_id']
-  const bos_id = metadata['tokenizer.ggml.bos_token_id']
-  if (eos_id === undefined || bos_id === undefined) {
-    return ''
-  }
-  const eos_token = metadata['tokenizer.ggml.tokens'][eos_id]
-  const bos_token = metadata['tokenizer.ggml.tokens'][bos_id]
-  // Parse jinja template
-  return template.render({
-    add_generation_prompt: true,
-    eos_token,
-    bos_token,
-    messages: [
-      {
-        role: 'system',
-        content: '{system_message}',
-      },
-      {
-        role: 'user',
-        content: '{prompt}',
-      },
-    ],
-  })
-}
diff --git a/extensions/model-extension/src/node/node.test.ts b/extensions/model-extension/src/node/node.test.ts
deleted file mode 100644
index afd2b8470..000000000
--- a/extensions/model-extension/src/node/node.test.ts
+++ /dev/null
@@ -1,53 +0,0 @@
-import { renderJinjaTemplate } from './index'
-import { Template } from '@huggingface/jinja'
-
-jest.mock('@huggingface/jinja', () => ({
-  Template: jest.fn((template: string) => ({
-    render: jest.fn(() => `${template}_rendered`),
-  })),
-}))
-
-describe('renderJinjaTemplate', () => {
-  beforeEach(() => {
-    jest.clearAllMocks() // Clear mocks between tests
-  })
-
-  it('should render the template with correct parameters', () => {
-    const metadata = {
-      'tokenizer.chat_template': 'Hello, {{ messages }}!',
-      'tokenizer.ggml.eos_token_id': 0,
-      'tokenizer.ggml.bos_token_id': 1,
-      'tokenizer.ggml.tokens': ['EOS', 'BOS'],
-    }
-
-    const renderedTemplate = renderJinjaTemplate(metadata)
-
-    expect(Template).toHaveBeenCalledWith('Hello, {{ messages }}!')
-
-    expect(renderedTemplate).toBe('Hello, {{ messages }}!_rendered')
-  })
-
-  it('should handle missing token IDs gracefully', () => {
-    const metadata = {
-      'tokenizer.chat_template': 'Hello, {{ messages }}!',
-      'tokenizer.ggml.eos_token_id': 0,
-      'tokenizer.ggml.tokens': ['EOS'],
-    }
-
-    const renderedTemplate = renderJinjaTemplate(metadata)
-
-    expect(Template).toHaveBeenCalledWith('Hello, {{ messages }}!')
-
-    expect(renderedTemplate).toBe('')
-  })
-
-  it('should handle empty template gracefully', () => {
-    const metadata = {}
-
-    const renderedTemplate = renderJinjaTemplate(metadata)
-
-    expect(Template).toHaveBeenCalledWith(undefined)
-
-    expect(renderedTemplate).toBe("")
-  })
-})
diff --git a/extensions/tensorrt-llm-extension/src/index.ts b/extensions/tensorrt-llm-extension/src/index.ts
index 11c86a9a7..d9c89242f 100644
--- a/extensions/tensorrt-llm-extension/src/index.ts
+++ b/extensions/tensorrt-llm-extension/src/index.ts
@@ -7,9 +7,7 @@ import {
   DownloadEvent,
   DownloadRequest,
   DownloadState,
-  GpuSetting,
   InstallationState,
-  Model,
   baseName,
   downloadFile,
   events,
@@ -23,7 +21,7 @@ import {
   ModelEvent,
   getJanDataFolderPath,
   SystemInformation,
-  ModelFile,
+  Model,
 } from '@janhq/core'
 
 /**
@@ -137,7 +135,7 @@ export default class TensorRTLLMExtension extends LocalOAIEngine {
     events.emit(ModelEvent.OnModelsUpdate, {})
   }
 
-  override async loadModel(model: ModelFile): Promise<void> {
+  override async loadModel(model: Model): Promise<void> {
     if ((await this.installationState()) === 'Installed')
       return super.loadModel(model)
 
@@ -177,7 +175,7 @@ export default class TensorRTLLMExtension extends LocalOAIEngine {
   override async inference(data: MessageRequest) {
     if (!this.loadedModel) return
     // TensorRT LLM Extension supports streaming only
-    if (data.model) data.model.parameters.stream = true
+    if (data.model && data.model.parameters) data.model.parameters.stream = true
     super.inference(data)
   }
 
diff --git a/extensions/tensorrt-llm-extension/src/node/index.ts b/extensions/tensorrt-llm-extension/src/node/index.ts
index 77003389f..d02427170 100644
--- a/extensions/tensorrt-llm-extension/src/node/index.ts
+++ b/extensions/tensorrt-llm-extension/src/node/index.ts
@@ -41,7 +41,7 @@ async function loadModel(
   // e.g. ~/jan/models/llama-2
   let modelFolder = params.modelFolder
 
-  if (params.model.settings.prompt_template) {
+  if (params.model.settings?.prompt_template) {
     const promptTemplate = params.model.settings.prompt_template
     const prompt = promptTemplateConverter(promptTemplate)
     if (prompt?.error) {
diff --git a/web/containers/Layout/BottomPanel/DownloadingState/index.tsx b/web/containers/Layout/BottomPanel/DownloadingState/index.tsx
index ddc2eab91..8eb16f549 100644
--- a/web/containers/Layout/BottomPanel/DownloadingState/index.tsx
+++ b/web/containers/Layout/BottomPanel/DownloadingState/index.tsx
@@ -9,11 +9,8 @@ import { modelDownloadStateAtom } from '@/hooks/useDownloadState'
 
 import { formatDownloadPercentage } from '@/utils/converter'
 
-import { getDownloadingModelAtom } from '@/helpers/atoms/Model.atom'
-
 export default function DownloadingState() {
   const downloadStates = useAtomValue(modelDownloadStateAtom)
-  const downloadingModels = useAtomValue(getDownloadingModelAtom)
   const { abortModelDownload } = useDownloadModel()
 
   const totalCurrentProgress = Object.values(downloadStates)
@@ -76,10 +73,7 @@ export default function DownloadingState() {
                       theme="destructive"
                       onClick={() => {
                         if (item?.modelId) {
-                          const model = downloadingModels.find(
-                            (model) => model.id === item.modelId
-                          )
-                          if (model) abortModelDownload(model)
+                          abortModelDownload(item?.modelId)
                         }
                       }}
                     >
diff --git a/web/containers/Layout/BottomPanel/SystemMonitor/TableActiveModel/index.tsx b/web/containers/Layout/BottomPanel/SystemMonitor/TableActiveModel/index.tsx
index 6ff6c894a..00d528f99 100644
--- a/web/containers/Layout/BottomPanel/SystemMonitor/TableActiveModel/index.tsx
+++ b/web/containers/Layout/BottomPanel/SystemMonitor/TableActiveModel/index.tsx
@@ -30,8 +30,8 @@ const TableActiveModel = () => {
                 </td>
                 <td className="px-4 py-2">
                   <Badge theme="secondary">
-                    {activeModel.metadata.size
-                      ? toGibibytes(activeModel.metadata.size)
+                    {activeModel.metadata?.size
+                      ? toGibibytes(activeModel.metadata?.size)
                       : '-'}
                   </Badge>
                 </td>
diff --git a/web/containers/ModalCancelDownload/index.tsx b/web/containers/ModalCancelDownload/index.tsx
index e8d3842a8..fdc583911 100644
--- a/web/containers/ModalCancelDownload/index.tsx
+++ b/web/containers/ModalCancelDownload/index.tsx
@@ -30,7 +30,7 @@ const ModalCancelDownload = ({ model, isFromList }: Props) => {
   const onAbortDownloadClick = useCallback(() => {
     if (downloadState?.modelId) {
       const model = downloadingModels.find(
-        (model) => model.id === downloadState.modelId
+        (model) => model === downloadState.modelId
       )
       if (model) abortModelDownload(model)
     }
diff --git a/web/containers/ModelDropdown/index.tsx b/web/containers/ModelDropdown/index.tsx
index 59f19586a..7415f1165 100644
--- a/web/containers/ModelDropdown/index.tsx
+++ b/web/containers/ModelDropdown/index.tsx
@@ -88,7 +88,7 @@ const ModelDropdown = ({
   const searchInputRef = useRef<HTMLInputElement>(null)
   const configuredModels = useAtomValue(configuredModelsAtom)
   const featuredModel = configuredModels.filter((x) =>
-    x.metadata.tags.includes('Featured')
+    x.metadata?.tags?.includes('Featured')
   )
   const { updateThreadMetadata } = useCreateNewThread()
 
@@ -200,7 +200,7 @@ const ModelDropdown = ({
         if (model)
           updateModelParameter(activeThread, {
             params: modelParams,
-            modelPath: model.file_path,
+            // modelPath: model.file_path,
             modelId: model.id,
             engine: model.engine,
           })
@@ -444,7 +444,7 @@ const ModelDropdown = ({
                         <ul className="pb-2">
                           {featuredModel.map((model) => {
                             const isDownloading = downloadingModels.some(
-                              (md) => md.id === model.id
+                              (md) => md === model.id
                             )
                             return (
                               <li
@@ -465,13 +465,15 @@ const ModelDropdown = ({
                                 </div>
                                 <div className="flex items-center gap-2 text-[hsla(var(--text-tertiary))]">
                                   <span className="font-medium">
-                                    {toGibibytes(model.metadata.size)}
+                                    {toGibibytes(model.metadata?.size)}
                                   </span>
                                   {!isDownloading ? (
                                     <DownloadCloudIcon
                                       size={18}
                                       className="cursor-pointer text-[hsla(var(--app-link))]"
-                                      onClick={() => downloadModel(model)}
+                                      onClick={() =>
+                                        downloadModel(model.sources[0].url)
+                                      }
                                     />
                                   ) : (
                                     Object.values(downloadStates)
@@ -511,7 +513,7 @@ const ModelDropdown = ({
                         .map((model) => {
                           if (!showModel) return null
                           const isDownloading = downloadingModels.some(
-                            (md) => md.id === model.id
+                            (md) => md === model.id
                           )
                           const isDownloaded = downloadedModels.some(
                             (c) => c.id === model.id
@@ -549,14 +551,16 @@ const ModelDropdown = ({
                               <div className="flex items-center gap-2 text-[hsla(var(--text-tertiary))]">
                                 {!isDownloaded && (
                                   <span className="font-medium">
-                                    {toGibibytes(model.metadata.size)}
+                                    {toGibibytes(model.metadata?.size)}
                                   </span>
                                 )}
                                 {!isDownloading && !isDownloaded ? (
                                   <DownloadCloudIcon
                                     size={18}
                                     className="cursor-pointer text-[hsla(var(--app-link))]"
-                                    onClick={() => downloadModel(model)}
+                                    onClick={() =>
+                                      downloadModel(model.sources[0].url)
+                                    }
                                   />
                                 ) : (
                                   Object.values(downloadStates)
diff --git a/web/containers/ModelLabel/index.tsx b/web/containers/ModelLabel/index.tsx
index b0a3da96f..a6237ada6 100644
--- a/web/containers/ModelLabel/index.tsx
+++ b/web/containers/ModelLabel/index.tsx
@@ -42,7 +42,7 @@ const ModelLabel = ({ metadata, compact }: Props) => {
     const availableRam =
       settings?.run_mode === 'gpu'
         ? availableVram * 1000000 // MB to bytes
-        : totalRam - usedRam + (activeModel?.metadata.size ?? 0)
+        : totalRam - usedRam + (activeModel?.metadata?.size ?? 0)
     if (minimumRamModel > totalRam) {
       return (
         <NotEnoughMemoryLabel
@@ -59,10 +59,10 @@ const ModelLabel = ({ metadata, compact }: Props) => {
     return null
   }
 
-  return metadata.tags.includes('Coming Soon') ? (
+  return metadata?.tags?.includes('Coming Soon') ? (
     <UnsupportedModel />
   ) : (
-    getLabel(metadata.size ?? 0)
+    getLabel(metadata?.size ?? 0)
   )
 }
 
diff --git a/web/containers/Providers/EventListener.tsx b/web/containers/Providers/EventListener.tsx
index b35ab2e43..608160555 100644
--- a/web/containers/Providers/EventListener.tsx
+++ b/web/containers/Providers/EventListener.tsx
@@ -52,6 +52,21 @@ const EventListenerWrapper = ({ children }: PropsWithChildren) => {
       if (state.downloadType === 'extension') {
         removeInstallingExtension(state.extensionId!)
       } else {
+        state.downloadState = 'error'
+        setDownloadState(state)
+      }
+    },
+    [setDownloadState, removeInstallingExtension]
+  )
+
+  const onFileDownloadStopped = useCallback(
+    (state: DownloadState) => {
+      console.debug('onFileDownloadStopped', state)
+      if (state.downloadType === 'extension') {
+        removeInstallingExtension(state.extensionId!)
+      } else {
+        state.downloadState = 'error'
+        state.error = 'aborted' // tells a stopped download apart from a failure
         setDownloadState(state)
       }
     },
@@ -62,6 +77,7 @@ const EventListenerWrapper = ({ children }: PropsWithChildren) => {
     (state: DownloadState) => {
       console.debug('onFileDownloadSuccess', state)
       if (state.downloadType !== 'extension') {
+        state.downloadState = 'end'
         setDownloadState(state)
       }
       events.emit(ModelEvent.OnModelsUpdate, {})
@@ -87,6 +103,7 @@ const EventListenerWrapper = ({ children }: PropsWithChildren) => {
     events.on(DownloadEvent.onFileDownloadUpdate, onFileDownloadUpdate)
     events.on(DownloadEvent.onFileDownloadError, onFileDownloadError)
     events.on(DownloadEvent.onFileDownloadSuccess, onFileDownloadSuccess)
+    events.on(DownloadEvent.onFileDownloadStopped, onFileDownloadStopped)
     events.on(DownloadEvent.onFileUnzipSuccess, onFileUnzipSuccess)
 
     return () => {
diff --git a/web/extension/ExtensionManager.ts b/web/extension/ExtensionManager.ts
index aa1a7674b..811126f85 100644
--- a/web/extension/ExtensionManager.ts
+++ b/web/extension/ExtensionManager.ts
@@ -8,6 +8,7 @@ import Extension from './Extension'
  * Manages the registration and retrieval of extensions.
  */
 export class ExtensionManager {
+  date = new Date().toISOString()
   // Registered extensions
   private extensions = new Map<string, BaseExtension>()
 
diff --git a/web/helpers/atoms/Model.atom.ts b/web/helpers/atoms/Model.atom.ts
index 6abc42c9e..0f5367f64 100644
--- a/web/helpers/atoms/Model.atom.ts
+++ b/web/helpers/atoms/Model.atom.ts
@@ -1,4 +1,4 @@
-import { ImportingModel, InferenceEngine, Model, ModelFile } from '@janhq/core'
+import { ImportingModel, InferenceEngine, Model } from '@janhq/core'
 import { atom } from 'jotai'
 import { atomWithStorage } from 'jotai/utils'
 
@@ -14,7 +14,7 @@ enum ModelStorageAtomKeys {
  * Downloaded Models Atom
  * This atom stores the list of models that have been downloaded.
  */
-export const downloadedModelsAtom = atomWithStorage<ModelFile[]>(
+export const downloadedModelsAtom = atomWithStorage<Model[]>(
   ModelStorageAtomKeys.DownloadedModels,
   []
 )
@@ -23,7 +23,7 @@ export const downloadedModelsAtom = atomWithStorage<ModelFile[]>(
  * Configured Models Atom
  * This atom stores the list of models that have been configured and available to download
  */
-export const configuredModelsAtom = atomWithStorage<ModelFile[]>(
+export const configuredModelsAtom = atomWithStorage<Model[]>(
   ModelStorageAtomKeys.AvailableModels,
   []
 )
@@ -43,12 +43,18 @@ export const removeDownloadedModelAtom = atom(
 /**
  * Atom to store the selected model (from ModelDropdown)
  */
-export const selectedModelAtom = atom<ModelFile | undefined>(undefined)
+export const selectedModelAtom = atom<Model | undefined>(undefined)
 
 /**
  * Atom to store the expanded engine sections (from ModelDropdown)
  */
-export const showEngineListModelAtom = atom<string[]>([InferenceEngine.nitro])
+export const showEngineListModelAtom = atom<string[]>([
+  InferenceEngine.nitro,
+  InferenceEngine.cortex,
+  InferenceEngine.cortex_llamacpp,
+  InferenceEngine.cortex_onnx,
+  InferenceEngine.cortex_tensorrtllm,
+])
 
 /// End Models Atom
 /// Model Download Atom
@@ -58,13 +64,13 @@ export const stateModel = atom({ state: 'start', loading: false, model: '' })
 /**
  * Stores the list of models which are being downloaded.
  */
-const downloadingModelsAtom = atom<Model[]>([])
+const downloadingModelsAtom = atom<string[]>([])
 
 export const getDownloadingModelAtom = atom((get) => get(downloadingModelsAtom))
 
-export const addDownloadingModelAtom = atom(null, (get, set, model: Model) => {
+export const addDownloadingModelAtom = atom(null, (get, set, model: string) => {
   const downloadingModels = get(downloadingModelsAtom)
-  if (!downloadingModels.find((e) => e.id === model.id)) {
+  if (!downloadingModels.find((e) => e === model)) {
     set(downloadingModelsAtom, [...downloadingModels, model])
   }
 })
@@ -76,7 +82,7 @@ export const removeDownloadingModelAtom = atom(
 
     set(
       downloadingModelsAtom,
-      downloadingModels.filter((e) => e.id !== modelId)
+      downloadingModels.filter((e) => e !== modelId)
     )
   }
 )
@@ -88,10 +94,6 @@ export const removeDownloadingModelAtom = atom(
 // store the paths of the models that are being imported
 export const importingModelsAtom = atom<ImportingModel[]>([])
 
-// DEPRECATED: Remove when moving to cortex.cpp
-// Default model template when importing
-export const defaultModelAtom = atom<Model | undefined>(undefined)
-
 /**
  * Importing progress Atom
  */
diff --git a/web/hooks/useActiveModel.ts b/web/hooks/useActiveModel.ts
index 2d53678c3..8dd71fcc5 100644
--- a/web/hooks/useActiveModel.ts
+++ b/web/hooks/useActiveModel.ts
@@ -1,6 +1,6 @@
 import { useCallback, useEffect, useRef } from 'react'
 
-import { EngineManager, Model, ModelFile } from '@janhq/core'
+import { EngineManager, Model } from '@janhq/core'
 import { atom, useAtom, useAtomValue, useSetAtom } from 'jotai'
 
 import { toaster } from '@/containers/Toast'
@@ -11,7 +11,7 @@ import { vulkanEnabledAtom } from '@/helpers/atoms/AppConfig.atom'
 import { downloadedModelsAtom } from '@/helpers/atoms/Model.atom'
 import { activeThreadAtom } from '@/helpers/atoms/Thread.atom'
 
-export const activeModelAtom = atom<ModelFile | undefined>(undefined)
+export const activeModelAtom = atom<Model | undefined>(undefined)
 export const loadModelErrorAtom = atom<string | undefined>(undefined)
 
 type ModelState = {
@@ -37,7 +37,7 @@ export function useActiveModel() {
   const [pendingModelLoad, setPendingModelLoad] = useAtom(pendingModelLoadAtom)
   const isVulkanEnabled = useAtomValue(vulkanEnabledAtom)
 
-  const downloadedModelsRef = useRef<ModelFile[]>([])
+  const downloadedModelsRef = useRef<Model[]>([])
 
   useEffect(() => {
     downloadedModelsRef.current = downloadedModels
@@ -55,11 +55,6 @@ export function useActiveModel() {
 
     let model = downloadedModelsRef?.current.find((e) => e.id === modelId)
 
-    const error = await stopModel().catch((error: Error) => error)
-    if (error) {
-      return Promise.reject(error)
-    }
-
     setLoadModelError(undefined)
 
     setActiveModel(undefined)
@@ -144,7 +139,7 @@ export function useActiveModel() {
     const engine = EngineManager.instance().get(stoppingModel.engine)
     return engine
       ?.unloadModel(stoppingModel)
-      .catch()
+      .catch((e) => console.error(e))
       .then(() => {
         setActiveModel(undefined)
         setStateModel({ state: 'start', loading: false, model: undefined })
diff --git a/web/hooks/useCreateNewThread.ts b/web/hooks/useCreateNewThread.ts
index e65353753..75aa99c27 100644
--- a/web/hooks/useCreateNewThread.ts
+++ b/web/hooks/useCreateNewThread.ts
@@ -8,7 +8,7 @@ import {
   ThreadAssistantInfo,
   ThreadState,
   AssistantTool,
-  ModelFile,
+  Model,
 } from '@janhq/core'
 import { atom, useAtomValue, useSetAtom } from 'jotai'
 
@@ -76,7 +76,7 @@ export const useCreateNewThread = () => {
 
   const requestCreateNewThread = async (
     assistant: Assistant,
-    model?: ModelFile | undefined
+    model?: Model | undefined
   ) => {
     // Stop generating if any
     setIsGeneratingResponse(false)
diff --git a/web/hooks/useDeleteModel.test.ts b/web/hooks/useDeleteModel.test.ts
index 336a1cd0c..3a6587d7b 100644
--- a/web/hooks/useDeleteModel.test.ts
+++ b/web/hooks/useDeleteModel.test.ts
@@ -16,7 +16,7 @@ describe('useDeleteModel', () => {
   const mockModel: any = {
     id: 'test-model',
     name: 'Test Model',
-    // Add other required properties of ModelFile
+    // Add other required properties of Model
   }
 
   const mockDeleteModel = jest.fn()
diff --git a/web/hooks/useDeleteModel.ts b/web/hooks/useDeleteModel.ts
index 5a7a319b2..5621a78b8 100644
--- a/web/hooks/useDeleteModel.ts
+++ b/web/hooks/useDeleteModel.ts
@@ -1,6 +1,6 @@
 import { useCallback } from 'react'
 
-import { ExtensionTypeEnum, ModelExtension, ModelFile } from '@janhq/core'
+import { ExtensionTypeEnum, ModelExtension, Model } from '@janhq/core'
 
 import { useSetAtom } from 'jotai'
 
@@ -13,8 +13,8 @@ export default function useDeleteModel() {
   const removeDownloadedModel = useSetAtom(removeDownloadedModelAtom)
 
   const deleteModel = useCallback(
-    async (model: ModelFile) => {
-      await localDeleteModel(model)
+    async (model: Model) => {
+      await localDeleteModel(model.id)
       removeDownloadedModel(model.id)
       toaster({
         title: 'Model Deletion Successful',
@@ -28,7 +28,7 @@ export default function useDeleteModel() {
   return { deleteModel }
 }
 
-const localDeleteModel = async (model: ModelFile) =>
+const localDeleteModel = async (model: string) =>
   extensionManager
     .get<ModelExtension>(ExtensionTypeEnum.Model)
     ?.deleteModel(model)
diff --git a/web/hooks/useDownloadModel.ts b/web/hooks/useDownloadModel.ts
index 0cd21ea83..82ce593e2 100644
--- a/web/hooks/useDownloadModel.ts
+++ b/web/hooks/useDownloadModel.ts
@@ -1,106 +1,47 @@
 import { useCallback } from 'react'
 
 import {
-  Model,
+  events,
   ExtensionTypeEnum,
+  ModelEvent,
   ModelExtension,
-  abortDownload,
-  joinPath,
-  ModelArtifact,
-  DownloadState,
-  GpuSetting,
-  ModelFile,
-  dirName,
 } from '@janhq/core'
 
-import { useAtomValue, useSetAtom } from 'jotai'
+import { useSetAtom } from 'jotai'
 
-import { setDownloadStateAtom } from './useDownloadState'
-
-import useGpuSetting from './useGpuSetting'
+import { toaster } from '@/containers/Toast'
 
 import { extensionManager } from '@/extension/ExtensionManager'
+
 import {
-  ignoreSslAtom,
-  proxyAtom,
-  proxyEnabledAtom,
-} from '@/helpers/atoms/AppConfig.atom'
-import { addDownloadingModelAtom } from '@/helpers/atoms/Model.atom'
+  addDownloadingModelAtom,
+  removeDownloadingModelAtom,
+} from '@/helpers/atoms/Model.atom'
 
 export default function useDownloadModel() {
-  const ignoreSSL = useAtomValue(ignoreSslAtom)
-  const proxy = useAtomValue(proxyAtom)
-  const proxyEnabled = useAtomValue(proxyEnabledAtom)
-  const setDownloadState = useSetAtom(setDownloadStateAtom)
   const addDownloadingModel = useSetAtom(addDownloadingModelAtom)
-
-  const { getGpuSettings } = useGpuSetting()
+  const removeDownloadingModel = useSetAtom(removeDownloadingModelAtom)
 
   const downloadModel = useCallback(
-    async (model: Model) => {
-      const childProgresses: DownloadState[] = model.sources.map(
-        (source: ModelArtifact) => ({
-          fileName: source.filename,
-          modelId: model.id,
-          time: {
-            elapsed: 0,
-            remaining: 0,
-          },
-          speed: 0,
-          percent: 0,
-          size: {
-            total: 0,
-            transferred: 0,
-          },
-          downloadState: 'downloading',
-        })
-      )
-
-      // set an initial download state
-      setDownloadState({
-        fileName: '',
-        modelId: model.id,
-        time: {
-          elapsed: 0,
-          remaining: 0,
-        },
-        speed: 0,
-        percent: 0,
-        size: {
-          total: 0,
-          transferred: 0,
-        },
-        children: childProgresses,
-        downloadState: 'downloading',
-      })
-
+    async (model: string) => {
       addDownloadingModel(model)
-      const gpuSettings = await getGpuSettings()
-      await localDownloadModel(
-        model,
-        ignoreSSL,
-        proxyEnabled ? proxy : '',
-        gpuSettings
-      )
+      localDownloadModel(model).catch((error) => {
+        if (error.message) {
+          toaster({
+            title: 'Download failed',
+            description: error.message,
+            type: 'error',
+          })
+        }
+
+        removeDownloadingModel(model)
+      })
     },
-    [
-      ignoreSSL,
-      proxy,
-      proxyEnabled,
-      getGpuSettings,
-      addDownloadingModel,
-      setDownloadState,
-    ]
+    [addDownloadingModel]
   )
 
-  const abortModelDownload = useCallback(async (model: Model | ModelFile) => {
-    for (const source of model.sources) {
-      const path =
-        'file_path' in model
-          ? await joinPath([await dirName(model.file_path), source.filename])
-          : await joinPath(['models', model.id, source.filename])
-      await abortDownload(path)
-    }
+  const abortModelDownload = useCallback(async (model: string) => {
+    await cancelModelDownload(model)
   }, [])
 
   return {
@@ -109,12 +50,12 @@ export default function useDownloadModel() {
   }
 }
 
-const localDownloadModel = async (
-  model: Model,
-  ignoreSSL: boolean,
-  proxy: string,
-  gpuSettings?: GpuSetting
-) =>
+const localDownloadModel = async (model: string) =>
   extensionManager
     .get<ModelExtension>(ExtensionTypeEnum.Model)
-    ?.downloadModel(model, gpuSettings, { ignoreSSL, proxy })
+    ?.pullModel(model)
+
+const cancelModelDownload = async (model: string) =>
+  extensionManager
+    .get<ModelExtension>(ExtensionTypeEnum.Model)
+    ?.cancelModelPull(model)
diff --git a/web/hooks/useDownloadState.ts b/web/hooks/useDownloadState.ts
index 03a8883cb..59267749e 100644
--- a/web/hooks/useDownloadState.ts
+++ b/web/hooks/useDownloadState.ts
@@ -77,7 +77,7 @@ export const setDownloadStateAtom = atom(
         }
       } else {
         // download in progress
-        if (state.size.total === 0) {
+        if (state.size.total === 0 || !currentState[state.modelId]) {
           // this is initial state, just set the state
           currentState[state.modelId] = state
           set(modelDownloadStateAtom, currentState)
diff --git a/web/hooks/useGetHFRepoData.ts b/web/hooks/useGetHFRepoData.ts
index 3dab2c72e..4e3308116 100644
--- a/web/hooks/useGetHFRepoData.ts
+++ b/web/hooks/useGetHFRepoData.ts
@@ -1,12 +1,6 @@
 import { useCallback, useState } from 'react'
 
-import {
-  ExtensionTypeEnum,
-  HuggingFaceRepoData,
-  ModelExtension,
-} from '@janhq/core'
-
-import { extensionManager } from '@/extension'
+import { HuggingFaceRepoData } from '@janhq/core'
 
 export const useGetHFRepoData = () => {
   const [error, setError] = useState<string | undefined>(undefined)
@@ -35,7 +29,8 @@ export const useGetHFRepoData = () => {
 const extensionGetHfRepoData = async (
   repoId: string
 ): Promise<HuggingFaceRepoData | undefined> => {
-  return extensionManager
-    .get<ModelExtension>(ExtensionTypeEnum.Model)
-    ?.fetchHuggingFaceRepoData(repoId)
+  return Promise.resolve(undefined)
+  // return extensionManager
+  //   .get<ModelExtension>(ExtensionTypeEnum.Model)
+  //   ?.fetchHuggingFaceRepoData(repoId)
 }
diff --git a/web/hooks/useImportModel.ts b/web/hooks/useImportModel.ts
index effc64f86..df6b085ca 100644
--- a/web/hooks/useImportModel.ts
+++ b/web/hooks/useImportModel.ts
@@ -104,16 +104,22 @@ const useImportModel = () => {
 const localImportModels = async (
   models: ImportingModel[],
   optionType: OptionType
-): Promise<void> =>
-  extensionManager
-    .get<ModelExtension>(ExtensionTypeEnum.Model)
-    ?.importModels(models, optionType)
+): Promise<void> => {
+  await models
+    .filter((e) => !!e.modelId)
+    .map((model) => {
+      if (model.modelId)
+        extensionManager
+          .get<ModelExtension>(ExtensionTypeEnum.Model)
+          ?.importModel(model.modelId, model.path)
+    })
+}
 
 const localUpdateModelInfo = async (
   modelInfo: Partial<Model>
 ): Promise<Model | undefined> =>
   extensionManager
     .get<ModelExtension>(ExtensionTypeEnum.Model)
-    ?.updateModelInfo(modelInfo)
+    ?.updateModel(modelInfo)
 
 export default useImportModel
diff --git a/web/hooks/useModels.ts b/web/hooks/useModels.ts
index 58def79c6..1cbd970d6 100644
--- a/web/hooks/useModels.ts
+++ b/web/hooks/useModels.ts
@@ -5,8 +5,8 @@ import {
   Model,
   ModelEvent,
   ModelExtension,
-  ModelFile,
   events,
+  ModelManager,
 } from '@janhq/core'
 
 import { useSetAtom } from 'jotai'
@@ -14,7 +14,6 @@ import { useSetAtom } from 'jotai'
 import { extensionManager } from '@/extension'
 import {
   configuredModelsAtom,
-  defaultModelAtom,
   downloadedModelsAtom,
 } from '@/helpers/atoms/Model.atom'
 
@@ -25,32 +24,22 @@ import {
  */
 const useModels = () => {
   const setDownloadedModels = useSetAtom(downloadedModelsAtom)
-  const setConfiguredModels = useSetAtom(configuredModelsAtom)
-  const setDefaultModel = useSetAtom(defaultModelAtom)
+  const setExtensionModels = useSetAtom(configuredModelsAtom)
 
   const getData = useCallback(() => {
     const getDownloadedModels = async () => {
-      const models = await getLocalDownloadedModels()
+      const models = await getModels()
       setDownloadedModels(models)
     }
 
-    const getConfiguredModels = async () => {
-      const models = await getLocalConfiguredModels()
-      setConfiguredModels(models)
-    }
-
-    const getDefaultModel = async () => {
-      const defaultModel = await getLocalDefaultModel()
-      setDefaultModel(defaultModel)
+    const getExtensionModels = async () => {
+      const models = ModelManager.instance().models.values().toArray()
+      setExtensionModels(models)
     }
 
     // Fetch all data
-    Promise.all([
-      getDownloadedModels(),
-      getConfiguredModels(),
-      getDefaultModel(),
-    ])
-  }, [setDownloadedModels, setConfiguredModels, setDefaultModel])
+    Promise.all([getDownloadedModels(), getExtensionModels()])
+  }, [setDownloadedModels, setExtensionModels])
 
   useEffect(() => {
     // Try get data on mount
@@ -65,22 +54,8 @@ const useModels = () => {
   }, [getData])
 }
 
-// TODO: Deprecated - Remove when moving to cortex.cpp
-const getLocalDefaultModel = async (): Promise<Model | undefined> =>
-  extensionManager
-    .get<ModelExtension>(ExtensionTypeEnum.Model)
-    ?.getDefaultModel()
-
-// TODO: Deprecated - Remove when moving to cortex.cpp
-const getLocalConfiguredModels = async (): Promise<ModelFile[]> =>
-  extensionManager
-    .get<ModelExtension>(ExtensionTypeEnum.Model)
-    ?.getConfiguredModels() ?? []
-
-// TODO: Deprecated - Remove when moving to cortex.cpp
-const getLocalDownloadedModels = async (): Promise<ModelFile[]> =>
-  extensionManager
-    .get<ModelExtension>(ExtensionTypeEnum.Model)
-    ?.getDownloadedModels() ?? []
+const getModels = async (): Promise<Model[]> =>
+  extensionManager.get<ModelExtension>(ExtensionTypeEnum.Model)?.getModels() ??
+  []
 
 export default useModels
diff --git a/web/hooks/useRecommendedModel.ts b/web/hooks/useRecommendedModel.ts
index ed56efa55..21a9c69e7 100644
--- a/web/hooks/useRecommendedModel.ts
+++ b/web/hooks/useRecommendedModel.ts
@@ -1,6 +1,6 @@
 import { useCallback, useEffect, useState } from 'react'
 
-import { Model, InferenceEngine, ModelFile } from '@janhq/core'
+import { Model, InferenceEngine } from '@janhq/core'
 
 import { atom, useAtomValue } from 'jotai'
 
@@ -24,16 +24,12 @@ export const LAST_USED_MODEL_ID = 'last-used-model-id'
  */
 export default function useRecommendedModel() {
   const activeModel = useAtomValue(activeModelAtom)
-  const [sortedModels, setSortedModels] = useState<ModelFile[]>([])
-  const [recommendedModel, setRecommendedModel] = useState<
-    ModelFile | undefined
-  >()
+  const [sortedModels, setSortedModels] = useState<Model[]>([])
+  const [recommendedModel, setRecommendedModel] = useState<Model | undefined>()
   const activeThread = useAtomValue(activeThreadAtom)
   const downloadedModels = useAtomValue(downloadedModelsAtom)
 
-  const getAndSortDownloadedModels = useCallback(async (): Promise<
-    ModelFile[]
-  > => {
+  const getAndSortDownloadedModels = useCallback(async (): Promise<Model[]> => {
     const models = downloadedModels.sort((a, b) =>
       a.engine !== InferenceEngine.nitro && b.engine === InferenceEngine.nitro
         ? 1
diff --git a/web/hooks/useSendChatMessage.ts b/web/hooks/useSendChatMessage.ts
index 1dbd5b45e..bab515a30 100644
--- a/web/hooks/useSendChatMessage.ts
+++ b/web/hooks/useSendChatMessage.ts
@@ -123,65 +123,27 @@ export default function useSendChatMessage() {
   }
 
   const resendChatMessage = async (currentMessage: ThreadMessage) => {
-    if (!activeThreadRef.current) {
-      console.error('No active thread')
-      return
-    }
-    updateThreadWaiting(activeThreadRef.current.id, true)
+    // Delete last response before regenerating
+    const newConvoData = currentMessages
+    let toSendMessage = currentMessage
 
-    const requestBuilder = new MessageRequestBuilder(
-      MessageRequestType.Thread,
-      activeThreadRef.current.assistants[0].model ?? selectedModelRef.current,
-      activeThreadRef.current,
-      currentMessages
-    )
-      .addSystemMessage(activeThreadRef.current.assistants[0]?.instructions)
-      .removeLastAssistantMessage()
+    do {
+      deleteMessage(currentMessage.id)
+      const msg = newConvoData.pop()
+      if (!msg) break
+      toSendMessage = msg
+      deleteMessage(toSendMessage.id ?? '')
+    } while (toSendMessage.role !== ChatCompletionRole.User)
 
-    const modelId =
-      selectedModelRef.current?.id ??
-      activeThreadRef.current.assistants[0].model.id
-
-    if (modelRef.current?.id !== modelId) {
-      const error = await startModel(modelId).catch((error: Error) => error)
-      if (error) {
-        updateThreadWaiting(activeThreadRef.current.id, false)
-        return
-      }
+    if (activeThreadRef.current) {
+      await extensionManager
+        .get<ConversationalExtension>(ExtensionTypeEnum.Conversational)
+        ?.writeMessages(activeThreadRef.current.id, newConvoData)
     }
 
-    setIsGeneratingResponse(true)
-
-    if (currentMessage.role !== ChatCompletionRole.User) {
-      // Delete last response before regenerating
-      deleteMessage(currentMessage.id ?? '')
-      if (activeThreadRef.current) {
-        await extensionManager
-          .get<ConversationalExtension>(ExtensionTypeEnum.Conversational)
-          ?.writeMessages(
-            activeThreadRef.current.id,
-            currentMessages.filter((msg) => msg.id !== currentMessage.id)
-          )
-      }
-    }
-    // Process message request with Assistants tools
-    const request = await ToolManager.instance().process(
-      requestBuilder.build(),
-      activeThreadRef.current.assistants?.flatMap(
-        (assistant) => assistant.tools ?? []
-      ) ?? []
-    )
-
-    request.messages = normalizeMessages(request.messages ?? [])
-
-    const engine =
-      requestBuilder.model?.engine ?? selectedModelRef.current?.engine ?? ''
-
-    EngineManager.instance().get(engine)?.inference(request)
+    sendChatMessage(toSendMessage.content[0]?.text.value)
   }
 
-  // Define interface extending Array prototype
-
   const sendChatMessage = async (message: string) => {
     if (!message || message.trim().length === 0) return
 
@@ -294,6 +256,7 @@ export default function useSendChatMessage() {
     )
     request.messages = normalizeMessages(request.messages ?? [])
 
+    console.log(requestBuilder.model?.engine ?? modelRequest.engine, request)
     // Request for inference
     EngineManager.instance()
       .get(requestBuilder.model?.engine ?? modelRequest.engine ?? '')
diff --git a/web/screens/Hub/ModelList/ModelHeader/index.tsx b/web/screens/Hub/ModelList/ModelHeader/index.tsx
index 44a3fd278..ce5a12957 100644
--- a/web/screens/Hub/ModelList/ModelHeader/index.tsx
+++ b/web/screens/Hub/ModelList/ModelHeader/index.tsx
@@ -1,6 +1,6 @@
 import { useCallback } from 'react'
 
-import { ModelFile } from '@janhq/core'
+import { Model } from '@janhq/core'
 import { Button, Badge, Tooltip } from '@janhq/joi'
 
 import { useAtomValue, useSetAtom } from 'jotai'
@@ -38,7 +38,7 @@ import {
 } from '@/helpers/atoms/SystemBar.atom'
 
 type Props = {
-  model: ModelFile
+  model: Model
   onClick: () => void
   open: string
 }
@@ -64,7 +64,7 @@ const ModelItemHeader = ({ model, onClick, open }: Props) => {
   const assistants = useAtomValue(assistantsAtom)
 
   const onDownloadClick = useCallback(() => {
-    downloadModel(model)
+    downloadModel(model.sources[0].url)
   }, [model, downloadModel])
 
   const isDownloaded = downloadedModels.find((md) => md.id === model.id) != null
@@ -81,7 +81,7 @@ const ModelItemHeader = ({ model, onClick, open }: Props) => {
     </Button>
   )
 
-  const isDownloading = downloadingModels.some((md) => md.id === model.id)
+  const isDownloading = downloadingModels.some((md) => md === model.id)
 
   const onUseModelClick = useCallback(async () => {
     if (assistants.length === 0) {
@@ -144,7 +144,7 @@ const ModelItemHeader = ({ model, onClick, open }: Props) => {
         <div className="inline-flex items-center space-x-2">
           <div className="hidden items-center sm:inline-flex">
             <span className="mr-4 font-semibold">
-              {toGibibytes(model.metadata.size)}
+              {toGibibytes(model.metadata?.size)}
             </span>
             <ModelLabel metadata={model.metadata} />
           </div>
diff --git a/web/screens/Hub/ModelList/ModelItem/index.tsx b/web/screens/Hub/ModelList/ModelItem/index.tsx
index ec9d885a1..a077dbffc 100644
--- a/web/screens/Hub/ModelList/ModelItem/index.tsx
+++ b/web/screens/Hub/ModelList/ModelItem/index.tsx
@@ -1,6 +1,6 @@
 import { useState } from 'react'
 
-import { ModelFile } from '@janhq/core'
+import { Model } from '@janhq/core'
 import { Badge } from '@janhq/joi'
 
 import { twMerge } from 'tailwind-merge'
@@ -12,7 +12,7 @@ import ModelItemHeader from '@/screens/Hub/ModelList/ModelHeader'
 import { toGibibytes } from '@/utils/converter'
 
 type Props = {
-  model: ModelFile
+  model: Model
 }
 
 const ModelItem: React.FC<Props> = ({ model }) => {
@@ -34,7 +34,7 @@ const ModelItem: React.FC<Props> = ({ model }) => {
           <div className="flex w-full flex-col border-t border-[hsla(var(--app-border))] p-4 ">
             <div className="my-2 inline-flex items-center sm:hidden">
               <span className="mr-4 font-semibold">
-                {toGibibytes(model.metadata.size)}
+                {toGibibytes(model.metadata?.size)}
               </span>
               <ModelLabel metadata={model.metadata} />
             </div>
@@ -49,9 +49,9 @@ const ModelItem: React.FC<Props> = ({ model }) => {
                 <span className="font-semibold ">Author</span>
                 <p
                   className="mt-2 line-clamp-1 font-medium text-[hsla(var(--text-secondary))]"
-                  title={model.metadata.author}
+                  title={model.metadata?.author}
                 >
-                  {model.metadata.author}
+                  {model.metadata?.author}
                 </p>
               </div>
               <div>
@@ -66,7 +66,7 @@ const ModelItem: React.FC<Props> = ({ model }) => {
               <div>
                 <span className="mb-1 font-semibold ">Tags</span>
                 <div className="mt-2 flex flex-wrap gap-x-1 gap-y-1">
-                  {model.metadata.tags.map((tag: string) => (
+                  {model.metadata?.tags?.map((tag: string) => (
                     <Badge key={tag} title={tag} variant="soft">
                       {tag}
                     </Badge>
diff --git a/web/screens/Hub/ModelList/index.tsx b/web/screens/Hub/ModelList/index.tsx
index 8fc30d541..0d7865a81 100644
--- a/web/screens/Hub/ModelList/index.tsx
+++ b/web/screens/Hub/ModelList/index.tsx
@@ -1,6 +1,6 @@
 import { useMemo } from 'react'
 
-import { ModelFile } from '@janhq/core'
+import { Model } from '@janhq/core'
 
 import { useAtomValue } from 'jotai'
 
@@ -9,16 +9,16 @@ import ModelItem from '@/screens/Hub/ModelList/ModelItem'
 import { downloadedModelsAtom } from '@/helpers/atoms/Model.atom'
 
 type Props = {
-  models: ModelFile[]
+  models: Model[]
 }
 
 const ModelList = ({ models }: Props) => {
   const downloadedModels = useAtomValue(downloadedModelsAtom)
-  const sortedModels: ModelFile[] = useMemo(() => {
-    const featuredModels: ModelFile[] = []
-    const remoteModels: ModelFile[] = []
-    const localModels: ModelFile[] = []
-    const remainingModels: ModelFile[] = []
+  const sortedModels: Model[] = useMemo(() => {
+    const featuredModels: Model[] = []
+    const remoteModels: Model[] = []
+    const localModels: Model[] = []
+    const remainingModels: Model[] = []
     models.forEach((m) => {
       if (m.metadata?.tags?.includes('Featured')) {
         featuredModels.push(m)
@@ -30,9 +30,9 @@ const ModelList = ({ models }: Props) => {
         remainingModels.push(m)
       }
     })
-    featuredModels.sort((m1, m2) => m1.metadata.size - m2.metadata.size)
-    localModels.sort((m1, m2) => m1.metadata.size - m2.metadata.size)
-    remainingModels.sort((m1, m2) => m1.metadata.size - m2.metadata.size)
+    featuredModels.sort((m1, m2) => m1.metadata?.size - m2.metadata?.size)
+    localModels.sort((m1, m2) => m1.metadata?.size - m2.metadata?.size)
+    remainingModels.sort((m1, m2) => m1.metadata?.size - m2.metadata?.size)
     remoteModels.sort((m1, m2) => m1.name.localeCompare(m2.name))
     return [
       ...featuredModels,
diff --git a/web/screens/Hub/index.tsx b/web/screens/Hub/index.tsx
index 8148a6bb5..382cf5667 100644
--- a/web/screens/Hub/index.tsx
+++ b/web/screens/Hub/index.tsx
@@ -52,7 +52,7 @@ const HubScreen = () => {
     } else if (sortSelected === 'featured') {
       return (
         x.name.toLowerCase().includes(searchValue.toLowerCase()) &&
-        x.metadata.tags.includes('Featured')
+        x.metadata?.tags?.includes('Featured')
       )
     } else {
       return x.name.toLowerCase().includes(searchValue.toLowerCase())
diff --git a/web/screens/Settings/HuggingFaceRepoDetailModal/ModelDownloadRow/index.tsx b/web/screens/Settings/HuggingFaceRepoDetailModal/ModelDownloadRow/index.tsx
index 9c2ff14a5..454905332 100644
--- a/web/screens/Settings/HuggingFaceRepoDetailModal/ModelDownloadRow/index.tsx
+++ b/web/screens/Settings/HuggingFaceRepoDetailModal/ModelDownloadRow/index.tsx
@@ -1,11 +1,6 @@
-import { useCallback, useMemo } from 'react'
+import { useCallback } from 'react'
 
-import {
-  DownloadState,
-  HuggingFaceRepoData,
-  Model,
-  Quantization,
-} from '@janhq/core'
+import { DownloadState, HuggingFaceRepoData, Quantization } from '@janhq/core'
 import { Badge, Button, Progress } from '@janhq/joi'
 
 import { useAtomValue, useSetAtom } from 'jotai'
@@ -24,10 +19,7 @@ import { mainViewStateAtom } from '@/helpers/atoms/App.atom'
 import { assistantsAtom } from '@/helpers/atoms/Assistant.atom'
 
 import { importHuggingFaceModelStageAtom } from '@/helpers/atoms/HuggingFace.atom'
-import {
-  defaultModelAtom,
-  downloadedModelsAtom,
-} from '@/helpers/atoms/Model.atom'
+import { downloadedModelsAtom } from '@/helpers/atoms/Model.atom'
 
 type Props = {
   index: number
@@ -39,7 +31,6 @@ type Props = {
 }
 
 const ModelDownloadRow: React.FC<Props> = ({
-  repoData,
   downloadUrl,
   fileName,
   fileSize = 0,
@@ -56,44 +47,18 @@ const ModelDownloadRow: React.FC<Props> = ({
   const downloadedModel = downloadedModels.find((md) => md.id === fileName)
 
   const setHfImportingStage = useSetAtom(importHuggingFaceModelStageAtom)
-  const defaultModel = useAtomValue(defaultModelAtom)
-
-  const model = useMemo(() => {
-    if (!defaultModel) {
-      return undefined
-    }
-
-    const model: Model = {
-      ...defaultModel,
-      sources: [
-        {
-          url: downloadUrl,
-          filename: fileName,
-        },
-      ],
-      id: fileName,
-      name: fileName,
-      created: Date.now(),
-      metadata: {
-        author: 'User',
-        tags: repoData.tags,
-        size: fileSize,
-      },
-    }
-    return model
-  }, [fileName, fileSize, repoData, downloadUrl, defaultModel])
 
   const onAbortDownloadClick = useCallback(() => {
-    if (model) {
-      abortModelDownload(model)
+    if (downloadUrl) {
+      abortModelDownload(downloadUrl)
     }
-  }, [model, abortModelDownload])
+  }, [downloadUrl, abortModelDownload])
 
   const onDownloadClick = useCallback(async () => {
-    if (model) {
-      downloadModel(model)
+    if (downloadUrl) {
+      downloadModel(downloadUrl)
     }
-  }, [model, downloadModel])
+  }, [downloadUrl, downloadModel])
 
   const onUseModelClick = useCallback(async () => {
     if (assistants.length === 0) {
@@ -111,7 +76,7 @@ const ModelDownloadRow: React.FC<Props> = ({
     setHfImportingStage,
   ])
 
-  if (!model) {
+  if (!downloadUrl) {
     return null
   }
 
@@ -143,7 +108,7 @@ const ModelDownloadRow: React.FC<Props> = ({
           variant="soft"
           className="min-w-[98px]"
           onClick={onUseModelClick}
-          data-testid={`use-model-btn-${model.id}`}
+          data-testid={`use-model-btn-${downloadUrl}`}
         >
           Use
         </Button>
diff --git a/web/screens/Settings/MyModels/MyModelList/index.tsx b/web/screens/Settings/MyModels/MyModelList/index.tsx
index 7557e9952..6661ed068 100644
--- a/web/screens/Settings/MyModels/MyModelList/index.tsx
+++ b/web/screens/Settings/MyModels/MyModelList/index.tsx
@@ -1,6 +1,6 @@
 import { memo, useState } from 'react'
 
-import { InferenceEngine, ModelFile } from '@janhq/core'
+import { InferenceEngine, Model } from '@janhq/core'
 import { Badge, Button, Tooltip, useClickOutside } from '@janhq/joi'
 import { useAtom } from 'jotai'
 import {
@@ -21,7 +21,7 @@ import { isLocalEngine } from '@/utils/modelEngine'
 import { serverEnabledAtom } from '@/helpers/atoms/LocalServer.atom'
 
 type Props = {
-  model: ModelFile
+  model: Model
   groupTitle?: string
 }
 
@@ -78,7 +78,7 @@ const MyModelList = ({ model }: Props) => {
           <div className="flex gap-x-4">
             <div className="md:min-w-[90px] md:max-w-[90px]">
               <Badge theme="secondary" className="sm:mr-8">
-                {toGibibytes(model.metadata.size)}
+                {toGibibytes(model.metadata?.size)}
               </Badge>
             </div>
 
diff --git a/web/screens/Thread/ThreadCenterPanel/AssistantSetting/index.tsx b/web/screens/Thread/ThreadCenterPanel/AssistantSetting/index.tsx
index 4dab6bfa8..f73efb486 100644
--- a/web/screens/Thread/ThreadCenterPanel/AssistantSetting/index.tsx
+++ b/web/screens/Thread/ThreadCenterPanel/AssistantSetting/index.tsx
@@ -38,20 +38,20 @@ const AssistantSetting: React.FC<Props> = ({ componentData }) => {
         (key === 'chunk_overlap' || key === 'chunk_size')
       ) {
         if (
-          activeThread.assistants[0].tools[0]?.settings.chunk_size <
-          activeThread.assistants[0].tools[0]?.settings.chunk_overlap
+          activeThread.assistants[0].tools[0]?.settings?.chunk_size <
+          activeThread.assistants[0].tools[0]?.settings?.chunk_overlap
         ) {
           activeThread.assistants[0].tools[0].settings.chunk_overlap =
             activeThread.assistants[0].tools[0].settings.chunk_size
         }
         if (
           key === 'chunk_size' &&
-          value < activeThread.assistants[0].tools[0].settings.chunk_overlap
+          value < activeThread.assistants[0].tools[0].settings?.chunk_overlap
         ) {
           activeThread.assistants[0].tools[0].settings.chunk_overlap = value
         } else if (
           key === 'chunk_overlap' &&
-          value > activeThread.assistants[0].tools[0].settings.chunk_size
+          value > activeThread.assistants[0].tools[0].settings?.chunk_size
         ) {
           activeThread.assistants[0].tools[0].settings.chunk_size = value
         }
diff --git a/web/screens/Thread/ThreadCenterPanel/ChatBody/OnDeviceStarterScreen/index.tsx b/web/screens/Thread/ThreadCenterPanel/ChatBody/OnDeviceStarterScreen/index.tsx
index 0ef9a9ba1..0adc7ddd4 100644
--- a/web/screens/Thread/ThreadCenterPanel/ChatBody/OnDeviceStarterScreen/index.tsx
+++ b/web/screens/Thread/ThreadCenterPanel/ChatBody/OnDeviceStarterScreen/index.tsx
@@ -69,7 +69,7 @@ const OnDeviceStarterScreen = ({ extensionHasSettings }: Props) => {
       return x.id === recommendModel[0] || x.id === recommendModel[1]
     } else {
       return (
-        x.metadata.tags.includes('Featured') && x.metadata.size < 5000000000
+        x.metadata?.tags?.includes('Featured') && x.metadata?.size < 5000000000
       )
     }
   })
@@ -143,7 +143,7 @@ const OnDeviceStarterScreen = ({ extensionHasSettings }: Props) => {
                     ) : (
                       filteredModels.map((model) => {
                         const isDownloading = downloadingModels.some(
-                          (md) => md.id === model.id
+                          (md) => md === model.id
                         )
                         return (
                           <div
@@ -161,13 +161,15 @@ const OnDeviceStarterScreen = ({ extensionHasSettings }: Props) => {
                             </div>
                             <div className="flex items-center gap-2 text-[hsla(var(--text-tertiary))]">
                               <span className="font-medium">
-                                {toGibibytes(model.metadata.size)}
+                                {toGibibytes(model.metadata?.size)}
                               </span>
                               {!isDownloading ? (
                                 <DownloadCloudIcon
                                   size={18}
                                   className="cursor-pointer text-[hsla(var(--app-link))]"
-                                  onClick={() => downloadModel(model)}
+                                  onClick={() =>
+                                    downloadModel(model.sources[0].url)
+                                  }
                                 />
                               ) : (
                                 Object.values(downloadStates)
@@ -210,7 +212,7 @@ const OnDeviceStarterScreen = ({ extensionHasSettings }: Props) => {
 
                 {featuredModel.slice(0, 2).map((featModel) => {
                   const isDownloading = downloadingModels.some(
-                    (md) => md.id === featModel.id
+                    (md) => md === featModel.id
                   )
                   return (
                     <div
@@ -253,12 +255,14 @@ const OnDeviceStarterScreen = ({ extensionHasSettings }: Props) => {
                           <Button
                             theme="ghost"
                             className="!bg-[hsla(var(--secondary-bg))]"
-                            onClick={() => downloadModel(featModel)}
+                            onClick={() =>
+                              downloadModel(featModel.sources[0].url)
+                            }
                           >
                             Download
                           </Button>
                           <span className="text-[hsla(var(--text-secondary))]">
-                            {toGibibytes(featModel.metadata.size)}
+                            {toGibibytes(featModel.metadata?.size)}
                           </span>
                         </div>
                       )}
diff --git a/web/screens/Thread/ThreadCenterPanel/ChatInput/index.tsx b/web/screens/Thread/ThreadCenterPanel/ChatInput/index.tsx
index afa84b5bf..066c93430 100644
--- a/web/screens/Thread/ThreadCenterPanel/ChatInput/index.tsx
+++ b/web/screens/Thread/ThreadCenterPanel/ChatInput/index.tsx
@@ -155,7 +155,7 @@ const ChatInput = () => {
                     fileUpload.length > 0 ||
                     (activeThread?.assistants[0].tools &&
                       !activeThread?.assistants[0].tools[0]?.enabled &&
-                      !activeThread?.assistants[0].model.settings.vision_model)
+                      !activeThread?.assistants[0].model.settings?.vision_model)
                   ) {
                     e.stopPropagation()
                   } else {
@@ -180,7 +180,7 @@ const ChatInput = () => {
                   (activeThread?.assistants[0].tools &&
                     !activeThread?.assistants[0].tools[0]?.enabled &&
                     !activeThread?.assistants[0].model.settings
-                      .vision_model && (
+                      ?.vision_model && (
                       <>
                         {fileUpload.length !== 0 && (
                           <span>
@@ -221,13 +221,13 @@ const ChatInput = () => {
                   <li
                     className={twMerge(
                       'text-[hsla(var(--text-secondary)] hover:bg-secondary flex w-full items-center space-x-2 px-4 py-2 hover:bg-[hsla(var(--dropdown-menu-hover-bg))]',
-                      activeThread?.assistants[0].model.settings.vision_model
+                      activeThread?.assistants[0].model.settings?.vision_model
                         ? 'cursor-pointer'
                         : 'cursor-not-allowed opacity-50'
                     )}
                     onClick={() => {
                       if (
-                        activeThread?.assistants[0].model.settings.vision_model
+                        activeThread?.assistants[0].model.settings?.vision_model
                       ) {
                         imageInputRef.current?.click()
                         setShowAttacmentMenus(false)
@@ -240,7 +240,7 @@ const ChatInput = () => {
                 }
                 content="This feature only supports multimodal models."
                 disabled={
-                  activeThread?.assistants[0].model.settings.vision_model
+                  activeThread?.assistants[0].model.settings?.vision_model
                 }
               />
               <Tooltip
@@ -249,7 +249,7 @@ const ChatInput = () => {
                   <li
                     className={twMerge(
                       'text-[hsla(var(--text-secondary)] hover:bg-secondary flex w-full cursor-pointer items-center space-x-2 px-4 py-2 hover:bg-[hsla(var(--dropdown-menu-hover-bg))]',
-                      activeThread?.assistants[0].model.settings.text_model ===
+                      activeThread?.assistants[0].model.settings?.text_model ===
                         false
                         ? 'cursor-not-allowed opacity-50'
                         : 'cursor-pointer'
@@ -257,7 +257,7 @@ const ChatInput = () => {
                     onClick={() => {
                       if (
                         activeThread?.assistants[0].model.settings
-                          .text_model !== false
+                          ?.text_model !== false
                       ) {
                         fileInputRef.current?.click()
                         setShowAttacmentMenus(false)
@@ -271,11 +271,11 @@ const ChatInput = () => {
                 content={
                   (!activeThread?.assistants[0].tools ||
                     !activeThread?.assistants[0].tools[0]?.enabled ||
-                    activeThread?.assistants[0].model.settings.text_model ===
+                    activeThread?.assistants[0].model.settings?.text_model ===
                       false) && (
                     <>
-                      {activeThread?.assistants[0].model.settings.text_model ===
-                      false ? (
+                      {activeThread?.assistants[0].model.settings
+                        ?.text_model === false ? (
                         <span>
                           This model does not support text-based retrieval.
                         </span>
diff --git a/web/screens/Thread/ThreadCenterPanel/MessageToolbar/index.tsx b/web/screens/Thread/ThreadCenterPanel/MessageToolbar/index.tsx
index cdf865ceb..c4a97a6b9 100644
--- a/web/screens/Thread/ThreadCenterPanel/MessageToolbar/index.tsx
+++ b/web/screens/Thread/ThreadCenterPanel/MessageToolbar/index.tsx
@@ -74,7 +74,7 @@ const MessageToolbar = ({ message }: { message: ThreadMessage }) => {
           )[
             messages.filter((msg) => msg.role === ChatCompletionRole.Assistant)
               .length - 1
-          ]?.content[0].text.value,
+          ]?.content[0]?.text.value,
         },
       }
 
diff --git a/web/screens/Thread/ThreadCenterPanel/index.tsx b/web/screens/Thread/ThreadCenterPanel/index.tsx
index b12f859bd..fe7993e9a 100644
--- a/web/screens/Thread/ThreadCenterPanel/index.tsx
+++ b/web/screens/Thread/ThreadCenterPanel/index.tsx
@@ -56,7 +56,7 @@ const ThreadCenterPanel = () => {
   const activeThread = useAtomValue(activeThreadAtom)
 
   const acceptedFormat: Accept = activeThread?.assistants[0].model.settings
-    .vision_model
+    ?.vision_model
     ? {
         'application/pdf': ['.pdf'],
         'image/jpeg': ['.jpeg'],
@@ -79,7 +79,7 @@ const ThreadCenterPanel = () => {
         e.dataTransfer.items.length === 1 &&
         ((activeThread?.assistants[0].tools &&
           activeThread?.assistants[0].tools[0]?.enabled) ||
-          activeThread?.assistants[0].model.settings.vision_model)
+          activeThread?.assistants[0].model.settings?.vision_model)
       ) {
         setDragOver(true)
       } else if (
@@ -101,7 +101,7 @@ const ThreadCenterPanel = () => {
         rejectFiles.length !== 0 ||
         (activeThread?.assistants[0].tools &&
           !activeThread?.assistants[0].tools[0]?.enabled &&
-          !activeThread?.assistants[0].model.settings.vision_model)
+          !activeThread?.assistants[0].model.settings?.vision_model)
       )
         return
       const imageType = files[0]?.type.includes('image')
@@ -170,7 +170,7 @@ const ThreadCenterPanel = () => {
                     {isDragReject
                       ? `Currently, we only support 1 attachment at the same time with ${
                           activeThread?.assistants[0].model.settings
-                            .vision_model
+                            ?.vision_model
                             ? 'PDF, JPEG, JPG, PNG'
                             : 'PDF'
                         } format`
@@ -178,7 +178,7 @@ const ThreadCenterPanel = () => {
                   </h6>
                   {!isDragReject && (
                     <p className="mt-2">
-                      {activeThread?.assistants[0].model.settings.vision_model
+                      {activeThread?.assistants[0].model.settings?.vision_model
                         ? 'PDF, JPEG, JPG, PNG'
                         : 'PDF'}
                     </p>
diff --git a/web/screens/Thread/ThreadRightPanel/index.tsx b/web/screens/Thread/ThreadRightPanel/index.tsx
index 0bf917015..7ccc4957a 100644
--- a/web/screens/Thread/ThreadRightPanel/index.tsx
+++ b/web/screens/Thread/ThreadRightPanel/index.tsx
@@ -182,8 +182,8 @@ const ThreadRightPanel = () => {
       })
 
       if (
-        activeThread.assistants[0].model.parameters.max_tokens &&
-        activeThread.assistants[0].model.settings.ctx_len
+        activeThread.assistants[0].model.parameters?.max_tokens &&
+        activeThread.assistants[0].model.settings?.ctx_len
       ) {
         if (
           key === 'max_tokens' &&
diff --git a/web/services/appService.test.ts b/web/services/appService.test.ts
index 37053f930..5172ea6ed 100644
--- a/web/services/appService.test.ts
+++ b/web/services/appService.test.ts
@@ -1,30 +1,32 @@
-
-import { ExtensionTypeEnum, extensionManager } from '@/extension';
-import { appService } from './appService';
+import { extensionManager } from '@/extension'
+import { appService } from './appService'
 
 test('should return correct system information when monitoring extension is found', async () => {
-  const mockGpuSetting = { name: 'NVIDIA GeForce GTX 1080', memory: 8192 };
-  const mockOsInfo = { platform: 'win32', release: '10.0.19041' };
+  const mockGpuSetting = { name: 'NVIDIA GeForce GTX 1080', memory: 8192 }
+  const mockOsInfo = { platform: 'win32', release: '10.0.19041' }
   const mockMonitoringExtension = {
     getGpuSetting: jest.fn().mockResolvedValue(mockGpuSetting),
     getOsInfo: jest.fn().mockResolvedValue(mockOsInfo),
-  };
-  extensionManager.get = jest.fn().mockReturnValue(mockMonitoringExtension);
-  
-  const result = await appService.systemInformation();
-  
-  expect(mockMonitoringExtension.getGpuSetting).toHaveBeenCalled();
-  expect(mockMonitoringExtension.getOsInfo).toHaveBeenCalled();
-  expect(result).toEqual({ gpuSetting: mockGpuSetting, osInfo: mockOsInfo });
-});
+  }
+  extensionManager.get = jest.fn().mockReturnValue(mockMonitoringExtension)
 
+  const result = await appService.systemInformation()
+
+  expect(mockMonitoringExtension.getGpuSetting).toHaveBeenCalled()
+  expect(mockMonitoringExtension.getOsInfo).toHaveBeenCalled()
+  expect(result).toEqual({ gpuSetting: mockGpuSetting, osInfo: mockOsInfo })
+})
 
 test('should log a warning when monitoring extension is not found', async () => {
-  const consoleWarnMock = jest.spyOn(console, 'warn').mockImplementation(() => {});
-  extensionManager.get = jest.fn().mockReturnValue(undefined);
-  
-  await appService.systemInformation();
-  
-  expect(consoleWarnMock).toHaveBeenCalledWith('System monitoring extension not found');
-  consoleWarnMock.mockRestore();
-});
+  const consoleWarnMock = jest
+    .spyOn(console, 'warn')
+    .mockImplementation(() => {})
+  extensionManager.get = jest.fn().mockReturnValue(undefined)
+
+  await appService.systemInformation()
+
+  expect(consoleWarnMock).toHaveBeenCalledWith(
+    'System monitoring extension not found'
+  )
+  consoleWarnMock.mockRestore()
+})

From f3aa40bc0bdfa162f17008edd3affadc9f2e273d Mon Sep 17 00:00:00 2001
From: Louis <louis@jan.ai>
Date: Thu, 17 Oct 2024 10:07:31 +0700
Subject: [PATCH 07/71] chore: clean obsolete local provider codes

---
 .../extensions/engines/LocalOAIEngine.ts      | 21 -------------------
 1 file changed, 21 deletions(-)

diff --git a/core/src/browser/extensions/engines/LocalOAIEngine.ts b/core/src/browser/extensions/engines/LocalOAIEngine.ts
index 6c70fa186..cb5b6760e 100644
--- a/core/src/browser/extensions/engines/LocalOAIEngine.ts
+++ b/core/src/browser/extensions/engines/LocalOAIEngine.ts
@@ -30,27 +30,6 @@ export abstract class LocalOAIEngine extends OAIEngine {
    * Load the model.
    */
   override async loadModel(model: Model): Promise<void> {
-    if (model.engine.toString() !== this.provider) return
-    // const modelFolder = await dirName(model.file_path)
-    // const systemInfo = await systemInformation()
-    // const res = await executeOnMain(
-    //   this.nodeModule,
-    //   this.loadModelFunctionName,
-    //   {
-    //     modelFolder,
-    //     model,
-    //   },
-    //   systemInfo
-    // )
-
-    // if (res?.error) {
-    //   events.emit(ModelEvent.OnModelFail, { error: res.error })
-    //   return Promise.reject(res.error)
-    // } else {
-    //   this.loadedModel = model
-    //   events.emit(ModelEvent.OnModelReady, model)
-    //   return Promise.resolve()
-    // }
     return Promise.resolve()
   }
   /**

From f44f291bd870462b13a54ed0b3d99dc162ec91d6 Mon Sep 17 00:00:00 2001
From: Louis <louis@jan.ai>
Date: Thu, 17 Oct 2024 15:21:00 +0700
Subject: [PATCH 08/71] chore: download progress finished should reload model
 list

---
 extensions/model-extension/src/cortex.ts      |  52 +++++-
 extensions/model-extension/src/index.ts       | 161 +-----------------
 extensions/model-extension/src/model-json.ts  | 132 ++++++++++++++
 web/containers/Providers/EventListener.tsx    |   2 +
 .../Settings/MyModels/MyModelList/index.tsx   |   2 +-
 5 files changed, 194 insertions(+), 155 deletions(-)
 create mode 100644 extensions/model-extension/src/model-json.ts

diff --git a/extensions/model-extension/src/cortex.ts b/extensions/model-extension/src/cortex.ts
index 685bf3b9f..4945e4756 100644
--- a/extensions/model-extension/src/cortex.ts
+++ b/extensions/model-extension/src/cortex.ts
@@ -1,9 +1,9 @@
 import PQueue from 'p-queue'
 import ky from 'ky'
 import {
-  DownloadEvent,
   events,
   Model,
+  ModelEvent,
   ModelRuntimeParams,
   ModelSettingParams,
 } from '@janhq/core'
@@ -39,6 +39,11 @@ export class CortexAPI implements ICortexAPI {
     this.subscribeToEvents()
   }
 
+  /**
+   * Fetches a model detail from cortex.cpp
+   * @param model
+   * @returns
+   */
   getModel(model: string): Promise<any> {
     return this.queue.add(() =>
       ky
@@ -48,6 +53,11 @@ export class CortexAPI implements ICortexAPI {
     )
   }
 
+  /**
+   * Fetches models list from cortex.cpp
+   * (takes no parameters; requests `${API_URL}/models`)
+   * @returns
+   */
   getModels(): Promise<Model[]> {
     return this.queue
       .add(() => ky.get(`${API_URL}/models`).json<ModelList>())
@@ -56,6 +66,11 @@ export class CortexAPI implements ICortexAPI {
       )
   }
 
+  /**
+   * Pulls a model from HuggingFace via cortex.cpp
+   * @param model
+   * @returns
+   */
   pullModel(model: string): Promise<void> {
     return this.queue.add(() =>
       ky
@@ -68,6 +83,11 @@ export class CortexAPI implements ICortexAPI {
     )
   }
 
+  /**
+   * Imports a model from a local path via cortex.cpp
+   * @param model
+   * @returns
+   */
   importModel(model: string, modelPath: string): Promise<void> {
     return this.queue.add(() =>
       ky
@@ -78,12 +98,22 @@ export class CortexAPI implements ICortexAPI {
     )
   }
 
+  /**
+   * Deletes a model from cortex.cpp
+   * @param model
+   * @returns
+   */
   deleteModel(model: string): Promise<void> {
     return this.queue.add(() =>
       ky.delete(`${API_URL}/models/${model}`).json().then()
     )
   }
 
+  /**
+   * Update a model in cortex.cpp
+   * @param model
+   * @returns
+   */
   updateModel(model: object): Promise<void> {
     return this.queue.add(() =>
       ky
@@ -92,6 +122,12 @@ export class CortexAPI implements ICortexAPI {
         .then()
     )
   }
+
+  /**
+   * Cancel model pull in cortex.cpp
+   * @param model
+   * @returns
+   */
   cancelModelPull(model: string): Promise<void> {
     return this.queue.add(() =>
       ky
@@ -101,6 +137,10 @@ export class CortexAPI implements ICortexAPI {
     )
   }
 
+  /**
+   * Do health check on cortex.cpp
+   * @returns
+   */
   healthz(): Promise<void> {
     return ky
       .get(`${API_URL}/healthz`, {
@@ -112,6 +152,9 @@ export class CortexAPI implements ICortexAPI {
       .then(() => {})
   }
 
+  /**
+   * Subscribe to cortex.cpp websocket events
+   */
   subscribeToEvents() {
     this.queue.add(
       () =>
@@ -140,12 +183,19 @@ export class CortexAPI implements ICortexAPI {
                 total: total,
               },
             })
+            // Update models list from Hub
+            events.emit(ModelEvent.OnModelsUpdate, {})
           })
           resolve()
         })
     )
   }
 
+  /**
+   * Transform model to the expected format (e.g. parameters, settings, metadata)
+   * @param model
+   * @returns
+   */
   private transformModel(model: any) {
     model.parameters = setParameters<ModelRuntimeParams>(model)
     model.settings = setParameters<ModelSettingParams>(model)
diff --git a/extensions/model-extension/src/index.ts b/extensions/model-extension/src/index.ts
index b879e0bb9..c154c3754 100644
--- a/extensions/model-extension/src/index.ts
+++ b/extensions/model-extension/src/index.ts
@@ -2,21 +2,14 @@ import {
   ModelExtension,
   Model,
   InferenceEngine,
-  fs,
   joinPath,
   dirName,
 } from '@janhq/core'
 import { CortexAPI } from './cortex'
+import { scanModelsFolder } from './model-json'
 
 declare const SETTINGS: Array<any>
 
-/**
- * TODO: Set env for HF access token? or via API request?
- */
-enum Settings {
-  huggingFaceAccessToken = 'hugging-face-access-token',
-}
-
 /**
  * Extension enum
  */
@@ -28,7 +21,6 @@ enum ExtensionEnum {
  * A extension for models
  */
 export default class JanModelExtension extends ModelExtension {
-  private static readonly _homeDir = 'file://models'
   cortexAPI: CortexAPI = new CortexAPI()
 
   /**
@@ -59,7 +51,7 @@ export default class JanModelExtension extends ModelExtension {
     /**
      * Sending POST to /models/pull/{id} endpoint to pull the model
      */
-    return this.cortexAPI?.pullModel(model)
+    return this.cortexAPI.pullModel(model)
   }
 
   /**
@@ -72,7 +64,7 @@ export default class JanModelExtension extends ModelExtension {
     /**
      * Sending DELETE to /models/pull/{id} endpoint to cancel a model pull
      */
-    this.cortexAPI?.cancelModelPull(model)
+    this.cortexAPI.cancelModelPull(model)
   }
 
   /**
@@ -81,7 +73,7 @@ export default class JanModelExtension extends ModelExtension {
    * @returns A Promise that resolves when the model is deleted.
    */
   async deleteModel(model: string): Promise<void> {
-    return this.cortexAPI?.deleteModel(model)
+    return this.cortexAPI.deleteModel(model)
   }
 
   /**
@@ -99,7 +91,7 @@ export default class JanModelExtension extends ModelExtension {
       // Updated from an older version than 0.5.5
       // Scan through the models folder and import them (Legacy flow)
       // Return models immediately
-      return this.scanModelsFolder().then((models) => {
+      return scanModelsFolder().then((models) => {
         return models ?? []
       })
     }
@@ -123,7 +115,7 @@ export default class JanModelExtension extends ModelExtension {
       (e) => e.engine === InferenceEngine.nitro
     )
 
-    await this.cortexAPI?.getModels().then((models) => {
+    await this.cortexAPI.getModels().then((models) => {
       const existingIds = models.map((e) => e.id)
       toImportModels = toImportModels.filter(
         (e: Model) => !existingIds.includes(e.id)
@@ -161,7 +153,7 @@ export default class JanModelExtension extends ModelExtension {
      * just return models from cortex.cpp
      */
     return (
-      this.cortexAPI?.getModels().then((models) => {
+      this.cortexAPI.getModels().then((models) => {
         return models
       }) ?? Promise.resolve([])
     )
@@ -183,143 +175,6 @@ export default class JanModelExtension extends ModelExtension {
    * @param optionType
    */
   async importModel(model: string, modelPath: string): Promise<void> {
-    return this.cortexAPI?.importModel(model, modelPath)
+    return this.cortexAPI.importModel(model, modelPath)
   }
-
-  //// LEGACY MODEL FOLDER ////
-  /**
-   * Scan through models folder and return downloaded models
-   * @returns
-   */
-  private async scanModelsFolder(): Promise<Model[]> {
-    try {
-      if (!(await fs.existsSync(JanModelExtension._homeDir))) {
-        console.debug('Model folder not found')
-        return []
-      }
-
-      const files: string[] = await fs.readdirSync(JanModelExtension._homeDir)
-
-      const allDirectories: string[] = []
-
-      for (const modelFolder of files) {
-        const fullModelFolderPath = await joinPath([
-          JanModelExtension._homeDir,
-          modelFolder,
-        ])
-        if (!(await fs.fileStat(fullModelFolderPath)).isDirectory) continue
-        allDirectories.push(modelFolder)
-      }
-
-      const readJsonPromises = allDirectories.map(async (dirName) => {
-        // filter out directories that don't match the selector
-        // read model.json
-        const folderFullPath = await joinPath([
-          JanModelExtension._homeDir,
-          dirName,
-        ])
-
-        const jsonPath = await this.getModelJsonPath(folderFullPath)
-
-        if (await fs.existsSync(jsonPath)) {
-          // if we have the model.json file, read it
-          let model = await fs.readFileSync(jsonPath, 'utf-8')
-
-          model = typeof model === 'object' ? model : JSON.parse(model)
-
-          // This to ensure backward compatibility with `model.json` with `source_url`
-          if (model['source_url'] != null) {
-            model['sources'] = [
-              {
-                filename: model.id,
-                url: model['source_url'],
-              },
-            ]
-          }
-          model.file_path = jsonPath
-          model.file_name = 'model.json'
-
-          // Check model file exist
-          // model binaries (sources) are absolute path & exist (symlinked)
-          const existFiles = await Promise.all(
-            model.sources.map(
-              (source) =>
-                // Supposed to be a local file url
-                !source.url.startsWith(`http://`) &&
-                !source.url.startsWith(`https://`)
-            )
-          )
-          if (existFiles.every((exist) => exist)) return true
-
-          const result = await fs
-            .readdirSync(await joinPath([JanModelExtension._homeDir, dirName]))
-            .then((files: string[]) => {
-              // Model binary exists in the directory
-              // Model binary name can match model ID or be a .gguf file and not be an incompleted model file
-              return (
-                files.includes(dirName) || // Legacy model GGUF without extension
-                files.filter((file) => {
-                  return (
-                    file.toLowerCase().endsWith('.gguf') || // GGUF
-                    file.toLowerCase().endsWith('.engine') // Tensort-LLM
-                  )
-                })?.length > 0 // TODO: find better way (can use basename to check the file name with source url)
-              )
-            })
-
-          if (result) return model
-          else return undefined
-        }
-      })
-      const results = await Promise.allSettled(readJsonPromises)
-      const modelData = results
-        .map((result) => {
-          if (result.status === 'fulfilled' && result.value) {
-            try {
-              const model =
-                typeof result.value === 'object'
-                  ? result.value
-                  : JSON.parse(result.value)
-              return model as Model
-            } catch {
-              console.debug(`Unable to parse model metadata: ${result.value}`)
-            }
-          }
-          return undefined
-        })
-        .filter((e) => !!e)
-
-      return modelData
-    } catch (err) {
-      console.error(err)
-      return []
-    }
-  }
-
-  /**
-   * Retrieve the model.json path from a folder
-   * @param folderFullPath
-   * @returns
-   */
-  private async getModelJsonPath(
-    folderFullPath: string
-  ): Promise<string | undefined> {
-    // try to find model.json recursively inside each folder
-    if (!(await fs.existsSync(folderFullPath))) return undefined
-    const files: string[] = await fs.readdirSync(folderFullPath)
-    if (files.length === 0) return undefined
-    if (files.includes('model.json')) {
-      return joinPath([folderFullPath, 'model.json'])
-    }
-    // continue recursive
-    for (const file of files) {
-      const path = await joinPath([folderFullPath, file])
-      const fileStats = await fs.fileStat(path)
-      if (fileStats.isDirectory) {
-        const result = await this.getModelJsonPath(path)
-        if (result) return result
-      }
-    }
-  }
-  //// END LEGACY MODEL FOLDER ////
 }
diff --git a/extensions/model-extension/src/model-json.ts b/extensions/model-extension/src/model-json.ts
new file mode 100644
index 000000000..af6f95b36
--- /dev/null
+++ b/extensions/model-extension/src/model-json.ts
@@ -0,0 +1,132 @@
+import { Model, fs, joinPath } from '@janhq/core'
+//// LEGACY MODEL FOLDER ////
+/**
+ * Scan the legacy `file://models` folder and return the models found in it.
+ * @returns models whose model.json was read and whose binary is local or present in the folder; [] when the folder is missing or the scan throws
+ */
+export const scanModelsFolder = async (): Promise<Model[]> => {
+  const _homeDir = 'file://models'
+  try {
+    if (!(await fs.existsSync(_homeDir))) {
+      console.debug('Model folder not found')
+      return []
+    }
+
+    const files: string[] = await fs.readdirSync(_homeDir)
+
+    const allDirectories: string[] = []
+
+    for (const modelFolder of files) {
+      const fullModelFolderPath = await joinPath([_homeDir, modelFolder])
+      if (!(await fs.fileStat(fullModelFolderPath)).isDirectory) continue
+      allDirectories.push(modelFolder)
+    }
+
+    const readJsonPromises = allDirectories.map(async (dirName) => {
+      // find the model.json for this folder; getModelJsonPath also searches
+      // its subdirectories
+      const folderFullPath = await joinPath([_homeDir, dirName])
+
+      const jsonPath = await getModelJsonPath(folderFullPath)
+
+      if (await fs.existsSync(jsonPath)) {
+        // if we have the model.json file, read it
+        let model = await fs.readFileSync(jsonPath, 'utf-8')
+
+        model = typeof model === 'object' ? model : JSON.parse(model)
+
+        // This to ensure backward compatibility with `model.json` with `source_url`
+        if (model['source_url'] != null) {
+          model['sources'] = [
+            {
+              filename: model.id,
+              url: model['source_url'],
+            },
+          ]
+        }
+        model.file_path = jsonPath
+        model.file_name = 'model.json'
+
+        // A model counts as downloaded when every source is a local path
+        // (not an http/https URL): return the parsed model, not a boolean
+        const existFiles = await Promise.all(
+          model.sources.map(
+            (source) =>
+              // Supposed to be a local file url
+              !source.url.startsWith(`http://`) &&
+              !source.url.startsWith(`https://`)
+          )
+        )
+        if (existFiles.every((exist) => exist)) return model
+
+        const result = await fs
+          .readdirSync(await joinPath([_homeDir, dirName]))
+          .then((files: string[]) => {
+            // Model binary exists in the directory
+            // i.e. a file named like the folder, or any .gguf / .engine file
+            return (
+              files.includes(dirName) || // Legacy model GGUF without extension
+              files.filter((file) => {
+                return (
+                  file.toLowerCase().endsWith('.gguf') || // GGUF
+                  file.toLowerCase().endsWith('.engine') // TensorRT-LLM
+                )
+              })?.length > 0 // TODO: find better way (can use basename to check the file name with source url)
+            )
+          })
+
+        if (result) return model
+        else return undefined
+      }
+    })
+    const results = await Promise.allSettled(readJsonPromises)
+    const modelData = results
+      .map((result) => {
+        if (result.status === 'fulfilled' && result.value) {
+          try {
+            const model =
+              typeof result.value === 'object'
+                ? result.value
+                : JSON.parse(result.value)
+            return model as Model
+          } catch {
+            console.debug(`Unable to parse model metadata: ${result.value}`)
+          }
+        }
+        return undefined
+      })
+      .filter((e) => !!e)
+
+    return modelData
+  } catch (err) {
+    console.error(err)
+    return []
+  }
+}
+
+/**
+ * Find the first model.json at or below a folder, checking the folder itself before descending into its subdirectories.
+ * @param folderFullPath - folder to start the search from
+ * @returns path to the model.json found, or undefined when the folder is missing, empty, or contains none
+ */
+export const getModelJsonPath = async (
+  folderFullPath: string
+): Promise<string | undefined> => {
+  // a missing or empty folder cannot contain a model.json
+  if (!(await fs.existsSync(folderFullPath))) return undefined
+  const files: string[] = await fs.readdirSync(folderFullPath)
+  if (files.length === 0) return undefined
+  if (files.includes('model.json')) {
+    return joinPath([folderFullPath, 'model.json'])
+  }
+  // not found here: search each subdirectory in listing order, return the first hit
+  for (const file of files) {
+    const path = await joinPath([folderFullPath, file])
+    const fileStats = await fs.fileStat(path)
+    if (fileStats.isDirectory) {
+      const result = await getModelJsonPath(path)
+      if (result) return result
+    }
+  }
+}
+//// END LEGACY MODEL FOLDER ////
diff --git a/web/containers/Providers/EventListener.tsx b/web/containers/Providers/EventListener.tsx
index 608160555..1832256e2 100644
--- a/web/containers/Providers/EventListener.tsx
+++ b/web/containers/Providers/EventListener.tsx
@@ -111,6 +111,7 @@ const EventListenerWrapper = ({ children }: PropsWithChildren) => {
       events.off(DownloadEvent.onFileDownloadUpdate, onFileDownloadUpdate)
       events.off(DownloadEvent.onFileDownloadError, onFileDownloadError)
       events.off(DownloadEvent.onFileDownloadSuccess, onFileDownloadSuccess)
+      events.off(DownloadEvent.onFileDownloadStopped, onFileDownloadStopped)
       events.off(DownloadEvent.onFileUnzipSuccess, onFileUnzipSuccess)
     }
   }, [
@@ -118,6 +119,7 @@ const EventListenerWrapper = ({ children }: PropsWithChildren) => {
     onFileDownloadError,
     onFileDownloadSuccess,
     onFileUnzipSuccess,
+    onFileDownloadStopped,
   ])
 
   return (
diff --git a/web/screens/Settings/MyModels/MyModelList/index.tsx b/web/screens/Settings/MyModels/MyModelList/index.tsx
index 6661ed068..756520107 100644
--- a/web/screens/Settings/MyModels/MyModelList/index.tsx
+++ b/web/screens/Settings/MyModels/MyModelList/index.tsx
@@ -78,7 +78,7 @@ const MyModelList = ({ model }: Props) => {
           <div className="flex gap-x-4">
             <div className="md:min-w-[90px] md:max-w-[90px]">
               <Badge theme="secondary" className="sm:mr-8">
-                {toGibibytes(model.metadata?.size)}
+                {model.metadata?.size ? toGibibytes(model.metadata?.size) : '-'}
               </Badge>
             </div>
 

From 03e15fb70fa9bacd901dd3e31de49b31594c4f61 Mon Sep 17 00:00:00 2001
From: Louis <louis@jan.ai>
Date: Mon, 21 Oct 2024 12:18:14 +0700
Subject: [PATCH 09/71] feat: sync model hub and download progress from
 cortex.cpp

---
 .husky/pre-commit                             |  2 +-
 core/src/browser/extensions/model.ts          |  2 +-
 core/src/types/model/modelInterface.ts        |  5 +-
 core/src/types/monitoring/index.test.ts       | 25 +++--
 .../src/node/index.ts                         | 13 ++-
 extensions/model-extension/rollup.config.ts   |  2 +
 .../model-extension/src/@types/global.d.ts    |  2 +
 extensions/model-extension/src/cortex.ts      | 24 ++---
 extensions/model-extension/src/index.ts       |  4 +-
 web/containers/ModalCancelDownload/index.tsx  | 22 ++---
 web/containers/ModelDropdown/index.tsx        | 10 +-
 web/containers/Providers/EventListener.tsx    | 10 ++
 web/hooks/useDownloadModel.ts                 | 21 ++---
 web/hooks/useGetHFRepoData.ts                 |  7 +-
 web/hooks/useSendChatMessage.ts               |  3 +-
 .../Hub/ModelList/ModelHeader/index.tsx       | 13 +--
 .../ModelDownloadRow/index.tsx                |  5 +-
 .../ChatBody/OnDeviceStarterScreen/index.tsx  | 10 +-
 web/services/restService.ts                   |  2 +-
 web/utils/huggingface.ts                      | 93 +++++++++++++++++++
 web/utils/model.ts                            |  3 +
 21 files changed, 192 insertions(+), 86 deletions(-)
 create mode 100644 web/utils/huggingface.ts
 create mode 100644 web/utils/model.ts

diff --git a/.husky/pre-commit b/.husky/pre-commit
index a4aa5add4..177cd4216 100644
--- a/.husky/pre-commit
+++ b/.husky/pre-commit
@@ -1 +1 @@
-npm run lint --fix
\ No newline at end of file
+oxlint --fix || npm run lint --fix
\ No newline at end of file
diff --git a/core/src/browser/extensions/model.ts b/core/src/browser/extensions/model.ts
index d111c1d3a..f3609b3b2 100644
--- a/core/src/browser/extensions/model.ts
+++ b/core/src/browser/extensions/model.ts
@@ -13,7 +13,7 @@ export abstract class ModelExtension extends BaseExtension implements ModelInter
   }
 
   abstract getModels(): Promise<Model[]>
-  abstract pullModel(model: string): Promise<void>
+  abstract pullModel(model: string, id?: string): Promise<void>
   abstract cancelModelPull(modelId: string): Promise<void>
   abstract importModel(model: string, modePath: string): Promise<void>
   abstract updateModel(modelInfo: Partial<Model>): Promise<Model>
diff --git a/core/src/types/model/modelInterface.ts b/core/src/types/model/modelInterface.ts
index 088118f69..b676db949 100644
--- a/core/src/types/model/modelInterface.ts
+++ b/core/src/types/model/modelInterface.ts
@@ -1,5 +1,4 @@
 import { Model } from './modelEntity'
-import { OptionType } from './modelImport'
 
 /**
  * Model extension for managing models.
@@ -10,14 +9,14 @@ export interface ModelInterface {
    * @param model - The model to download.
    * @returns A Promise that resolves when the model has been downloaded.
    */
-  pullModel(model: string): Promise<void>
+  pullModel(model: string, id?: string): Promise<void>
 
   /**
    * Cancels the download of a specific model.
    * @param {string} modelId - The ID of the model to cancel the download for.
    * @returns {Promise<void>} A promise that resolves when the download has been cancelled.
    */
-  cancelModelPull(modelId: string): Promise<void>
+  cancelModelPull(model: string): Promise<void>
 
   /**
    * Deletes a model.
diff --git a/core/src/types/monitoring/index.test.ts b/core/src/types/monitoring/index.test.ts
index 010fcb97a..56c5879e4 100644
--- a/core/src/types/monitoring/index.test.ts
+++ b/core/src/types/monitoring/index.test.ts
@@ -1,16 +1,13 @@
+import * as monitoringInterface from './monitoringInterface'
+import * as resourceInfo from './resourceInfo'
 
-import * as monitoringInterface from './monitoringInterface';
-import * as resourceInfo from './resourceInfo';
+import * as index from './index'
 
-    import * as index from './index';
-    import * as monitoringInterface from './monitoringInterface';
-    import * as resourceInfo from './resourceInfo';
-    
-    it('should re-export all symbols from monitoringInterface and resourceInfo', () => {
-      for (const key in monitoringInterface) {
-        expect(index[key]).toBe(monitoringInterface[key]);
-      }
-      for (const key in resourceInfo) {
-        expect(index[key]).toBe(resourceInfo[key]);
-      }
-    });
+it('should re-export all symbols from monitoringInterface and resourceInfo', () => {
+  for (const key in monitoringInterface) {
+    expect(index[key]).toBe(monitoringInterface[key])
+  }
+  for (const key in resourceInfo) {
+    expect(index[key]).toBe(resourceInfo[key])
+  }
+})
diff --git a/extensions/inference-cortex-extension/src/node/index.ts b/extensions/inference-cortex-extension/src/node/index.ts
index f1c365ade..788318c84 100644
--- a/extensions/inference-cortex-extension/src/node/index.ts
+++ b/extensions/inference-cortex-extension/src/node/index.ts
@@ -1,5 +1,5 @@
 import path from 'path'
-import { log, SystemInformation } from '@janhq/core/node'
+import { getJanDataFolderPath, log, SystemInformation } from '@janhq/core/node'
 import { executableCortexFile } from './execute'
 import { ProcessWatchdog } from './watchdog'
 
@@ -40,9 +40,18 @@ function run(systemInfo?: SystemInformation): Promise<any> {
       executableOptions.enginePath
     )
 
+    const dataFolderPath = getJanDataFolderPath()
     watchdog = new ProcessWatchdog(
       executableOptions.executablePath,
-      ['--start-server', '--port', LOCAL_PORT.toString()],
+      [
+        '--start-server',
+        '--port',
+        LOCAL_PORT.toString(),
+        '--config_file_path',
+        `${path.join(dataFolderPath, '.janrc')}`,
+        '--data_folder_path',
+        dataFolderPath,
+      ],
       {
         cwd: executableOptions.enginePath,
         env: {
diff --git a/extensions/model-extension/rollup.config.ts b/extensions/model-extension/rollup.config.ts
index 6e506140f..781c4df84 100644
--- a/extensions/model-extension/rollup.config.ts
+++ b/extensions/model-extension/rollup.config.ts
@@ -20,6 +20,8 @@ export default [
       replace({
         preventAssignment: true,
         SETTINGS: JSON.stringify(settingJson),
+        API_URL: 'http://127.0.0.1:39291',
+        SOCKET_URL: 'ws://127.0.0.1:39291',
       }),
       // Allow json resolution
       json(),
diff --git a/extensions/model-extension/src/@types/global.d.ts b/extensions/model-extension/src/@types/global.d.ts
index 01bd272f2..bff3811e3 100644
--- a/extensions/model-extension/src/@types/global.d.ts
+++ b/extensions/model-extension/src/@types/global.d.ts
@@ -1,6 +1,8 @@
 export {}
 declare global {
   declare const NODE: string
+  declare const API_URL: string
+  declare const SOCKET_URL: string
 
   interface Core {
     api: APIFunctions
diff --git a/extensions/model-extension/src/cortex.ts b/extensions/model-extension/src/cortex.ts
index 4945e4756..b0acd6d08 100644
--- a/extensions/model-extension/src/cortex.ts
+++ b/extensions/model-extension/src/cortex.ts
@@ -1,6 +1,7 @@
 import PQueue from 'p-queue'
 import ky from 'ky'
 import {
+  DownloadEvent,
   events,
   Model,
   ModelEvent,
@@ -13,18 +14,12 @@ import {
 interface ICortexAPI {
   getModel(model: string): Promise<Model>
   getModels(): Promise<Model[]>
-  pullModel(model: string): Promise<void>
+  pullModel(model: string, id?: string): Promise<void>
   importModel(path: string, modelPath: string): Promise<void>
   deleteModel(model: string): Promise<void>
   updateModel(model: object): Promise<void>
   cancelModelPull(model: string): Promise<void>
 }
-/**
- * Simple CortexAPI service
- * It could be replaced by cortex client sdk later on
- */
-const API_URL = 'http://127.0.0.1:39291'
-const SOCKET_URL = 'ws://127.0.0.1:39291'
 
 type ModelList = {
   data: any[]
@@ -71,10 +66,10 @@ export class CortexAPI implements ICortexAPI {
    * @param model
    * @returns
    */
-  pullModel(model: string): Promise<void> {
+  pullModel(model: string, id?: string): Promise<void> {
     return this.queue.add(() =>
       ky
-        .post(`${API_URL}/v1/models/pull`, { json: { model } })
+        .post(`${API_URL}/v1/models/pull`, { json: { model, id } })
         .json()
         .catch(async (e) => {
           throw (await e.response?.json()) ?? e
@@ -160,7 +155,6 @@ export class CortexAPI implements ICortexAPI {
       () =>
         new Promise<void>((resolve) => {
           this.socket = new WebSocket(`${SOCKET_URL}/events`)
-          console.log('Socket connected')
 
           this.socket.addEventListener('message', (event) => {
             const data = JSON.parse(event.data)
@@ -173,7 +167,7 @@ export class CortexAPI implements ICortexAPI {
               (accumulator, currentValue) => accumulator + currentValue.bytes,
               0
             )
-            const percent = ((transferred ?? 1) / (total ?? 1)) * 100
+            const percent = (transferred / total || 0) * 100
 
             events.emit(data.type, {
               modelId: data.task.id,
@@ -184,7 +178,13 @@ export class CortexAPI implements ICortexAPI {
               },
             })
             // Update models list from Hub
-            events.emit(ModelEvent.OnModelsUpdate, {})
+            if (data.type === DownloadEvent.onFileDownloadSuccess) {
+              // Delay for the state update from cortex.cpp
+              // Just to be sure
+              setTimeout(() => {
+                events.emit(ModelEvent.OnModelsUpdate, {})
+              }, 500)
+            }
           })
           resolve()
         })
diff --git a/extensions/model-extension/src/index.ts b/extensions/model-extension/src/index.ts
index c154c3754..38fd0634a 100644
--- a/extensions/model-extension/src/index.ts
+++ b/extensions/model-extension/src/index.ts
@@ -47,11 +47,11 @@ export default class JanModelExtension extends ModelExtension {
    * @param model - The model to download.
    * @returns A Promise that resolves when the model is downloaded.
    */
-  async pullModel(model: string): Promise<void> {
+  async pullModel(model: string, id?: string): Promise<void> {
     /**
      * Sending POST to /models/pull/{id} endpoint to pull the model
      */
-    return this.cortexAPI.pullModel(model)
+    return this.cortexAPI.pullModel(model, id)
   }
 
   /**
diff --git a/web/containers/ModalCancelDownload/index.tsx b/web/containers/ModalCancelDownload/index.tsx
index fdc583911..8a92c9279 100644
--- a/web/containers/ModalCancelDownload/index.tsx
+++ b/web/containers/ModalCancelDownload/index.tsx
@@ -4,7 +4,7 @@ import { Model } from '@janhq/core'
 
 import { Modal, Button, Progress, ModalClose } from '@janhq/joi'
 
-import { useAtomValue } from 'jotai'
+import { useAtomValue, useSetAtom } from 'jotai'
 
 import useDownloadModel from '@/hooks/useDownloadModel'
 
@@ -12,7 +12,7 @@ import { modelDownloadStateAtom } from '@/hooks/useDownloadState'
 
 import { formatDownloadPercentage } from '@/utils/converter'
 
-import { getDownloadingModelAtom } from '@/helpers/atoms/Model.atom'
+import { removeDownloadingModelAtom } from '@/helpers/atoms/Model.atom'
 
 type Props = {
   model: Model
@@ -21,20 +21,16 @@ type Props = {
 
 const ModalCancelDownload = ({ model, isFromList }: Props) => {
   const { abortModelDownload } = useDownloadModel()
-  const downloadingModels = useAtomValue(getDownloadingModelAtom)
+  const removeModelDownload = useSetAtom(removeDownloadingModelAtom)
   const allDownloadStates = useAtomValue(modelDownloadStateAtom)
   const downloadState = allDownloadStates[model.id]
 
-  const cancelText = `Cancel ${formatDownloadPercentage(downloadState.percent)}`
+  const cancelText = `Cancel ${formatDownloadPercentage(downloadState?.percent ?? 0)}`
 
   const onAbortDownloadClick = useCallback(() => {
-    if (downloadState?.modelId) {
-      const model = downloadingModels.find(
-        (model) => model === downloadState.modelId
-      )
-      if (model) abortModelDownload(model)
-    }
-  }, [downloadState, downloadingModels, abortModelDownload])
+    removeModelDownload(model.id)
+    abortModelDownload(downloadState?.modelId ?? model.id)
+  }, [downloadState, abortModelDownload, removeModelDownload, model])
 
   return (
     <Modal
@@ -51,13 +47,13 @@ const ModalCancelDownload = ({ model, isFromList }: Props) => {
               <Progress
                 className="w-[80px]"
                 value={
-                  formatDownloadPercentage(downloadState?.percent, {
+                  formatDownloadPercentage(downloadState?.percent ?? 0, {
                     hidePercentage: true,
                   }) as number
                 }
               />
               <span className="tabular-nums">
-                {formatDownloadPercentage(downloadState.percent)}
+                {formatDownloadPercentage(downloadState?.percent ?? 0)}
               </span>
             </div>
           </Button>
diff --git a/web/containers/ModelDropdown/index.tsx b/web/containers/ModelDropdown/index.tsx
index 7415f1165..a5874b3de 100644
--- a/web/containers/ModelDropdown/index.tsx
+++ b/web/containers/ModelDropdown/index.tsx
@@ -472,7 +472,10 @@ const ModelDropdown = ({
                                       size={18}
                                       className="cursor-pointer text-[hsla(var(--app-link))]"
                                       onClick={() =>
-                                        downloadModel(model.sources[0].url)
+                                        downloadModel(
+                                          model.sources[0].url,
+                                          model.id
+                                        )
                                       }
                                     />
                                   ) : (
@@ -559,7 +562,10 @@ const ModelDropdown = ({
                                     size={18}
                                     className="cursor-pointer text-[hsla(var(--app-link))]"
                                     onClick={() =>
-                                      downloadModel(model.sources[0].url)
+                                      downloadModel(
+                                        model.sources[0].url,
+                                        model.id
+                                      )
                                     }
                                   />
                                 ) : (
diff --git a/web/containers/Providers/EventListener.tsx b/web/containers/Providers/EventListener.tsx
index 1832256e2..5df59b0fd 100644
--- a/web/containers/Providers/EventListener.tsx
+++ b/web/containers/Providers/EventListener.tsx
@@ -23,11 +23,17 @@ import {
   removeInstallingExtensionAtom,
   setInstallingExtensionAtom,
 } from '@/helpers/atoms/Extension.atom'
+import {
+  addDownloadingModelAtom,
+  removeDownloadingModelAtom,
+} from '@/helpers/atoms/Model.atom'
 
 const EventListenerWrapper = ({ children }: PropsWithChildren) => {
   const setDownloadState = useSetAtom(setDownloadStateAtom)
   const setInstallingExtension = useSetAtom(setInstallingExtensionAtom)
   const removeInstallingExtension = useSetAtom(removeInstallingExtensionAtom)
+  const addDownloadingModel = useSetAtom(addDownloadingModelAtom)
+  const removeDownloadingModel = useSetAtom(removeDownloadingModelAtom)
 
   const onFileDownloadUpdate = useCallback(
     async (state: DownloadState) => {
@@ -40,6 +46,7 @@ const EventListenerWrapper = ({ children }: PropsWithChildren) => {
         }
         setInstallingExtension(state.extensionId!, installingExtensionState)
       } else {
+        addDownloadingModel(state.modelId)
         setDownloadState(state)
       }
     },
@@ -54,6 +61,7 @@ const EventListenerWrapper = ({ children }: PropsWithChildren) => {
       } else {
         state.downloadState = 'error'
         setDownloadState(state)
+        removeDownloadingModel(state.modelId)
       }
     },
     [setDownloadState, removeInstallingExtension]
@@ -68,6 +76,7 @@ const EventListenerWrapper = ({ children }: PropsWithChildren) => {
         state.downloadState = 'error'
         state.error = 'aborted'
         setDownloadState(state)
+        removeDownloadingModel(state.modelId)
       }
     },
     [setDownloadState, removeInstallingExtension]
@@ -79,6 +88,7 @@ const EventListenerWrapper = ({ children }: PropsWithChildren) => {
       if (state.downloadType !== 'extension') {
         state.downloadState = 'end'
         setDownloadState(state)
+        removeDownloadingModel(state.modelId)
       }
       events.emit(ModelEvent.OnModelsUpdate, {})
     },
diff --git a/web/hooks/useDownloadModel.ts b/web/hooks/useDownloadModel.ts
index 82ce593e2..3b25cb86f 100644
--- a/web/hooks/useDownloadModel.ts
+++ b/web/hooks/useDownloadModel.ts
@@ -1,11 +1,6 @@
 import { useCallback } from 'react'
 
-import {
-  events,
-  ExtensionTypeEnum,
-  ModelEvent,
-  ModelExtension,
-} from '@janhq/core'
+import { ExtensionTypeEnum, ModelExtension } from '@janhq/core'
 
 import { useSetAtom } from 'jotai'
 
@@ -19,13 +14,13 @@ import {
 } from '@/helpers/atoms/Model.atom'
 
 export default function useDownloadModel() {
-  const addDownloadingModel = useSetAtom(addDownloadingModelAtom)
   const removeDownloadingModel = useSetAtom(removeDownloadingModelAtom)
+  const addDownloadingModel = useSetAtom(addDownloadingModelAtom)
 
   const downloadModel = useCallback(
-    async (model: string) => {
-      addDownloadingModel(model)
-      localDownloadModel(model).catch((error) => {
+    async (model: string, id?: string) => {
+      addDownloadingModel(id ?? model)
+      downloadLocalModel(model, id).catch((error) => {
         if (error.message) {
           toaster({
             title: 'Download failed',
@@ -37,7 +32,7 @@ export default function useDownloadModel() {
         removeDownloadingModel(model)
       })
     },
-    [addDownloadingModel]
+    [removeDownloadingModel, addDownloadingModel]
   )
 
   const abortModelDownload = useCallback(async (model: string) => {
@@ -50,10 +45,10 @@ export default function useDownloadModel() {
   }
 }
 
-const localDownloadModel = async (model: string) =>
+const downloadLocalModel = async (model: string, id?: string) =>
   extensionManager
     .get<ModelExtension>(ExtensionTypeEnum.Model)
-    ?.pullModel(model)
+    ?.pullModel(model, id)
 
 const cancelModelDownload = async (model: string) =>
   extensionManager
diff --git a/web/hooks/useGetHFRepoData.ts b/web/hooks/useGetHFRepoData.ts
index 4e3308116..6f2ec2b57 100644
--- a/web/hooks/useGetHFRepoData.ts
+++ b/web/hooks/useGetHFRepoData.ts
@@ -2,6 +2,8 @@ import { useCallback, useState } from 'react'
 
 import { HuggingFaceRepoData } from '@janhq/core'
 
+import { fetchHuggingFaceRepoData } from '@/utils/huggingface'
+
 export const useGetHFRepoData = () => {
   const [error, setError] = useState<string | undefined>(undefined)
   const [loading, setLoading] = useState(false)
@@ -29,8 +31,5 @@ export const useGetHFRepoData = () => {
 const extensionGetHfRepoData = async (
   repoId: string
 ): Promise<HuggingFaceRepoData | undefined> => {
-  return Promise.resolve(undefined)
-  // return extensionManager
-  //   .get<ModelExtension>(ExtensionTypeEnum.Model)
-  //   ?.fetchHuggingFaceRepoData(repoId)
+  return fetchHuggingFaceRepoData(repoId)
 }
diff --git a/web/hooks/useSendChatMessage.ts b/web/hooks/useSendChatMessage.ts
index bab515a30..4bc91cad2 100644
--- a/web/hooks/useSendChatMessage.ts
+++ b/web/hooks/useSendChatMessage.ts
@@ -216,7 +216,7 @@ export default function useSendChatMessage() {
       ...activeThreadRef.current,
       updated: newMessage.created,
       metadata: {
-        ...(activeThreadRef.current.metadata ?? {}),
+        ...activeThreadRef.current.metadata,
         lastMessage: prompt,
       },
     }
@@ -256,7 +256,6 @@ export default function useSendChatMessage() {
     )
     request.messages = normalizeMessages(request.messages ?? [])
 
-    console.log(requestBuilder.model?.engine ?? modelRequest.engine, request)
     // Request for inference
     EngineManager.instance()
       .get(requestBuilder.model?.engine ?? modelRequest.engine ?? '')
diff --git a/web/screens/Hub/ModelList/ModelHeader/index.tsx b/web/screens/Hub/ModelList/ModelHeader/index.tsx
index ce5a12957..725b0216a 100644
--- a/web/screens/Hub/ModelList/ModelHeader/index.tsx
+++ b/web/screens/Hub/ModelList/ModelHeader/index.tsx
@@ -64,7 +64,7 @@ const ModelItemHeader = ({ model, onClick, open }: Props) => {
   const assistants = useAtomValue(assistantsAtom)
 
   const onDownloadClick = useCallback(() => {
-    downloadModel(model.sources[0].url)
+    downloadModel(model.sources[0].url, model.id)
   }, [model, downloadModel])
 
   const isDownloaded = downloadedModels.find((md) => md.id === model.id) != null
@@ -123,17 +123,6 @@ const ModelItemHeader = ({ model, onClick, open }: Props) => {
       className="cursor-pointer rounded-t-md bg-[hsla(var(--app-bg))]"
       onClick={onClick}
     >
-      {/* TODO: @faisal are we still using cover? */}
-      {/* {model.metadata.cover && imageLoaded && (
-        <div className="relative h-full w-full">
-          <img
-            onError={() => setImageLoaded(false)}
-            src={model.metadata.cover}
-            className="h-[250px] w-full object-cover"
-            alt={`Cover - ${model.id}`}
-          />
-        </div>
-      )} */}
       <div className="flex items-center justify-between px-4 py-2">
         <div className="flex items-center gap-2">
           <span className="line-clamp-1 text-base font-semibold">
diff --git a/web/screens/Settings/HuggingFaceRepoDetailModal/ModelDownloadRow/index.tsx b/web/screens/Settings/HuggingFaceRepoDetailModal/ModelDownloadRow/index.tsx
index 454905332..03413006f 100644
--- a/web/screens/Settings/HuggingFaceRepoDetailModal/ModelDownloadRow/index.tsx
+++ b/web/screens/Settings/HuggingFaceRepoDetailModal/ModelDownloadRow/index.tsx
@@ -20,6 +20,7 @@ import { assistantsAtom } from '@/helpers/atoms/Assistant.atom'
 
 import { importHuggingFaceModelStageAtom } from '@/helpers/atoms/HuggingFace.atom'
 import { downloadedModelsAtom } from '@/helpers/atoms/Model.atom'
+import { normalizeModelId } from '@/utils/model'
 
 type Props = {
   index: number
@@ -50,13 +51,13 @@ const ModelDownloadRow: React.FC<Props> = ({
 
   const onAbortDownloadClick = useCallback(() => {
     if (downloadUrl) {
-      abortModelDownload(downloadUrl)
+      abortModelDownload(normalizeModelId(downloadUrl))
     }
   }, [downloadUrl, abortModelDownload])
 
   const onDownloadClick = useCallback(async () => {
     if (downloadUrl) {
-      downloadModel(downloadUrl)
+      downloadModel(downloadUrl, normalizeModelId(downloadUrl))
     }
   }, [downloadUrl, downloadModel])
 
diff --git a/web/screens/Thread/ThreadCenterPanel/ChatBody/OnDeviceStarterScreen/index.tsx b/web/screens/Thread/ThreadCenterPanel/ChatBody/OnDeviceStarterScreen/index.tsx
index 0adc7ddd4..366575a40 100644
--- a/web/screens/Thread/ThreadCenterPanel/ChatBody/OnDeviceStarterScreen/index.tsx
+++ b/web/screens/Thread/ThreadCenterPanel/ChatBody/OnDeviceStarterScreen/index.tsx
@@ -168,7 +168,10 @@ const OnDeviceStarterScreen = ({ extensionHasSettings }: Props) => {
                                   size={18}
                                   className="cursor-pointer text-[hsla(var(--app-link))]"
                                   onClick={() =>
-                                    downloadModel(model.sources[0].url)
+                                    downloadModel(
+                                      model.sources[0].url,
+                                      model.id
+                                    )
                                   }
                                 />
                               ) : (
@@ -256,7 +259,10 @@ const OnDeviceStarterScreen = ({ extensionHasSettings }: Props) => {
                             theme="ghost"
                             className="!bg-[hsla(var(--secondary-bg))]"
                             onClick={() =>
-                              downloadModel(featModel.sources[0].url)
+                              downloadModel(
+                                featModel.sources[0].url,
+                                featModel.id
+                              )
                             }
                           >
                             Download
diff --git a/web/services/restService.ts b/web/services/restService.ts
index 73348caeb..3c1cfc6a8 100644
--- a/web/services/restService.ts
+++ b/web/services/restService.ts
@@ -9,7 +9,7 @@ export function openExternalUrl(url: string) {
 }
 
 // Define API routes based on different route types
-export const APIRoutes = [...CoreRoutes.map((r) => ({ path: `app`, route: r }))]
+export const APIRoutes = CoreRoutes.map((r) => ({ path: `app`, route: r }))
 
 // Define the restAPI object with methods for each API route
 export const restAPI = {
diff --git a/web/utils/huggingface.ts b/web/utils/huggingface.ts
new file mode 100644
index 000000000..328d684e6
--- /dev/null
+++ b/web/utils/huggingface.ts
@@ -0,0 +1,93 @@
+import { AllQuantizations, getFileSize, HuggingFaceRepoData } from '@janhq/core'
+
+export const fetchHuggingFaceRepoData = async (
+  repoId: string,
+  huggingFaceAccessToken?: string
+): Promise<HuggingFaceRepoData> => {
+  const sanitizedUrl = toHuggingFaceUrl(repoId)
+  console.debug('sanitizedUrl', sanitizedUrl)
+
+  const headers: Record<string, string> = {
+    Accept: 'application/json',
+  }
+
+  if (huggingFaceAccessToken && huggingFaceAccessToken.length > 0) {
+    headers['Authorization'] = `Bearer ${huggingFaceAccessToken}`
+  }
+
+  const res = await fetch(sanitizedUrl, {
+    headers: headers,
+  })
+  const response = await res.json()
+  if (response['error'] != null) {
+    throw new Error(response['error'])
+  }
+
+  const data = response as HuggingFaceRepoData
+
+  if (data.tags.indexOf('gguf') === -1) {
+    throw new Error(
+      `${repoId} is not supported. Only GGUF models are supported.`
+    )
+  }
+
+  const promises: Promise<number>[] = []
+
+  // fetching file sizes
+  const url = new URL(sanitizedUrl)
+  const paths = url.pathname.split('/').filter((e) => e.trim().length > 0)
+
+  for (const sibling of data.siblings) {
+    const downloadUrl = `https://huggingface.co/${paths[2]}/${paths[3]}/resolve/main/${sibling.rfilename}`
+    sibling.downloadUrl = downloadUrl
+    promises.push(getFileSize(downloadUrl))
+  }
+
+  const result = await Promise.all(promises)
+  for (let i = 0; i < data.siblings.length; i++) {
+    data.siblings[i].fileSize = result[i]
+  }
+
+  AllQuantizations.forEach((quantization) => {
+    data.siblings.forEach((sibling) => {
+      if (!sibling.quantization && sibling.rfilename.includes(quantization)) {
+        sibling.quantization = quantization
+      }
+    })
+  })
+
+  data.modelUrl = `https://huggingface.co/${paths[2]}/${paths[3]}`
+  return data
+}
+
+function toHuggingFaceUrl(repoId: string): string {
+  try {
+    const url = new URL(repoId)
+    if (url.host !== 'huggingface.co') {
+      throw new InvalidHostError(`Invalid Hugging Face repo URL: ${repoId}`)
+    }
+
+    const paths = url.pathname.split('/').filter((e) => e.trim().length > 0)
+    if (paths.length < 2) {
+      throw new InvalidHostError(`Invalid Hugging Face repo URL: ${repoId}`)
+    }
+
+    return `${url.origin}/api/models/${paths[0]}/${paths[1]}`
+  } catch (err) {
+    if (err instanceof InvalidHostError) {
+      throw err
+    }
+
+    if (repoId.startsWith('https')) {
+      throw new Error(`Cannot parse url: ${repoId}`)
+    }
+
+    return `https://huggingface.co/api/models/${repoId}`
+  }
+}
+class InvalidHostError extends Error {
+  constructor(message: string) {
+    super(message)
+    this.name = 'InvalidHostError'
+  }
+}
diff --git a/web/utils/model.ts b/web/utils/model.ts
new file mode 100644
index 000000000..00efc1155
--- /dev/null
+++ b/web/utils/model.ts
@@ -0,0 +1,3 @@
+export const normalizeModelId = (downloadUrl: string): string => {
+  return downloadUrl.split('/').pop() ?? downloadUrl
+}

From ba59425e6aa0808cd4d5fa86230d43b0d4260e5c Mon Sep 17 00:00:00 2001
From: Louis <louis@jan.ai>
Date: Mon, 21 Oct 2024 16:14:41 +0700
Subject: [PATCH 10/71] fix: tests

---
 .../extensions/engines/LocalOAIEngine.test.ts | 38 +---------------
 .../extensions/engines/LocalOAIEngine.ts      |  7 +--
 core/src/node/api/processors/download.test.ts |  3 +-
 core/src/types/model/modelEntity.ts           |  2 +-
 extensions/model-extension/rollup.config.ts   |  4 +-
 web/helpers/atoms/Model.atom.test.ts          | 32 +++++++++----
 web/helpers/atoms/Model.atom.ts               |  4 +-
 web/hooks/useDeleteModel.test.ts              |  4 +-
 web/hooks/useDownloadModel.test.ts            | 45 +++++++++----------
 web/hooks/useGetHFRepoData.test.ts            | 12 ++---
 web/hooks/useImportModel.test.ts              | 11 ++---
 web/hooks/useImportModel.ts                   |  1 +
 web/hooks/useModels.test.ts                   | 28 +++++-------
 .../ModelDownloadRow/index.tsx                |  3 +-
 14 files changed, 83 insertions(+), 111 deletions(-)

diff --git a/core/src/browser/extensions/engines/LocalOAIEngine.test.ts b/core/src/browser/extensions/engines/LocalOAIEngine.test.ts
index 4a36f6b12..8a7722f3a 100644
--- a/core/src/browser/extensions/engines/LocalOAIEngine.test.ts
+++ b/core/src/browser/extensions/engines/LocalOAIEngine.test.ts
@@ -44,48 +44,14 @@ describe('LocalOAIEngine', () => {
 
   it('should load model correctly', async () => {
     const model: Model = { engine: 'testProvider', file_path: 'path/to/model' } as any
-    const modelFolder = 'path/to'
-    const systemInfo = { os: 'testOS' }
-    const res = { error: null }
 
-    ;(dirName as jest.Mock).mockResolvedValue(modelFolder)
-    ;(systemInformation as jest.Mock).mockResolvedValue(systemInfo)
-    ;(executeOnMain as jest.Mock).mockResolvedValue(res)
-
-    await engine.loadModel(model)
-
-    expect(systemInformation).toHaveBeenCalled()
-    expect(executeOnMain).toHaveBeenCalledWith(
-      engine.nodeModule,
-      engine.loadModelFunctionName,
-      { modelFolder, model },
-      systemInfo
-    )
-    expect(events.emit).toHaveBeenCalledWith(ModelEvent.OnModelReady, model)
-  })
-
-  it('should handle load model error', async () => {
-    const model: any = { engine: 'testProvider', file_path: 'path/to/model' } as any
-    const modelFolder = 'path/to'
-    const systemInfo = { os: 'testOS' }
-    const res = { error: 'load error' }
-
-    ;(dirName as jest.Mock).mockResolvedValue(modelFolder)
-    ;(systemInformation as jest.Mock).mockResolvedValue(systemInfo)
-    ;(executeOnMain as jest.Mock).mockResolvedValue(res)
-
-    await expect(engine.loadModel(model)).rejects.toEqual('load error')
-
-    expect(events.emit).toHaveBeenCalledWith(ModelEvent.OnModelFail, { error: res.error })
+    expect(engine.loadModel(model)).toBeTruthy()
   })
 
   it('should unload model correctly', async () => {
     const model: Model = { engine: 'testProvider' } as any
 
-    await engine.unloadModel(model)
-
-    expect(executeOnMain).toHaveBeenCalledWith(engine.nodeModule, engine.unloadModelFunctionName)
-    expect(events.emit).toHaveBeenCalledWith(ModelEvent.OnModelStopped, {})
+    expect(engine.unloadModel(model)).toBeTruthy()
   })
 
   it('should not unload model if engine does not match', async () => {
diff --git a/core/src/browser/extensions/engines/LocalOAIEngine.ts b/core/src/browser/extensions/engines/LocalOAIEngine.ts
index cb5b6760e..e8bd8cdf2 100644
--- a/core/src/browser/extensions/engines/LocalOAIEngine.ts
+++ b/core/src/browser/extensions/engines/LocalOAIEngine.ts
@@ -36,11 +36,6 @@ export abstract class LocalOAIEngine extends OAIEngine {
    * Stops the model.
    */
   override async unloadModel(model?: Model) {
-    if (model?.engine && model.engine?.toString() !== this.provider) return Promise.resolve()
-
-    this.loadedModel = undefined
-    await executeOnMain(this.nodeModule, this.unloadModelFunctionName).then(() => {
-      events.emit(ModelEvent.OnModelStopped, {})
-    })
+    return Promise.resolve()
   }
 }
diff --git a/core/src/node/api/processors/download.test.ts b/core/src/node/api/processors/download.test.ts
index 370f1746f..21d94165d 100644
--- a/core/src/node/api/processors/download.test.ts
+++ b/core/src/node/api/processors/download.test.ts
@@ -8,7 +8,8 @@ jest.mock('../../helper', () => ({
 
 jest.mock('../../helper/path', () => ({
   validatePath: jest.fn().mockReturnValue('path/to/folder'),
-  normalizeFilePath: () => process.platform === 'win32' ? 'C:\\Users\path\\to\\file.gguf' : '/Users/path/to/file.gguf',
+  normalizeFilePath: () =>
+    process.platform === 'win32' ? 'C:\\Users\\path\\to\\file.gguf' : '/Users/path/to/file.gguf',
 }))
 
 jest.mock(
diff --git a/core/src/types/model/modelEntity.ts b/core/src/types/model/modelEntity.ts
index ed1db94bd..25ed95b8d 100644
--- a/core/src/types/model/modelEntity.ts
+++ b/core/src/types/model/modelEntity.ts
@@ -31,7 +31,7 @@ export enum InferenceEngine {
   cortex = 'cortex',
   cortex_llamacpp = 'llama-cpp',
   cortex_onnx = 'onnxruntime',
-  cortex_tensorrtllm = '.tensorrt-llm',
+  cortex_tensorrtllm = 'tensorrt-llm',
 }
 
 export type ModelArtifact = {
diff --git a/extensions/model-extension/rollup.config.ts b/extensions/model-extension/rollup.config.ts
index 781c4df84..64e62480f 100644
--- a/extensions/model-extension/rollup.config.ts
+++ b/extensions/model-extension/rollup.config.ts
@@ -20,8 +20,8 @@ export default [
       replace({
         preventAssignment: true,
         SETTINGS: JSON.stringify(settingJson),
-        API_URL: 'http://127.0.0.1:39291',
-        SOCKET_URL: 'ws://127.0.0.1:39291',
+        API_URL: JSON.stringify('http://127.0.0.1:39291'),
+        SOCKET_URL: JSON.stringify('ws://127.0.0.1:39291'),
       }),
       // Allow json resolution
       json(),
diff --git a/web/helpers/atoms/Model.atom.test.ts b/web/helpers/atoms/Model.atom.test.ts
index 57827efec..923f24df4 100644
--- a/web/helpers/atoms/Model.atom.test.ts
+++ b/web/helpers/atoms/Model.atom.test.ts
@@ -32,13 +32,22 @@ describe('Model.atom.ts', () => {
   })
 
   describe('showEngineListModelAtom', () => {
-    it('should initialize as an empty array', () => {
-      expect(ModelAtoms.showEngineListModelAtom.init).toEqual(['nitro'])
+    it('should initialize with local engines', () => {
+      expect(ModelAtoms.showEngineListModelAtom.init).toEqual([
+        'nitro',
+        'cortex',
+        'llama-cpp',
+        'onnxruntime',
+        'tensorrt-llm',
+      ])
     })
   })
 
   describe('addDownloadingModelAtom', () => {
     it('should add downloading model', async () => {
+      const { result: reset } = renderHook(() =>
+        useSetAtom(ModelAtoms.downloadingModelsAtom)
+      )
       const { result: setAtom } = renderHook(() =>
         useSetAtom(ModelAtoms.addDownloadingModelAtom)
       )
@@ -49,11 +58,16 @@ describe('Model.atom.ts', () => {
         setAtom.current({ id: '1' } as any)
       })
       expect(getAtom.current).toEqual([{ id: '1' }])
+      reset.current([])
     })
   })
 
   describe('removeDownloadingModelAtom', () => {
     it('should remove downloading model', async () => {
+      const { result: reset } = renderHook(() =>
+        useSetAtom(ModelAtoms.downloadingModelsAtom)
+      )
+
       const { result: setAtom } = renderHook(() =>
         useSetAtom(ModelAtoms.addDownloadingModelAtom)
       )
@@ -63,16 +77,21 @@ describe('Model.atom.ts', () => {
       const { result: getAtom } = renderHook(() =>
         useAtomValue(ModelAtoms.getDownloadingModelAtom)
       )
+      expect(getAtom.current).toEqual([])
       act(() => {
-        setAtom.current({ id: '1' } as any)
+        setAtom.current('1')
         removeAtom.current('1')
       })
       expect(getAtom.current).toEqual([])
+      reset.current([])
     })
   })
 
   describe('removeDownloadedModelAtom', () => {
     it('should remove downloaded model', async () => {
+      const { result: reset } = renderHook(() =>
+        useSetAtom(ModelAtoms.downloadingModelsAtom)
+      )
       const { result: setAtom } = renderHook(() =>
         useSetAtom(ModelAtoms.downloadedModelsAtom)
       )
@@ -94,6 +113,7 @@ describe('Model.atom.ts', () => {
         removeAtom.current('1')
       })
       expect(getAtom.current).toEqual([])
+      reset.current([])
     })
   })
 
@@ -284,10 +304,4 @@ describe('Model.atom.ts', () => {
       expect(importAtom.current[0]).toEqual([])
     })
   })
-
-  describe('defaultModelAtom', () => {
-    it('should initialize as undefined', () => {
-      expect(ModelAtoms.defaultModelAtom.init).toBeUndefined()
-    })
-  })
 })
diff --git a/web/helpers/atoms/Model.atom.ts b/web/helpers/atoms/Model.atom.ts
index 0f5367f64..dd4414801 100644
--- a/web/helpers/atoms/Model.atom.ts
+++ b/web/helpers/atoms/Model.atom.ts
@@ -64,13 +64,13 @@ export const stateModel = atom({ state: 'start', loading: false, model: '' })
 /**
  * Stores the list of models which are being downloaded.
  */
-const downloadingModelsAtom = atom<string[]>([])
+export const downloadingModelsAtom = atom<string[]>([])
 
 export const getDownloadingModelAtom = atom((get) => get(downloadingModelsAtom))
 
 export const addDownloadingModelAtom = atom(null, (get, set, model: string) => {
   const downloadingModels = get(downloadingModelsAtom)
-  if (!downloadingModels.find((e) => e === model)) {
+  if (!downloadingModels.includes(model)) {
     set(downloadingModelsAtom, [...downloadingModels, model])
   }
 })
diff --git a/web/hooks/useDeleteModel.test.ts b/web/hooks/useDeleteModel.test.ts
index 3a6587d7b..3ee0926f9 100644
--- a/web/hooks/useDeleteModel.test.ts
+++ b/web/hooks/useDeleteModel.test.ts
@@ -35,7 +35,7 @@ describe('useDeleteModel', () => {
       await result.current.deleteModel(mockModel)
     })
 
-    expect(mockDeleteModel).toHaveBeenCalledWith(mockModel)
+    expect(mockDeleteModel).toHaveBeenCalledWith('test-model')
     expect(toaster).toHaveBeenCalledWith({
       title: 'Model Deletion Successful',
       description: `Model ${mockModel.name} has been successfully deleted.`,
@@ -67,7 +67,7 @@ describe('useDeleteModel', () => {
       )
     })
 
-    expect(mockDeleteModel).toHaveBeenCalledWith(mockModel)
+    expect(mockDeleteModel).toHaveBeenCalledWith('test-model')
     expect(toaster).not.toHaveBeenCalled()
   })
 })
diff --git a/web/hooks/useDownloadModel.test.ts b/web/hooks/useDownloadModel.test.ts
index fc0b7c21f..ff75fbcd8 100644
--- a/web/hooks/useDownloadModel.test.ts
+++ b/web/hooks/useDownloadModel.test.ts
@@ -13,12 +13,6 @@ jest.mock('jotai', () => ({
 }))
 jest.mock('@janhq/core')
 jest.mock('@/extension/ExtensionManager')
-jest.mock('./useGpuSetting', () => ({
-  __esModule: true,
-  default: () => ({
-    getGpuSettings: jest.fn().mockResolvedValue({ some: 'gpuSettings' }),
-  }),
-}))
 
 describe('useDownloadModel', () => {
   beforeEach(() => {
@@ -29,25 +23,24 @@ describe('useDownloadModel', () => {
   it('should download a model', async () => {
     const mockModel: core.Model = {
       id: 'test-model',
-      sources: [{ filename: 'test.bin' }],
+      sources: [{ filename: 'test.bin', url: 'https://fake.url' }],
     } as core.Model
 
     const mockExtension = {
-      downloadModel: jest.fn().mockResolvedValue(undefined),
+      pullModel: jest.fn().mockResolvedValue(undefined),
     }
     ;(useSetAtom as jest.Mock).mockReturnValue(() => undefined)
     ;(extensionManager.get as jest.Mock).mockReturnValue(mockExtension)
 
     const { result } = renderHook(() => useDownloadModel())
 
-    await act(async () => {
-      await result.current.downloadModel(mockModel)
+    act(() => {
+      result.current.downloadModel(mockModel.sources[0].url, mockModel.id)
     })
 
-    expect(mockExtension.downloadModel).toHaveBeenCalledWith(
-      mockModel,
-      { some: 'gpuSettings' },
-      { ignoreSSL: undefined, proxy: '' }
+    expect(mockExtension.pullModel).toHaveBeenCalledWith(
+      mockModel.sources[0].url,
+      mockModel.id
     )
   })
 
@@ -58,15 +51,18 @@ describe('useDownloadModel', () => {
     } as core.Model
 
     ;(core.joinPath as jest.Mock).mockResolvedValue('/path/to/model/test.bin')
-    ;(core.abortDownload as jest.Mock).mockResolvedValue(undefined)
+    const mockExtension = {
+      cancelModelPull: jest.fn().mockResolvedValue(undefined),
+    }
     ;(useSetAtom as jest.Mock).mockReturnValue(() => undefined)
+    ;(extensionManager.get as jest.Mock).mockReturnValue(mockExtension)
     const { result } = renderHook(() => useDownloadModel())
 
-    await act(async () => {
-      await result.current.abortModelDownload(mockModel)
+    act(() => {
+      result.current.abortModelDownload(mockModel.id)
     })
 
-    expect(core.abortDownload).toHaveBeenCalledWith('/path/to/model/test.bin')
+    expect(mockExtension.cancelModelPull).toHaveBeenCalledWith('test-model')
   })
 
   it('should handle proxy settings', async () => {
@@ -76,7 +72,7 @@ describe('useDownloadModel', () => {
     } as core.Model
 
     const mockExtension = {
-      downloadModel: jest.fn().mockResolvedValue(undefined),
+      pullModel: jest.fn().mockResolvedValue(undefined),
     }
     ;(useSetAtom as jest.Mock).mockReturnValue(() => undefined)
     ;(extensionManager.get as jest.Mock).mockReturnValue(mockExtension)
@@ -85,14 +81,13 @@ describe('useDownloadModel', () => {
 
     const { result } = renderHook(() => useDownloadModel())
 
-    await act(async () => {
-      await result.current.downloadModel(mockModel)
+    act(() => {
+      result.current.downloadModel(mockModel.sources[0].url, mockModel.id)
     })
 
-    expect(mockExtension.downloadModel).toHaveBeenCalledWith(
-      mockModel,
-      expect.objectContaining({ some: 'gpuSettings' }),
-      expect.anything()
+    expect(mockExtension.pullModel).toHaveBeenCalledWith(
+      mockModel.sources[0].url,
+      mockModel.id
     )
   })
 })
diff --git a/web/hooks/useGetHFRepoData.test.ts b/web/hooks/useGetHFRepoData.test.ts
index eaf86d79a..01055612d 100644
--- a/web/hooks/useGetHFRepoData.test.ts
+++ b/web/hooks/useGetHFRepoData.test.ts
@@ -1,6 +1,10 @@
+/**
+ * @jest-environment jsdom
+ */
 import { renderHook, act } from '@testing-library/react'
 import { useGetHFRepoData } from './useGetHFRepoData'
 import { extensionManager } from '@/extension'
+import * as hf from '@/utils/huggingface'
 
 jest.mock('@/extension', () => ({
   extensionManager: {
@@ -8,6 +12,8 @@ jest.mock('@/extension', () => ({
   },
 }))
 
+jest.mock('@/utils/huggingface')
+
 describe('useGetHFRepoData', () => {
   beforeEach(() => {
     jest.clearAllMocks()
@@ -15,10 +21,7 @@ describe('useGetHFRepoData', () => {
 
   it('should fetch HF repo data successfully', async () => {
     const mockData = { name: 'Test Repo', stars: 100 }
-    const mockFetchHuggingFaceRepoData = jest.fn().mockResolvedValue(mockData)
-    ;(extensionManager.get as jest.Mock).mockReturnValue({
-      fetchHuggingFaceRepoData: mockFetchHuggingFaceRepoData,
-    })
+    ;(hf.fetchHuggingFaceRepoData as jest.Mock).mockResolvedValue(mockData)
 
     const { result } = renderHook(() => useGetHFRepoData())
 
@@ -34,6 +37,5 @@ describe('useGetHFRepoData', () => {
 
     expect(result.current.error).toBeUndefined()
     expect(await data).toEqual(mockData)
-    expect(mockFetchHuggingFaceRepoData).toHaveBeenCalledWith('test-repo')
   })
 })
diff --git a/web/hooks/useImportModel.test.ts b/web/hooks/useImportModel.test.ts
index 2148f581b..d37e4a853 100644
--- a/web/hooks/useImportModel.test.ts
+++ b/web/hooks/useImportModel.test.ts
@@ -18,7 +18,7 @@ describe('useImportModel', () => {
   it('should import models successfully', async () => {
     const mockImportModels = jest.fn().mockResolvedValue(undefined)
     const mockExtension = {
-      importModels: mockImportModels,
+      importModel: mockImportModels,
     } as any
 
     jest.spyOn(extensionManager, 'get').mockReturnValue(mockExtension)
@@ -26,15 +26,16 @@ describe('useImportModel', () => {
     const { result } = renderHook(() => useImportModel())
 
     const models = [
-      { importId: '1', name: 'Model 1', path: '/path/to/model1' },
-      { importId: '2', name: 'Model 2', path: '/path/to/model2' },
+      { modelId: '1', path: '/path/to/model1' },
+      { modelId: '2', path: '/path/to/model2' },
     ] as any
 
     await act(async () => {
       await result.current.importModels(models, 'local' as any)
     })
 
-    expect(mockImportModels).toHaveBeenCalledWith(models, 'local')
+    expect(mockImportModels).toHaveBeenCalledWith('1', '/path/to/model1')
+    expect(mockImportModels).toHaveBeenCalledWith('2', '/path/to/model2')
   })
 
   it('should update model info successfully', async () => {
@@ -42,7 +43,7 @@ describe('useImportModel', () => {
       .fn()
       .mockResolvedValue({ id: 'model-1', name: 'Updated Model' })
     const mockExtension = {
-      updateModelInfo: mockUpdateModelInfo,
+      updateModel: mockUpdateModelInfo,
     } as any
 
     jest.spyOn(extensionManager, 'get').mockReturnValue(mockExtension)
diff --git a/web/hooks/useImportModel.ts b/web/hooks/useImportModel.ts
index df6b085ca..5650c73bd 100644
--- a/web/hooks/useImportModel.ts
+++ b/web/hooks/useImportModel.ts
@@ -103,6 +103,7 @@ const useImportModel = () => {
 
 const localImportModels = async (
   models: ImportingModel[],
+  // TODO: @louis - We will set this option when cortex.cpp supports it
   optionType: OptionType
 ): Promise<void> => {
   await models
diff --git a/web/hooks/useModels.test.ts b/web/hooks/useModels.test.ts
index 4c53ffaa7..33c152672 100644
--- a/web/hooks/useModels.test.ts
+++ b/web/hooks/useModels.test.ts
@@ -1,7 +1,7 @@
 // useModels.test.ts
 
 import { renderHook, act } from '@testing-library/react'
-import { events, ModelEvent } from '@janhq/core'
+import { events, ModelEvent, ModelManager } from '@janhq/core'
 import { extensionManager } from '@/extension'
 
 // Mock dependencies
@@ -11,18 +11,11 @@ jest.mock('@/extension')
 import useModels from './useModels'
 
 // Mock data
-const mockDownloadedModels = [
+const models = [
   { id: 'model-1', name: 'Model 1' },
   { id: 'model-2', name: 'Model 2' },
 ]
 
-const mockConfiguredModels = [
-  { id: 'model-3', name: 'Model 3' },
-  { id: 'model-4', name: 'Model 4' },
-]
-
-const mockDefaultModel = { id: 'default-model', name: 'Default Model' }
-
 describe('useModels', () => {
   beforeEach(() => {
     jest.clearAllMocks()
@@ -30,20 +23,23 @@ describe('useModels', () => {
 
   it('should fetch and set models on mount', async () => {
     const mockModelExtension = {
-      getDownloadedModels: jest.fn().mockResolvedValue(mockDownloadedModels),
-      getConfiguredModels: jest.fn().mockResolvedValue(mockConfiguredModels),
-      getDefaultModel: jest.fn().mockResolvedValue(mockDefaultModel),
+      getModels: jest.fn().mockResolvedValue(models),
     } as any
+    ;(ModelManager.instance as jest.Mock).mockReturnValue({
+      models: {
+        values: () => ({
+          toArray: () => {},
+        }),
+      },
+    })
 
     jest.spyOn(extensionManager, 'get').mockReturnValue(mockModelExtension)
 
-    await act(async () => {
+    act(() => {
       renderHook(() => useModels())
     })
 
-    expect(mockModelExtension.getDownloadedModels).toHaveBeenCalled()
-    expect(mockModelExtension.getConfiguredModels).toHaveBeenCalled()
-    expect(mockModelExtension.getDefaultModel).toHaveBeenCalled()
+    expect(mockModelExtension.getModels).toHaveBeenCalled()
   })
 
   it('should remove event listener on unmount', async () => {
diff --git a/web/screens/Settings/HuggingFaceRepoDetailModal/ModelDownloadRow/index.tsx b/web/screens/Settings/HuggingFaceRepoDetailModal/ModelDownloadRow/index.tsx
index 03413006f..ccb966829 100644
--- a/web/screens/Settings/HuggingFaceRepoDetailModal/ModelDownloadRow/index.tsx
+++ b/web/screens/Settings/HuggingFaceRepoDetailModal/ModelDownloadRow/index.tsx
@@ -15,12 +15,13 @@ import { modelDownloadStateAtom } from '@/hooks/useDownloadState'
 
 import { formatDownloadPercentage, toGibibytes } from '@/utils/converter'
 
+import { normalizeModelId } from '@/utils/model'
+
 import { mainViewStateAtom } from '@/helpers/atoms/App.atom'
 import { assistantsAtom } from '@/helpers/atoms/Assistant.atom'
 
 import { importHuggingFaceModelStageAtom } from '@/helpers/atoms/HuggingFace.atom'
 import { downloadedModelsAtom } from '@/helpers/atoms/Model.atom'
-import { normalizeModelId } from '@/utils/model'
 
 type Props = {
   index: number

From b5edc12b289421396e86b5c0e3c13872929b024d Mon Sep 17 00:00:00 2001
From: "Louis (aider)" <louis@jan.ai>
Date: Mon, 21 Oct 2024 16:33:40 +0700
Subject: [PATCH 11/71] feat: add tests for huggingface utility functions

---
 web/utils/huggingface.test.ts | 96 +++++++++++++++++++++++++++++++++++
 web/utils/huggingface.ts      |  4 +-
 2 files changed, 98 insertions(+), 2 deletions(-)
 create mode 100644 web/utils/huggingface.test.ts

diff --git a/web/utils/huggingface.test.ts b/web/utils/huggingface.test.ts
new file mode 100644
index 000000000..db7dbf3e1
--- /dev/null
+++ b/web/utils/huggingface.test.ts
@@ -0,0 +1,96 @@
+import {
+  fetchHuggingFaceRepoData,
+  toHuggingFaceUrl,
+  InvalidHostError,
+} from './huggingface'
+import { getFileSize } from '@janhq/core'
+
+// Mock the getFileSize function
+jest.mock('@janhq/core', () => ({
+  getFileSize: jest.fn(),
+  AllQuantizations: ['q4_0', 'q4_1', 'q5_0', 'q5_1', 'q8_0'],
+}))
+
+describe('huggingface utils', () => {
+  let originalFetch: typeof global.fetch
+
+  beforeAll(() => {
+    originalFetch = global.fetch
+    global.fetch = jest.fn()
+  })
+
+  afterAll(() => {
+    global.fetch = originalFetch
+  })
+
+  beforeEach(() => {
+    jest.resetAllMocks()
+  })
+
+  describe('fetchHuggingFaceRepoData', () => {
+    it('should fetch and process repo data correctly', async () => {
+      const mockResponse = {
+        tags: ['gguf'],
+        siblings: [
+          { rfilename: 'model-q4_0.gguf' },
+          { rfilename: 'model-q8_0.gguf' },
+        ],
+      }
+
+      ;(global.fetch as jest.Mock).mockResolvedValue({
+        json: jest.fn().mockResolvedValue(mockResponse),
+      })
+      ;(getFileSize as jest.Mock).mockResolvedValue(1000000)
+
+      const result = await fetchHuggingFaceRepoData('user/repo')
+
+      expect(result.tags).toEqual(['gguf'])
+      expect(result.siblings).toHaveLength(2)
+      expect(result.siblings[0].fileSize).toBe(1000000)
+      expect(result.siblings[0].quantization).toBe('q4_0')
+      expect(result.modelUrl).toBe('https://huggingface.co/user/repo')
+    })
+
+    it('should throw an error if the model is not GGUF', async () => {
+      const mockResponse = {
+        tags: ['not-gguf'],
+      }
+
+      ;(global.fetch as jest.Mock).mockResolvedValue({
+        json: jest.fn().mockResolvedValue(mockResponse),
+      })
+
+      await expect(fetchHuggingFaceRepoData('user/repo')).rejects.toThrow(
+        'user/repo is not supported. Only GGUF models are supported.'
+      )
+    })
+
+    // TODO: add cases for failed requests and malformed responses.
+  })
+
+  describe('toHuggingFaceUrl', () => {
+    it('should convert a valid repo ID to a Hugging Face API URL', () => {
+      expect(toHuggingFaceUrl('user/repo')).toBe(
+        'https://huggingface.co/api/models/user/repo'
+      )
+    })
+
+    it('should handle a full Hugging Face URL', () => {
+      expect(toHuggingFaceUrl('https://huggingface.co/user/repo')).toBe(
+        'https://huggingface.co/api/models/user/repo'
+      )
+    })
+
+    it('should throw an InvalidHostError for non-Hugging Face URLs', () => {
+      expect(() => toHuggingFaceUrl('https://example.com/user/repo')).toThrow(
+        InvalidHostError
+      )
+    })
+
+    it('should throw an error for invalid URLs', () => {
+      expect(() => toHuggingFaceUrl('https://invalid-url')).toThrow(
+        'Invalid Hugging Face repo URL: https://invalid-url'
+      )
+    })
+  })
+})
diff --git a/web/utils/huggingface.ts b/web/utils/huggingface.ts
index 328d684e6..ceddc6867 100644
--- a/web/utils/huggingface.ts
+++ b/web/utils/huggingface.ts
@@ -60,7 +60,7 @@ export const fetchHuggingFaceRepoData = async (
   return data
 }
 
-function toHuggingFaceUrl(repoId: string): string {
+export function toHuggingFaceUrl(repoId: string): string {
   try {
     const url = new URL(repoId)
     if (url.host !== 'huggingface.co') {
@@ -85,7 +85,7 @@ function toHuggingFaceUrl(repoId: string): string {
     return `https://huggingface.co/api/models/${repoId}`
   }
 }
-class InvalidHostError extends Error {
+export class InvalidHostError extends Error {
   constructor(message: string) {
     super(message)
     this.name = 'InvalidHostError'

From 8129c2319fadf80ddf34a3c2ea6ef65e619a207b Mon Sep 17 00:00:00 2001
From: Louis <louis@jan.ai>
Date: Mon, 21 Oct 2024 17:11:25 +0700
Subject: [PATCH 12/71] fix: downloaded models should account for remote models

---
 web/hooks/useModels.ts | 10 ++++++++--
 1 file changed, 8 insertions(+), 2 deletions(-)

diff --git a/web/hooks/useModels.ts b/web/hooks/useModels.ts
index 1cbd970d6..b09839457 100644
--- a/web/hooks/useModels.ts
+++ b/web/hooks/useModels.ts
@@ -11,6 +11,8 @@ import {
 
 import { useSetAtom } from 'jotai'
 
+import { isLocalEngine } from '@/utils/modelEngine'
+
 import { extensionManager } from '@/extension'
 import {
   configuredModelsAtom,
@@ -28,8 +30,12 @@ const useModels = () => {
 
   const getData = useCallback(() => {
     const getDownloadedModels = async () => {
-      const models = await getModels()
-      setDownloadedModels(models)
+      const localModels = await getModels()
+      const remoteModels = ModelManager.instance()
+        .models.values()
+        .toArray()
+        .filter((e) => !isLocalEngine(e.engine))
+      setDownloadedModels([...localModels, ...remoteModels])
     }
 
     const getExtensionModels = async () => {

From 895c3d424642ad9adfdd62f644aeffee49d0056c Mon Sep 17 00:00:00 2001
From: Louis <louis@jan.ai>
Date: Mon, 21 Oct 2024 17:22:15 +0700
Subject: [PATCH 13/71] fix: tests - useModels with remote models filter

---
 extensions/model-extension/src/index.test.ts | 3 +--
 web/hooks/useModels.test.ts                  | 5 +++--
 2 files changed, 4 insertions(+), 4 deletions(-)

diff --git a/extensions/model-extension/src/index.test.ts b/extensions/model-extension/src/index.test.ts
index 05598c30d..e514f8ce3 100644
--- a/extensions/model-extension/src/index.test.ts
+++ b/extensions/model-extension/src/index.test.ts
@@ -1,5 +1,4 @@
 import JanModelExtension from './index'
-import { Model } from '@janhq/core'
 
 let SETTINGS = []
 // @ts-ignore
@@ -72,7 +71,7 @@ describe('JanModelExtension', () => {
   })
 
   it('should update a model', async () => {
-    const model: Partial<Model> = { id: 'test-model' }
+    const model = { id: 'test-model' }
     const updatedModel = await extension.updateModel(model)
     expect(updatedModel).toEqual({})
     expect(mockCortexAPI.updateModel).toHaveBeenCalledWith(model)
diff --git a/web/hooks/useModels.test.ts b/web/hooks/useModels.test.ts
index 33c152672..b58102144 100644
--- a/web/hooks/useModels.test.ts
+++ b/web/hooks/useModels.test.ts
@@ -1,5 +1,4 @@
 // useModels.test.ts
-
 import { renderHook, act } from '@testing-library/react'
 import { events, ModelEvent, ModelManager } from '@janhq/core'
 import { extensionManager } from '@/extension'
@@ -28,7 +27,9 @@ describe('useModels', () => {
     ;(ModelManager.instance as jest.Mock).mockReturnValue({
       models: {
         values: () => ({
-          toArray: () => {},
+          toArray: () => ({
+            filter: () => models,
+          }),
         }),
       },
     })

From 5edf121d96909d6de32ff096d08811ad533fe80c Mon Sep 17 00:00:00 2001
From: Louis <louis@jan.ai>
Date: Mon, 21 Oct 2024 18:31:16 +0700
Subject: [PATCH 14/71] test: add tests to legacy model-json utilities

---
 .../model-extension/src/model-json.test.ts    | 80 +++++++++++++++++++
 extensions/model-extension/src/model-json.ts  |  2 +-
 2 files changed, 81 insertions(+), 1 deletion(-)
 create mode 100644 extensions/model-extension/src/model-json.test.ts

diff --git a/extensions/model-extension/src/model-json.test.ts b/extensions/model-extension/src/model-json.test.ts
new file mode 100644
index 000000000..a4ea5bc0b
--- /dev/null
+++ b/extensions/model-extension/src/model-json.test.ts
@@ -0,0 +1,80 @@
+import { scanModelsFolder, getModelJsonPath } from './model-json'
+
+// Mock the @janhq/core module
+jest.mock('@janhq/core', () => ({
+  fs: {
+    existsSync: jest.fn(),
+    readdirSync: jest.fn(),
+    fileStat: jest.fn(),
+    readFileSync: jest.fn(),
+  },
+  joinPath: jest.fn((paths) => paths.join('/')),
+}))
+
+// Import the mocked fs and joinPath after the mock is set up
+const { fs } = jest.requireMock('@janhq/core')
+
+describe('model-json', () => {
+  beforeEach(() => {
+    jest.clearAllMocks()
+  })
+
+  describe('scanModelsFolder', () => {
+    it('should return an empty array when models folder does not exist', async () => {
+      fs.existsSync.mockReturnValue(false)
+
+      const result = await scanModelsFolder()
+      expect(result).toEqual([])
+    })
+
+    it('should return an array of models when valid model folders exist', async () => {
+      const mockModelJson = {
+        id: 'test-model',
+        sources: [
+          {
+            filename: 'test-model',
+            url: 'file://models/test-model/test-model.gguf',
+          },
+        ],
+      }
+
+      fs.existsSync.mockReturnValue(true)
+      fs.readdirSync.mockReturnValueOnce(['test-model'])
+      fs.fileStat.mockResolvedValue({ isDirectory: () => true })
+      fs.readFileSync.mockReturnValue(JSON.stringify(mockModelJson))
+      fs.readdirSync.mockReturnValueOnce(['test-model.gguf', 'model.json'])
+
+      const result = await scanModelsFolder()
+      expect(result).toHaveLength(1)
+      expect(result[0]).toMatchObject(mockModelJson)
+    })
+  })
+
+  describe('getModelJsonPath', () => {
+    it('should return undefined when folder does not exist', async () => {
+      fs.existsSync.mockReturnValue(false)
+
+      const result = await getModelJsonPath('non-existent-folder')
+      expect(result).toBeUndefined()
+    })
+
+    it('should return the path when model.json exists in the root folder', async () => {
+      fs.existsSync.mockReturnValue(true)
+      fs.readdirSync.mockReturnValue(['model.json'])
+
+      const result = await getModelJsonPath('test-folder')
+      expect(result).toBe('test-folder/model.json')
+    })
+
+    it('should return the path when model.json exists in a subfolder', async () => {
+      fs.existsSync.mockReturnValue(true)
+      fs.readdirSync
+        .mockReturnValueOnce(['subfolder'])
+        .mockReturnValueOnce(['model.json'])
+      fs.fileStat.mockResolvedValue({ isDirectory: () => true })
+
+      const result = await getModelJsonPath('test-folder')
+      expect(result).toBe('test-folder/subfolder/model.json')
+    })
+  })
+})
diff --git a/extensions/model-extension/src/model-json.ts b/extensions/model-extension/src/model-json.ts
index af6f95b36..46eee3482 100644
--- a/extensions/model-extension/src/model-json.ts
+++ b/extensions/model-extension/src/model-json.ts
@@ -57,7 +57,7 @@ export const scanModelsFolder = async (): Promise<Model[]> => {
               !source.url.startsWith(`https://`)
           )
         )
-        if (existFiles.every((exist) => exist)) return true
+        if (existFiles.every((exist) => exist)) return model
 
         const result = await fs
           .readdirSync(await joinPath([_homeDir, dirName]))

From 716fd96d5679c4a716e2e04545d6937c7eb4330a Mon Sep 17 00:00:00 2001
From: Louis <louis@jan.ai>
Date: Mon, 21 Oct 2024 18:54:26 +0700
Subject: [PATCH 15/71] test: add tests for migration strategy

---
 extensions/model-extension/src/index.ts       |  23 ++-
 .../model-extension/src/migration.test.ts     | 167 ++++++++++++++++++
 2 files changed, 178 insertions(+), 12 deletions(-)
 create mode 100644 extensions/model-extension/src/migration.test.ts

diff --git a/extensions/model-extension/src/index.ts b/extensions/model-extension/src/index.ts
index 38fd0634a..ea026a59d 100644
--- a/extensions/model-extension/src/index.ts
+++ b/extensions/model-extension/src/index.ts
@@ -86,25 +86,24 @@ export default class JanModelExtension extends ModelExtension {
      * it would reset app cache and app will not function properly
      * should compare and try import
      */
+    let currentModels: Model[] = []
 
     if (!localStorage.getItem(ExtensionEnum.downloadedModels)) {
       // Updated from an older version than 0.5.5
       // Scan through the models folder and import them (Legacy flow)
       // Return models immediately
-      return scanModelsFolder().then((models) => {
+      currentModels = await scanModelsFolder().then((models) => {
         return models ?? []
       })
-    }
-
-    let currentModels: Model[] = []
-
-    try {
-      currentModels = JSON.parse(
-        localStorage.getItem(ExtensionEnum.downloadedModels)
-      ) as Model[]
-    } catch (e) {
-      currentModels = []
-      console.error(e)
+    } else {
+      try {
+        currentModels = JSON.parse(
+          localStorage.getItem(ExtensionEnum.downloadedModels)
+        ) as Model[]
+      } catch (e) {
+        currentModels = []
+        console.error(e)
+      }
     }
 
     /**
diff --git a/extensions/model-extension/src/migration.test.ts b/extensions/model-extension/src/migration.test.ts
new file mode 100644
index 000000000..a3ddfa87c
--- /dev/null
+++ b/extensions/model-extension/src/migration.test.ts
@@ -0,0 +1,167 @@
+import { Model, InferenceEngine } from '@janhq/core'
+import JanModelExtension from './index'
+
+// Mock the @janhq/core module
+jest.mock('@janhq/core', () => ({
+  ModelExtension: class {},
+  InferenceEngine: {
+    nitro: 'nitro',
+  },
+  joinPath: jest.fn(),
+  dirName: jest.fn(),
+}))
+
+// Mock the CortexAPI
+jest.mock('./cortex', () => ({
+  CortexAPI: jest.fn().mockImplementation(() => ({
+    getModels: jest.fn(),
+    importModel: jest.fn(),
+  })),
+}))
+
+// Mock the model-json module
+jest.mock('./model-json', () => ({
+  scanModelsFolder: jest.fn(),
+}))
+
+// Import the mocked scanModelsFolder after the mock is set up
+const { scanModelsFolder } = jest.requireMock('./model-json')
+
+describe('JanModelExtension', () => {
+  let extension: JanModelExtension
+  let mockLocalStorage: { [key: string]: string }
+  let mockCortexAPI: jest.Mock
+
+  beforeEach(() => {
+    // @ts-ignore
+    extension = new JanModelExtension()
+    mockLocalStorage = {}
+    mockCortexAPI = extension.cortexAPI as any
+
+    // Mock localStorage
+    Object.defineProperty(global, 'localStorage', {
+      value: {
+        getItem: jest.fn((key) => mockLocalStorage[key]),
+        setItem: jest.fn((key, value) => {
+          mockLocalStorage[key] = value
+        }),
+      },
+      writable: true,
+    })
+  })
+
+  describe('getModels', () => {
+    it('should scan models folder when localStorage is empty', async () => {
+      const mockModels: Model[] = [
+        {
+          id: 'model1',
+          object: 'model',
+          version: '1',
+          format: 'gguf',
+          engine: InferenceEngine.nitro,
+          sources: [
+            { filename: 'model1.gguf', url: 'file://models/model1.gguf' },
+          ],
+          file_path: '/path/to/model1',
+        },
+        {
+          id: 'model2',
+          object: 'model',
+          version: '1',
+          format: 'gguf',
+          engine: InferenceEngine.nitro,
+          sources: [
+            { filename: 'model2.gguf', url: 'file://models/model2.gguf' },
+          ],
+          file_path: '/path/to/model2',
+        },
+      ] as any
+      scanModelsFolder.mockResolvedValue(mockModels)
+      extension.cortexAPI.importModel = jest
+        .fn()
+        .mockResolvedValueOnce(mockModels[0])
+      extension.cortexAPI.getModels = jest
+        .fn()
+        .mockResolvedValue([mockModels[0]])
+      extension.cortexAPI.importModel = jest
+        .fn()
+        .mockResolvedValueOnce(mockModels[1])
+      extension.cortexAPI.getModels = jest
+        .fn()
+        .mockResolvedValue([mockModels[0], mockModels[1]])
+
+      const result = await extension.getModels()
+      expect(scanModelsFolder).toHaveBeenCalled()
+      expect(result).toEqual(mockModels)
+    })
+
+    it('should import models when there are models to import', async () => {
+      const mockModels: Model[] = [
+        {
+          id: 'model1',
+          object: 'model',
+          version: '1',
+          format: 'gguf',
+          engine: InferenceEngine.nitro,
+          file_path: '/path/to/model1',
+          sources: [
+            { filename: 'model1.gguf', url: 'file://models/model1.gguf' },
+          ],
+        },
+        {
+          id: 'model2',
+          object: 'model',
+          version: '1',
+          format: 'gguf',
+          engine: InferenceEngine.nitro,
+          file_path: '/path/to/model2',
+          sources: [
+            { filename: 'model2.gguf', url: 'file://models/model2.gguf' },
+          ],
+        },
+      ] as any
+      mockLocalStorage['downloadedModels'] = JSON.stringify(mockModels)
+
+      extension.cortexAPI.getModels = jest.fn().mockResolvedValue([])
+      extension.importModel = jest.fn().mockResolvedValue(undefined)
+
+      const result = await extension.getModels()
+
+      expect(extension.importModel).toHaveBeenCalledTimes(2)
+      expect(result).toEqual(mockModels)
+    })
+
+    it('should return models from cortexAPI when all models are already imported', async () => {
+      const mockModels: Model[] = [
+        {
+          id: 'model1',
+          object: 'model',
+          version: '1',
+          format: 'gguf',
+          engine: InferenceEngine.nitro,
+          sources: [
+            { filename: 'model1.gguf', url: 'file://models/model1.gguf' },
+          ],
+        },
+        {
+          id: 'model2',
+          object: 'model',
+          version: '1',
+          format: 'gguf',
+          engine: InferenceEngine.nitro,
+          sources: [
+            { filename: 'model2.gguf', url: 'file://models/model2.gguf' },
+          ],
+        },
+      ] as any
+      mockLocalStorage['downloadedModels'] = JSON.stringify(mockModels)
+
+      extension.cortexAPI.getModels = jest.fn().mockResolvedValue(mockModels)
+
+      const result = await extension.getModels()
+
+      expect(extension.cortexAPI.getModels).toHaveBeenCalled()
+      expect(result).toEqual(mockModels)
+    })
+  })
+})

From 718ee8dfa93e7511f662a9c2801fef619270cb14 Mon Sep 17 00:00:00 2001
From: Louis <louis@jan.ai>
Date: Mon, 21 Oct 2024 21:16:01 +0700
Subject: [PATCH 16/71] test: reset data on test - add debug

---
 electron/tests/config/fixtures.ts  | 8 +++++++-
 electron/tests/e2e/hub.e2e.spec.ts | 2 +-
 2 files changed, 8 insertions(+), 2 deletions(-)

diff --git a/electron/tests/config/fixtures.ts b/electron/tests/config/fixtures.ts
index 22d83b636..bc3f8a7d1 100644
--- a/electron/tests/config/fixtures.ts
+++ b/electron/tests/config/fixtures.ts
@@ -15,6 +15,8 @@ import {
 import { Constants } from './constants'
 import { HubPage } from '../pages/hubPage'
 import { CommonActions } from '../pages/commonActions'
+import { rmSync } from 'fs'
+import * as path from 'path'
 
 export let electronApp: ElectronApplication
 export let page: Page
@@ -103,10 +105,14 @@ export const test = base.extend<
     },
     { auto: true },
   ],
-  
 })
 
 test.beforeAll(async () => {
+  await rmSync(path.join(__dirname, '../../test-data'), {
+    recursive: true,
+    force: true,
+  })
+
   test.setTimeout(TIMEOUT)
   await setupElectron()
   await page.waitForSelector('img[alt="Jan - Logo"]', {
diff --git a/electron/tests/e2e/hub.e2e.spec.ts b/electron/tests/e2e/hub.e2e.spec.ts
index 23d4d0b6d..9b1b8305f 100644
--- a/electron/tests/e2e/hub.e2e.spec.ts
+++ b/electron/tests/e2e/hub.e2e.spec.ts
@@ -16,7 +16,7 @@ test.beforeAll(async () => {
 test('explores hub', async ({ hubPage }) => {
   await hubPage.navigateByMenu()
   await hubPage.verifyContainerVisible()
-  const useModelBtn= page.getByTestId(/^use-model-btn-.*/).first()
+  const useModelBtn = page.getByTestId(/^use-model-btn-.*/).first()
 
   await expect(useModelBtn).toBeVisible({
     timeout: TIMEOUT,

From 523c74515042b6f57e1d3c9ac776a94fdb40c258 Mon Sep 17 00:00:00 2001
From: Louis <louis@jan.ai>
Date: Mon, 21 Oct 2024 21:42:55 +0700
Subject: [PATCH 17/71] chore: try catch legacy assistant creation

---
 extensions/assistant-extension/src/index.ts | 115 +++++++++++---------
 extensions/model-extension/src/index.ts     |  25 ++---
 2 files changed, 73 insertions(+), 67 deletions(-)

diff --git a/extensions/assistant-extension/src/index.ts b/extensions/assistant-extension/src/index.ts
index 12441995e..6705483d6 100644
--- a/extensions/assistant-extension/src/index.ts
+++ b/extensions/assistant-extension/src/index.ts
@@ -63,39 +63,46 @@ export default class JanAssistantExtension extends AssistantExtension {
   }
 
   async getAssistants(): Promise<Assistant[]> {
-    // get all the assistant directories
-    // get all the assistant metadata json
-    const results: Assistant[] = []
-    const allFileName: string[] = await fs.readdirSync(
-      JanAssistantExtension._homeDir
-    )
-    for (const fileName of allFileName) {
-      const filePath = await joinPath([
-        JanAssistantExtension._homeDir,
-        fileName,
-      ])
+    try {
+      // get all the assistant directories
+      // get all the assistant metadata json
+      const results: Assistant[] = []
 
-      if (!(await fs.fileStat(filePath))?.isDirectory) continue
-      const jsonFiles: string[] = (await fs.readdirSync(filePath)).filter(
-        (file: string) => file === 'assistant.json'
+      const allFileName: string[] = await fs.readdirSync(
+        JanAssistantExtension._homeDir
       )
 
-      if (jsonFiles.length !== 1) {
-        // has more than one assistant file -> ignore
-        continue
+      for (const fileName of allFileName) {
+        const filePath = await joinPath([
+          JanAssistantExtension._homeDir,
+          fileName,
+        ])
+
+        if (!(await fs.fileStat(filePath))?.isDirectory) continue
+        const jsonFiles: string[] = (await fs.readdirSync(filePath)).filter(
+          (file: string) => file === 'assistant.json'
+        )
+
+        if (jsonFiles.length !== 1) {
+          // expect exactly one assistant.json in the folder, otherwise ignore it
+          continue
+        }
+
+        const content = await fs.readFileSync(
+          await joinPath([filePath, jsonFiles[0]]),
+          'utf-8'
+        )
+        const assistant: Assistant =
+          typeof content === 'object' ? content : JSON.parse(content)
+
+        results.push(assistant)
       }
 
-      const content = await fs.readFileSync(
-        await joinPath([filePath, jsonFiles[0]]),
-        'utf-8'
-      )
-      const assistant: Assistant =
-        typeof content === 'object' ? content : JSON.parse(content)
-
-      results.push(assistant)
+      return results
+    } catch (err) {
+      console.debug(err)
+      return [this.defaultAssistant]
     }
-
-    return results
   }
 
   async deleteAssistant(assistant: Assistant): Promise<void> {
@@ -112,39 +119,39 @@ export default class JanAssistantExtension extends AssistantExtension {
   }
 
   private async createJanAssistant(): Promise<void> {
-    const janAssistant: Assistant = {
-      avatar: '',
-      thread_location: undefined,
-      id: 'jan',
-      object: 'assistant',
-      created_at: Date.now(),
-      name: 'Jan',
-      description: 'A default assistant that can use all downloaded models',
-      model: '*',
-      instructions: '',
-      tools: [
-        {
-          type: 'retrieval',
-          enabled: false,
-          useTimeWeightedRetriever: false,
-          settings: {
-            top_k: 2,
-            chunk_size: 1024,
-            chunk_overlap: 64,
-            retrieval_template: `Use the following pieces of context to answer the question at the end. If you don't know the answer, just say that you don't know, don't try to make up an answer.
+    await this.createAssistant(this.defaultAssistant)
+  }
+
+  private defaultAssistant: Assistant = {
+    avatar: '',
+    thread_location: undefined,
+    id: 'jan',
+    object: 'assistant',
+    created_at: Date.now(),
+    name: 'Jan',
+    description: 'A default assistant that can use all downloaded models',
+    model: '*',
+    instructions: '',
+    tools: [
+      {
+        type: 'retrieval',
+        enabled: false,
+        useTimeWeightedRetriever: false,
+        settings: {
+          top_k: 2,
+          chunk_size: 1024,
+          chunk_overlap: 64,
+          retrieval_template: `Use the following pieces of context to answer the question at the end. If you don't know the answer, just say that you don't know, don't try to make up an answer.
 ----------------
 CONTEXT: {CONTEXT}
 ----------------
 QUESTION: {QUESTION}
 ----------------
 Helpful Answer:`,
-          },
         },
-      ],
-      file_ids: [],
-      metadata: undefined,
-    }
-
-    await this.createAssistant(janAssistant)
+      },
+    ],
+    file_ids: [],
+    metadata: undefined,
   }
 }
diff --git a/extensions/model-extension/src/index.ts b/extensions/model-extension/src/index.ts
index ea026a59d..54e91a6aa 100644
--- a/extensions/model-extension/src/index.ts
+++ b/extensions/model-extension/src/index.ts
@@ -87,23 +87,22 @@ export default class JanModelExtension extends ModelExtension {
      * should compare and try import
      */
     let currentModels: Model[] = []
-
-    if (!localStorage.getItem(ExtensionEnum.downloadedModels)) {
-      // Updated from an older version than 0.5.5
-      // Scan through the models folder and import them (Legacy flow)
-      // Return models immediately
-      currentModels = await scanModelsFolder().then((models) => {
-        return models ?? []
-      })
-    } else {
-      try {
+    try {
+      if (!localStorage.getItem(ExtensionEnum.downloadedModels)) {
+        // Updated from an older version than 0.5.5
+        // Scan through the models folder and import them (Legacy flow)
+        // Return models immediately
+        currentModels = await scanModelsFolder().then((models) => {
+          return models ?? []
+        })
+      } else {
         currentModels = JSON.parse(
           localStorage.getItem(ExtensionEnum.downloadedModels)
         ) as Model[]
-      } catch (e) {
-        currentModels = []
-        console.error(e)
       }
+    } catch (e) {
+      currentModels = []
+      console.error(e)
     }
 
     /**

From 40957f7686ee69fd292ebdecbd81c0aaafb66682 Mon Sep 17 00:00:00 2001
From: Louis <louis@jan.ai>
Date: Tue, 22 Oct 2024 15:21:30 +0700
Subject: [PATCH 18/71] fix: model reload state - reduce model unload events
 emit

---
 core/src/browser/models/manager.ts            |  9 +++-
 docs/src/pages/docs/built-in/llama-cpp.mdx    |  2 +-
 .../inference-cortex-extension/package.json   |  1 +
 .../rollup.config.ts                          |  2 +-
 .../inference-cortex-extension/src/index.ts   | 46 ++++++++++++-----
 .../inference-cortex-extension/tsconfig.json  |  5 +-
 web/containers/Loader/ModelReload.tsx         |  5 ++
 web/hooks/useActiveModel.ts                   | 50 +++++++++++--------
 web/hooks/useModels.ts                        |  7 ++-
 .../Thread/ThreadCenterPanel/index.tsx        | 11 +---
 web/screens/Thread/ThreadRightPanel/index.tsx | 10 +++-
 11 files changed, 92 insertions(+), 56 deletions(-)

diff --git a/core/src/browser/models/manager.ts b/core/src/browser/models/manager.ts
index 4853989fe..d5afe83d5 100644
--- a/core/src/browser/models/manager.ts
+++ b/core/src/browser/models/manager.ts
@@ -18,7 +18,14 @@ export class ModelManager {
    * @param model - The model to register.
    */
   register<T extends Model>(model: T) {
-    this.models.set(model.id, model)
+    if (this.models.has(model.id)) {
+      this.models.set(model.id, {
+        ...model,
+        ...this.models.get(model.id),
+      })
+    } else {
+      this.models.set(model.id, model)
+    }
     events.emit(ModelEvent.OnModelsUpdate, {})
   }
 
diff --git a/docs/src/pages/docs/built-in/llama-cpp.mdx b/docs/src/pages/docs/built-in/llama-cpp.mdx
index 8e2fa8498..5b7b0453a 100644
--- a/docs/src/pages/docs/built-in/llama-cpp.mdx
+++ b/docs/src/pages/docs/built-in/llama-cpp.mdx
@@ -102,7 +102,7 @@ Enable the GPU acceleration option within the Jan application by following the [
     ],
     "size": 669000000
   },
-  "engine": "llama-cpp"
+  "engine": "nitro"
 }
 ```
 ### Step 2: Modify the `model.json`
diff --git a/extensions/inference-cortex-extension/package.json b/extensions/inference-cortex-extension/package.json
index 920989f3b..5a9fc56e9 100644
--- a/extensions/inference-cortex-extension/package.json
+++ b/extensions/inference-cortex-extension/package.json
@@ -51,6 +51,7 @@
     "decompress": "^4.2.1",
     "fetch-retry": "^5.0.6",
     "ky": "^1.7.2",
+    "p-queue": "^8.0.1",
     "rxjs": "^7.8.1",
     "tcp-port-used": "^1.0.2",
     "terminate": "2.6.1",
diff --git a/extensions/inference-cortex-extension/rollup.config.ts b/extensions/inference-cortex-extension/rollup.config.ts
index d0e9f5fbe..ea873990b 100644
--- a/extensions/inference-cortex-extension/rollup.config.ts
+++ b/extensions/inference-cortex-extension/rollup.config.ts
@@ -114,7 +114,7 @@ export default [
         ]),
         NODE: JSON.stringify(`${packageJson.name}/${packageJson.node}`),
         DEFAULT_SETTINGS: JSON.stringify(defaultSettingJson),
-        CORTEX_API_URL: JSON.stringify('http://127.0.0.1:39291/v1'),
+        CORTEX_API_URL: JSON.stringify('http://127.0.0.1:39291'),
       }),
       // Allow json resolution
       json(),
diff --git a/extensions/inference-cortex-extension/src/index.ts b/extensions/inference-cortex-extension/src/index.ts
index 93036fc4d..364bfe79c 100644
--- a/extensions/inference-cortex-extension/src/index.ts
+++ b/extensions/inference-cortex-extension/src/index.ts
@@ -16,7 +16,7 @@ import {
   LocalOAIEngine,
   InferenceEngine,
 } from '@janhq/core'
-
+import PQueue from 'p-queue'
 import ky from 'ky'
 
 /**
@@ -28,12 +28,14 @@ export default class JanInferenceCortexExtension extends LocalOAIEngine {
   // DEPRECATED
   nodeModule: string = 'node'
 
+  queue = new PQueue({ concurrency: 1 })
+
   provider: string = InferenceEngine.cortex
 
   /**
    * The URL for making inference requests.
    */
-  inferenceUrl = `${CORTEX_API_URL}/chat/completions`
+  inferenceUrl = `${CORTEX_API_URL}/v1/chat/completions`
 
   /**
    * Subscribes to events emitted by the @janhq/core package.
@@ -47,7 +49,9 @@ export default class JanInferenceCortexExtension extends LocalOAIEngine {
 
     // Run the process watchdog
     const systemInfo = await systemInformation()
-    executeOnMain(NODE, 'run', systemInfo)
+    await executeOnMain(NODE, 'run', systemInfo)
+
+    this.queue.add(() => this.healthz())
   }
 
   onUnload(): void {
@@ -61,16 +65,19 @@ export default class JanInferenceCortexExtension extends LocalOAIEngine {
     // Legacy model cache - should import
     if (model.engine === InferenceEngine.nitro && model.file_path) {
       // Try importing the model
-      await ky
-        .post(`${CORTEX_API_URL}/models/${model.id}`, {
-          json: { model: model.id, modelPath: await this.modelPath(model) },
-        })
-        .json()
-        .catch((e) => log(e.message ?? e ?? ''))
+      const modelPath = await this.modelPath(model)
+      await this.queue.add(() =>
+        ky
+          .post(`${CORTEX_API_URL}/v1/models/${model.id}`, {
+            json: { model: model.id, modelPath: modelPath },
+          })
+          .json()
+          .catch((e) => log(e.message ?? e ?? ''))
+      )
     }
 
-    return ky
-      .post(`${CORTEX_API_URL}/models/start`, {
+    return await ky
+      .post(`${CORTEX_API_URL}/v1/models/start`, {
         json: {
           ...model.settings,
           model: model.id,
@@ -89,7 +96,7 @@ export default class JanInferenceCortexExtension extends LocalOAIEngine {
 
   override async unloadModel(model: Model): Promise<void> {
     return ky
-      .post(`${CORTEX_API_URL}/models/stop`, {
+      .post(`${CORTEX_API_URL}/v1/models/stop`, {
         json: { model: model.id },
       })
       .json()
@@ -108,4 +115,19 @@ export default class JanInferenceCortexExtension extends LocalOAIEngine {
         model.id,
     ])
   }
+
+  /**
+   * Do health check on cortex.cpp
+   * @returns A promise that resolves (with no value) once the /healthz request succeeds
+   */
+  healthz(): Promise<void> {
+    return ky
+      .get(`${CORTEX_API_URL}/healthz`, {
+        retry: {
+          limit: 10,
+          methods: ['get'],
+        },
+      })
+      .then(() => {})
+  }
 }
diff --git a/extensions/inference-cortex-extension/tsconfig.json b/extensions/inference-cortex-extension/tsconfig.json
index bdb35163a..af00a035a 100644
--- a/extensions/inference-cortex-extension/tsconfig.json
+++ b/extensions/inference-cortex-extension/tsconfig.json
@@ -1,9 +1,8 @@
 {
   "compilerOptions": {
     "moduleResolution": "node",
-    "target": "ES2015",
-    "module": "ES2020",
-    "lib": ["es2015", "es2016", "es2017", "dom"],
+    "target": "es2016",
+    "module": "esnext",
     "strict": true,
     "sourceMap": true,
     "declaration": true,
diff --git a/web/containers/Loader/ModelReload.tsx b/web/containers/Loader/ModelReload.tsx
index fbe673788..29709c0da 100644
--- a/web/containers/Loader/ModelReload.tsx
+++ b/web/containers/Loader/ModelReload.tsx
@@ -44,6 +44,11 @@ export default function ModelReload() {
           Reloading model {stateModel.model?.id}
         </span>
       </div>
+      <div className="my-4 mb-2 text-center">
+        <span className="text-[hsla(var(--text-secondary)]">
+          Model is reloading to apply new changes.
+        </span>
+      </div>
     </div>
   )
 }
diff --git a/web/hooks/useActiveModel.ts b/web/hooks/useActiveModel.ts
index 8dd71fcc5..353288337 100644
--- a/web/hooks/useActiveModel.ts
+++ b/web/hooks/useActiveModel.ts
@@ -51,6 +51,10 @@ export function useActiveModel() {
       console.debug(`Model ${modelId} is already initialized. Ignore..`)
       return Promise.resolve()
     }
+
+    if (activeModel) {
+      stopModel(activeModel)
+    }
     setPendingModelLoad(true)
 
     let model = downloadedModelsRef?.current.find((e) => e.id === modelId)
@@ -113,7 +117,7 @@ export function useActiveModel() {
         setStateModel(() => ({
           state: 'start',
           loading: false,
-          model,
+          undefined,
         }))
 
         if (!pendingModelLoad && abortable) {
@@ -130,28 +134,30 @@ export function useActiveModel() {
       })
   }
 
-  const stopModel = useCallback(async () => {
-    const stoppingModel = activeModel || stateModel.model
-    if (!stoppingModel || (stateModel.state === 'stop' && stateModel.loading))
-      return
+  const stopModel = useCallback(
+    async (model?: Model) => {
+      const stoppingModel = model ?? activeModel ?? stateModel.model
+      if (!stoppingModel || (stateModel.state === 'stop' && stateModel.loading))
+        return
 
-    setStateModel({ state: 'stop', loading: true, model: stoppingModel })
-    const engine = EngineManager.instance().get(stoppingModel.engine)
-    return engine
-      ?.unloadModel(stoppingModel)
-      .catch((e) => console.error(e))
-      .then(() => {
-        setActiveModel(undefined)
-        setStateModel({ state: 'start', loading: false, model: undefined })
-        setPendingModelLoad(false)
-      })
-  }, [
-    activeModel,
-    setActiveModel,
-    setStateModel,
-    setPendingModelLoad,
-    stateModel,
-  ])
+      const engine = EngineManager.instance().get(stoppingModel.engine)
+      return engine
+        ?.unloadModel(stoppingModel)
+        .catch((e) => console.error(e))
+        .then(() => {
+          setActiveModel(undefined)
+          setStateModel({ state: 'start', loading: false, model: undefined })
+          setPendingModelLoad(false)
+        })
+    },
+    [
+      activeModel,
+      setStateModel,
+      setActiveModel,
+      setPendingModelLoad,
+      stateModel,
+    ]
+  )
 
   const stopInference = useCallback(async () => {
     // Loading model
diff --git a/web/hooks/useModels.ts b/web/hooks/useModels.ts
index b09839457..742d09beb 100644
--- a/web/hooks/useModels.ts
+++ b/web/hooks/useModels.ts
@@ -31,10 +31,9 @@ const useModels = () => {
   const getData = useCallback(() => {
     const getDownloadedModels = async () => {
       const localModels = await getModels()
-      const remoteModels = ModelManager.instance()
-        .models.values()
-        .toArray()
-        .filter((e) => !isLocalEngine(e.engine))
+      const hubModels = ModelManager.instance().models.values().toArray()
+
+      const remoteModels = hubModels.filter((e) => !isLocalEngine(e.engine))
       setDownloadedModels([...localModels, ...remoteModels])
     }
 
diff --git a/web/screens/Thread/ThreadCenterPanel/index.tsx b/web/screens/Thread/ThreadCenterPanel/index.tsx
index fe7993e9a..c83a38a1a 100644
--- a/web/screens/Thread/ThreadCenterPanel/index.tsx
+++ b/web/screens/Thread/ThreadCenterPanel/index.tsx
@@ -199,16 +199,7 @@ const ThreadCenterPanel = () => {
 
           {!engineParamsUpdate && <ModelStart />}
 
-          {reloadModel && (
-            <Fragment>
-              <ModelReload />
-              <div className="mb-2 text-center">
-                <span className="text-[hsla(var(--text-secondary)]">
-                  Model is reloading to apply new changes.
-                </span>
-              </div>
-            </Fragment>
-          )}
+          {reloadModel && <ModelReload />}
 
           {activeModel && isGeneratingResponse && <GenerateResponse />}
           <ChatInput />
diff --git a/web/screens/Thread/ThreadRightPanel/index.tsx b/web/screens/Thread/ThreadRightPanel/index.tsx
index 7ccc4957a..5a8fd3ebb 100644
--- a/web/screens/Thread/ThreadRightPanel/index.tsx
+++ b/web/screens/Thread/ThreadRightPanel/index.tsx
@@ -15,6 +15,8 @@ import {
 
 import { useAtom, useAtomValue, useSetAtom } from 'jotai'
 
+import { useDebouncedCallback } from 'use-debounce'
+
 import CopyOverInstruction from '@/containers/CopyInstruction'
 import EngineSetting from '@/containers/EngineSetting'
 import ModelDropdown from '@/containers/ModelDropdown'
@@ -168,6 +170,10 @@ const ThreadRightPanel = () => {
     [activeThread, updateThreadMetadata]
   )
 
+  const resetModel = useDebouncedCallback(() => {
+    stopModel()
+  }, 300)
+
   const onValueChanged = useCallback(
     (key: string, value: string | number | boolean) => {
       if (!activeThread) {
@@ -175,7 +181,7 @@ const ThreadRightPanel = () => {
       }
 
       setEngineParamsUpdate(true)
-      stopModel()
+      resetModel()
 
       updateModelParameter(activeThread, {
         params: { [key]: value },
@@ -207,7 +213,7 @@ const ThreadRightPanel = () => {
         }
       }
     },
-    [activeThread, setEngineParamsUpdate, stopModel, updateModelParameter]
+    [activeThread, resetModel, setEngineParamsUpdate, updateModelParameter]
   )
 
   if (!activeThread) {

From 8ccbb56f951f7fe3caa5a2e5213dfd985fd96148 Mon Sep 17 00:00:00 2001
From: Louis <louis@jan.ai>
Date: Tue, 22 Oct 2024 15:38:32 +0700
Subject: [PATCH 19/71] chore: unload model on message error - so users can
 attempt to start

---
 web/containers/Providers/EventHandler.tsx | 3 +++
 web/hooks/useActiveModel.ts               | 2 +-
 2 files changed, 4 insertions(+), 1 deletion(-)

diff --git a/web/containers/Providers/EventHandler.tsx b/web/containers/Providers/EventHandler.tsx
index 5cc92219c..72d35aad3 100644
--- a/web/containers/Providers/EventHandler.tsx
+++ b/web/containers/Providers/EventHandler.tsx
@@ -179,6 +179,9 @@ export default function EventHandler({ children }: { children: ReactNode }) {
           setIsGeneratingResponse(false)
         }
         return
+      } else if (message.status === MessageStatus.Error) {
+        setActiveModel(undefined)
+        setStateModel({ state: 'start', loading: false, model: undefined })
       }
       // Mark the thread as not waiting for response
       updateThreadWaiting(message.thread_id, false)
diff --git a/web/hooks/useActiveModel.ts b/web/hooks/useActiveModel.ts
index 353288337..7b9ee98e0 100644
--- a/web/hooks/useActiveModel.ts
+++ b/web/hooks/useActiveModel.ts
@@ -53,7 +53,7 @@ export function useActiveModel() {
     }
 
     if (activeModel) {
-      stopModel(activeModel)
+      await stopModel(activeModel)
     }
     setPendingModelLoad(true)
 

From 981675f3658dc8aea087c70fed08ff508271d57d Mon Sep 17 00:00:00 2001
From: Louis <louis@jan.ai>
Date: Tue, 22 Oct 2024 16:03:07 +0700
Subject: [PATCH 20/71] chore: API server works with cortex.cpp

---
 core/src/node/api/restful/helper/builder.ts   |  2 +-
 core/src/node/api/restful/helper/consts.ts    | 14 +---
 .../node/api/restful/helper/startStopModel.ts | 73 +++----------------
 3 files changed, 12 insertions(+), 77 deletions(-)

diff --git a/core/src/node/api/restful/helper/builder.ts b/core/src/node/api/restful/helper/builder.ts
index db2000d69..da33808dc 100644
--- a/core/src/node/api/restful/helper/builder.ts
+++ b/core/src/node/api/restful/helper/builder.ts
@@ -343,7 +343,7 @@ export const chatCompletions = async (request: any, reply: any) => {
 
   // add engine for new cortex cpp engine
   if (requestedModel.engine === 'nitro') {
-    request.body.engine = 'cortex.llamacpp'
+    request.body.engine = 'llama-cpp'
   }
 
   const fetch = require('node-fetch')
diff --git a/core/src/node/api/restful/helper/consts.ts b/core/src/node/api/restful/helper/consts.ts
index 8d8f8e341..0f57bb5ff 100644
--- a/core/src/node/api/restful/helper/consts.ts
+++ b/core/src/node/api/restful/helper/consts.ts
@@ -1,19 +1,9 @@
 // The PORT to use for the Nitro subprocess
-export const NITRO_DEFAULT_PORT = 3928
+export const CORTEX_DEFAULT_PORT = 39291
 
 // The HOST address to use for the Nitro subprocess
 export const LOCAL_HOST = '127.0.0.1'
 
 export const SUPPORTED_MODEL_FORMAT = '.gguf'
 
-// The URL for the Nitro subprocess
-const NITRO_HTTP_SERVER_URL = `http://${LOCAL_HOST}:${NITRO_DEFAULT_PORT}`
-// The URL for the Nitro subprocess to load a model
-export const NITRO_HTTP_LOAD_MODEL_URL = `${NITRO_HTTP_SERVER_URL}/inferences/server/loadmodel`
-// The URL for the Nitro subprocess to validate a model
-export const NITRO_HTTP_VALIDATE_MODEL_URL = `${NITRO_HTTP_SERVER_URL}/inferences/server/modelstatus`
-
-// The URL for the Nitro subprocess to kill itself
-export const NITRO_HTTP_KILL_URL = `${NITRO_HTTP_SERVER_URL}/processmanager/destroy`
-
-export const DEFAULT_CHAT_COMPLETION_URL = `http://${LOCAL_HOST}:${NITRO_DEFAULT_PORT}/inferences/server/chat_completion` // default nitro url
+export const DEFAULT_CHAT_COMPLETION_URL = `http://${LOCAL_HOST}:${CORTEX_DEFAULT_PORT}/v1/chat/completions` // default cortex url
diff --git a/core/src/node/api/restful/helper/startStopModel.ts b/core/src/node/api/restful/helper/startStopModel.ts
index d1a23dca9..857567612 100644
--- a/core/src/node/api/restful/helper/startStopModel.ts
+++ b/core/src/node/api/restful/helper/startStopModel.ts
@@ -1,6 +1,5 @@
-import { join } from 'path'
-import { getJanDataFolderPath, getJanExtensionsPath, log } from '../../../helper'
 import { ModelSettingParams } from '../../../../types'
+import { CORTEX_DEFAULT_PORT, LOCAL_HOST } from './consts'
 
 /**
  * Start a model
@@ -9,70 +8,16 @@ import { ModelSettingParams } from '../../../../types'
  * @returns
  */
 export const startModel = async (modelId: string, settingParams?: ModelSettingParams) => {
-  try {
-    await runModel(modelId, settingParams)
-
-    return {
-      message: `Model ${modelId} started`,
-    }
-  } catch (e) {
-    return {
-      error: e,
-    }
-  }
+  return fetch(`http://${LOCAL_HOST}:${CORTEX_DEFAULT_PORT}/v1/models/start`, {
+    body: JSON.stringify({ model: modelId, ...settingParams }),
+  })
 }
 
-/**
- * Run a model using installed cortex extension
- * @param model
- * @param settingParams
- */
-const runModel = async (model: string, settingParams?: ModelSettingParams): Promise<void> => {
-  const janDataFolderPath = getJanDataFolderPath()
-  const modelFolder = join(janDataFolderPath, 'models', model)
-  let module = join(
-    getJanExtensionsPath(),
-    '@janhq',
-    'inference-cortex-extension',
-    'dist',
-    'node',
-    'index.cjs'
-  )
-  // Just reuse the cortex extension implementation, don't duplicate then lost of sync
-  return import(module).then((extension) =>
-    extension
-      .loadModel(
-        {
-          modelFolder,
-          model,
-        },
-        settingParams
-      )
-      .then(() => log(`[SERVER]::Debug: Model is loaded`))
-      .then({
-        message: 'Model started',
-      })
-  )
-}
 /*
- * Stop model and kill nitro process.
+ * Stop model.
  */
-export const stopModel = async (_modelId: string) => {
-  let module = join(
-    getJanExtensionsPath(),
-    '@janhq',
-    'inference-cortex-extension',
-    'dist',
-    'node',
-    'index.cjs'
-  )
-  // Just reuse the cortex extension implementation, don't duplicate then lost of sync
-  return import(module).then((extension) =>
-    extension
-      .unloadModel()
-      .then(() => log(`[SERVER]::Debug: Model is unloaded`))
-      .then({
-        message: 'Model stopped',
-      })
-  )
+export const stopModel = async (modelId: string) => {
+  return fetch(`http://${LOCAL_HOST}:${CORTEX_DEFAULT_PORT}/v1/models/stop`, {
+    body: JSON.stringify({ model: modelId }),
+  })
 }

From e8a9e8e28c439d3579df88b00f088ad3eb752f61 Mon Sep 17 00:00:00 2001
From: Louis <louis@jan.ai>
Date: Tue, 22 Oct 2024 16:16:43 +0700
Subject: [PATCH 21/71] fix: inherits model decoration metadata from Jan

---
 web/hooks/useModels.ts | 13 ++++++++++---
 1 file changed, 10 insertions(+), 3 deletions(-)

diff --git a/web/hooks/useModels.ts b/web/hooks/useModels.ts
index 742d09beb..f884a591e 100644
--- a/web/hooks/useModels.ts
+++ b/web/hooks/useModels.ts
@@ -30,10 +30,17 @@ const useModels = () => {
 
   const getData = useCallback(() => {
     const getDownloadedModels = async () => {
-      const localModels = await getModels()
-      const hubModels = ModelManager.instance().models.values().toArray()
+      const localModels = (await getModels()).map((e) => ({
+        ...e,
+        name: ModelManager.instance().models.get(e.id)?.name ?? e.name,
+        metadata:
+          ModelManager.instance().models.get(e.id)?.metadata ?? e.metadata,
+      }))
 
-      const remoteModels = hubModels.filter((e) => !isLocalEngine(e.engine))
+      const remoteModels = ModelManager.instance()
+        .models.values()
+        .toArray()
+        .filter((e) => !isLocalEngine(e.engine))
       setDownloadedModels([...localModels, ...remoteModels])
     }
 

From 9afbfd609a17ffb1c19533e371d1bd9c63b62068 Mon Sep 17 00:00:00 2001
From: Louis <louis@jan.ai>
Date: Tue, 22 Oct 2024 17:09:24 +0700
Subject: [PATCH 22/71] test: correct test cases

---
 .../node/api/restful/helper/consts.test.ts    |  9 ++++-----
 .../api/restful/helper/startStopModel.test.ts | 20 +++++++------------
 .../node/api/restful/helper/startStopModel.ts |  2 ++
 web/hooks/useModels.test.ts                   |  1 +
 4 files changed, 14 insertions(+), 18 deletions(-)

diff --git a/core/src/node/api/restful/helper/consts.test.ts b/core/src/node/api/restful/helper/consts.test.ts
index 34d42dcf0..524f0cbeb 100644
--- a/core/src/node/api/restful/helper/consts.test.ts
+++ b/core/src/node/api/restful/helper/consts.test.ts
@@ -1,6 +1,5 @@
+import { CORTEX_DEFAULT_PORT } from './consts'
 
-import { NITRO_DEFAULT_PORT } from './consts';
-
-it('should test NITRO_DEFAULT_PORT', () => {
-  expect(NITRO_DEFAULT_PORT).toBe(3928);
-});
+it('should test CORTEX_DEFAULT_PORT', () => {
+  expect(CORTEX_DEFAULT_PORT).toBe(39291)
+})
diff --git a/core/src/node/api/restful/helper/startStopModel.test.ts b/core/src/node/api/restful/helper/startStopModel.test.ts
index a5475cc28..7c1a56cf1 100644
--- a/core/src/node/api/restful/helper/startStopModel.test.ts
+++ b/core/src/node/api/restful/helper/startStopModel.test.ts
@@ -1,16 +1,10 @@
+import { startModel } from './startStopModel'
 
+describe('startModel', () => {
+  it('test_startModel_error', async () => {
+    const modelId = 'testModelId'
+    const settingParams = undefined
 
-  import { startModel } from './startStopModel'
-  
-  describe('startModel', () => {
-    it('test_startModel_error', async () => {
-      const modelId = 'testModelId'
-      const settingParams = undefined
-  
-      const result = await startModel(modelId, settingParams)
-  
-      expect(result).toEqual({
-        error: expect.any(Error),
-      })
-    })
+    expect(startModel(modelId, settingParams)).resolves.toThrow()
   })
+})
diff --git a/core/src/node/api/restful/helper/startStopModel.ts b/core/src/node/api/restful/helper/startStopModel.ts
index 857567612..2e9db6d15 100644
--- a/core/src/node/api/restful/helper/startStopModel.ts
+++ b/core/src/node/api/restful/helper/startStopModel.ts
@@ -9,6 +9,7 @@ import { CORTEX_DEFAULT_PORT, LOCAL_HOST } from './consts'
  */
 export const startModel = async (modelId: string, settingParams?: ModelSettingParams) => {
   return fetch(`http://${LOCAL_HOST}:${CORTEX_DEFAULT_PORT}/v1/models/start`, {
+    method: 'POST',
     body: JSON.stringify({ model: modelId, ...settingParams }),
   })
 }
@@ -18,6 +19,7 @@ export const startModel = async (modelId: string, settingParams?: ModelSettingPa
  */
 export const stopModel = async (modelId: string) => {
   return fetch(`http://${LOCAL_HOST}:${CORTEX_DEFAULT_PORT}/v1/models/stop`, {
+    method: 'POST',
     body: JSON.stringify({ model: modelId }),
   })
 }
diff --git a/web/hooks/useModels.test.ts b/web/hooks/useModels.test.ts
index b58102144..2def2b745 100644
--- a/web/hooks/useModels.test.ts
+++ b/web/hooks/useModels.test.ts
@@ -31,6 +31,7 @@ describe('useModels', () => {
             filter: () => models,
           }),
         }),
+        get: () => undefined,
       },
     })
 

From 7ce0625047e29678973ed8918eec492625c53e13 Mon Sep 17 00:00:00 2001
From: Louis <louis@jan.ai>
Date: Tue, 22 Oct 2024 17:36:31 +0700
Subject: [PATCH 23/71] test: add playwright report artifact

---
 .github/workflows/jan-electron-linter-and-test.yml | 7 +++++++
 1 file changed, 7 insertions(+)

diff --git a/.github/workflows/jan-electron-linter-and-test.yml b/.github/workflows/jan-electron-linter-and-test.yml
index 4e20d6c5f..b2105acb4 100644
--- a/.github/workflows/jan-electron-linter-and-test.yml
+++ b/.github/workflows/jan-electron-linter-and-test.yml
@@ -319,6 +319,13 @@ jobs:
         #   TURBO_TEAM: 'linux'
         #   TURBO_TOKEN: '${{ secrets.TURBO_TOKEN }}'
 
+      - uses: actions/upload-artifact@v4
+        if: always()
+        with:
+          name: playwright-report
+          path: electron/playwright-report/
+          retention-days: 2
+
   coverage-check:
     runs-on: [self-hosted, Linux, ubuntu-desktop]
     needs: base_branch_cov

From 2d80d6962b1bd7ffeeca89d405e94a48a021c07a Mon Sep 17 00:00:00 2001
From: Louis <louis@jan.ai>
Date: Tue, 22 Oct 2024 17:56:36 +0700
Subject: [PATCH 24/71] test: try to scroll to bottom e2e

---
 .github/workflows/jan-electron-linter-and-test.yml |  1 -
 electron/tests/e2e/hub.e2e.spec.ts                 |  1 +
 electron/tests/pages/basePage.ts                   | 11 ++++++++---
 3 files changed, 9 insertions(+), 4 deletions(-)

diff --git a/.github/workflows/jan-electron-linter-and-test.yml b/.github/workflows/jan-electron-linter-and-test.yml
index b2105acb4..300ee59bc 100644
--- a/.github/workflows/jan-electron-linter-and-test.yml
+++ b/.github/workflows/jan-electron-linter-and-test.yml
@@ -320,7 +320,6 @@ jobs:
         #   TURBO_TOKEN: '${{ secrets.TURBO_TOKEN }}'
 
       - uses: actions/upload-artifact@v4
-        if: always()
         with:
           name: playwright-report
           path: electron/playwright-report/
diff --git a/electron/tests/e2e/hub.e2e.spec.ts b/electron/tests/e2e/hub.e2e.spec.ts
index 9b1b8305f..ef305e9c3 100644
--- a/electron/tests/e2e/hub.e2e.spec.ts
+++ b/electron/tests/e2e/hub.e2e.spec.ts
@@ -16,6 +16,7 @@ test.beforeAll(async () => {
 test('explores hub', async ({ hubPage }) => {
   await hubPage.navigateByMenu()
   await hubPage.verifyContainerVisible()
+  await hubPage.scrollToBottom()
   const useModelBtn = page.getByTestId(/^use-model-btn-.*/).first()
 
   await expect(useModelBtn).toBeVisible({
diff --git a/electron/tests/pages/basePage.ts b/electron/tests/pages/basePage.ts
index 1817bc731..11e3ba81a 100644
--- a/electron/tests/pages/basePage.ts
+++ b/electron/tests/pages/basePage.ts
@@ -8,9 +8,8 @@ export class BasePage {
   constructor(
     protected readonly page: Page,
     readonly action: CommonActions,
-    protected containerId: string,
-  ) {
-  }
+    protected containerId: string
+  ) {}
 
   public getValue(key: string) {
     return this.action.getValue(key)
@@ -37,6 +36,12 @@ export class BasePage {
     expect(container.isVisible()).toBeTruthy()
   }
 
+  async scrollToBottom() {
+    await this.page.evaluate(() => {
+      window.scrollTo(0, document.body.scrollHeight)
+    })
+  }
+
   async waitUpdateLoader() {
     await this.isElementVisible('img[alt="Jan - Logo"]')
   }

From 03333cc4c2af4f52b1ad2fddcb62636ff9b1e102 Mon Sep 17 00:00:00 2001
From: Louis <louis@jan.ai>
Date: Tue, 22 Oct 2024 19:04:24 +0700
Subject: [PATCH 25/71] fix: onboarding should cover cortex models; debounce
 model reloads; rename cortex binary

---
 .../jan-electron-linter-and-test.yml          |  1 +
 .../src/node/execute.test.ts                  | 28 +++++++++----------
 .../src/node/execute.ts                       |  2 +-
 web/hooks/useModels.ts                        | 10 +++++--
 .../ChatBody/EmptyThread/index.tsx            |  5 ++--
 5 files changed, 26 insertions(+), 20 deletions(-)

diff --git a/.github/workflows/jan-electron-linter-and-test.yml b/.github/workflows/jan-electron-linter-and-test.yml
index 300ee59bc..b2105acb4 100644
--- a/.github/workflows/jan-electron-linter-and-test.yml
+++ b/.github/workflows/jan-electron-linter-and-test.yml
@@ -320,6 +320,7 @@ jobs:
         #   TURBO_TOKEN: '${{ secrets.TURBO_TOKEN }}'
 
       - uses: actions/upload-artifact@v4
+        if: always()
         with:
           name: playwright-report
           path: electron/playwright-report/
diff --git a/extensions/inference-cortex-extension/src/node/execute.test.ts b/extensions/inference-cortex-extension/src/node/execute.test.ts
index 89110fbd9..a1b5c4ba4 100644
--- a/extensions/inference-cortex-extension/src/node/execute.test.ts
+++ b/extensions/inference-cortex-extension/src/node/execute.test.ts
@@ -49,7 +49,7 @@ describe('test executable cortex file', () => {
         enginePath: expect.stringContaining(`mac-arm64`),
         executablePath:
           originalPlatform === 'darwin'
-            ? expect.stringContaining(`/cortex`)
+            ? expect.stringContaining(`/cortex-server`)
             : expect.anything(),
         cudaVisibleDevices: '',
         vkVisibleDevices: '',
@@ -63,7 +63,7 @@ describe('test executable cortex file', () => {
         enginePath: expect.stringContaining(`mac-x64`),
         executablePath:
           originalPlatform === 'darwin'
-            ? expect.stringContaining(`/cortex`)
+            ? expect.stringContaining(`/cortex-server`)
             : expect.anything(),
         cudaVisibleDevices: '',
         vkVisibleDevices: '',
@@ -82,7 +82,7 @@ describe('test executable cortex file', () => {
     expect(executableCortexFile(settings)).toEqual(
       expect.objectContaining({
         enginePath: expect.stringContaining(`win`),
-        executablePath: expect.stringContaining(`/cortex.exe`),
+        executablePath: expect.stringContaining(`/cortex-server.exe`),
         cudaVisibleDevices: '',
         vkVisibleDevices: '',
       })
@@ -116,7 +116,7 @@ describe('test executable cortex file', () => {
     expect(executableCortexFile(settings)).toEqual(
       expect.objectContaining({
         enginePath: expect.stringContaining(`win-cuda-11-7`),
-        executablePath: expect.stringContaining(`/cortex.exe`),
+        executablePath: expect.stringContaining(`/cortex-server.exe`),
         cudaVisibleDevices: '0',
         vkVisibleDevices: '0',
       })
@@ -150,7 +150,7 @@ describe('test executable cortex file', () => {
     expect(executableCortexFile(settings)).toEqual(
       expect.objectContaining({
         enginePath: expect.stringContaining(`win-cuda-12-0`),
-        executablePath: expect.stringContaining(`/cortex.exe`),
+        executablePath: expect.stringContaining(`/cortex-server.exe`),
         cudaVisibleDevices: '0',
         vkVisibleDevices: '0',
       })
@@ -168,7 +168,7 @@ describe('test executable cortex file', () => {
     expect(executableCortexFile(settings)).toEqual(
       expect.objectContaining({
         enginePath: expect.stringContaining(`linux`),
-        executablePath: expect.stringContaining(`/cortex`),
+        executablePath: expect.stringContaining(`/cortex-server`),
         cudaVisibleDevices: '',
         vkVisibleDevices: '',
       })
@@ -202,7 +202,7 @@ describe('test executable cortex file', () => {
     expect(executableCortexFile(settings)).toEqual(
       expect.objectContaining({
         enginePath: expect.stringContaining(`linux-cuda-11-7`),
-        executablePath: expect.stringContaining(`/cortex`),
+        executablePath: expect.stringContaining(`/cortex-server`),
         cudaVisibleDevices: '0',
         vkVisibleDevices: '0',
       })
@@ -236,7 +236,7 @@ describe('test executable cortex file', () => {
     expect(executableCortexFile(settings)).toEqual(
       expect.objectContaining({
         enginePath: expect.stringContaining(`linux-cuda-12-0`),
-        executablePath: expect.stringContaining(`/cortex`),
+        executablePath: expect.stringContaining(`/cortex-server`),
         cudaVisibleDevices: '0',
         vkVisibleDevices: '0',
       })
@@ -260,7 +260,7 @@ describe('test executable cortex file', () => {
       expect(executableCortexFile(settings)).toEqual(
         expect.objectContaining({
           enginePath: expect.stringContaining(`linux-${instruction}`),
-          executablePath: expect.stringContaining(`/cortex`),
+          executablePath: expect.stringContaining(`/cortex-server`),
 
           cudaVisibleDevices: '',
           vkVisibleDevices: '',
@@ -283,7 +283,7 @@ describe('test executable cortex file', () => {
       expect(executableCortexFile(settings)).toEqual(
         expect.objectContaining({
           enginePath: expect.stringContaining(`win-${instruction}`),
-          executablePath: expect.stringContaining(`/cortex.exe`),
+          executablePath: expect.stringContaining(`/cortex-server.exe`),
           cudaVisibleDevices: '',
           vkVisibleDevices: '',
         })
@@ -322,7 +322,7 @@ describe('test executable cortex file', () => {
       expect(executableCortexFile(settings)).toEqual(
         expect.objectContaining({
           enginePath: expect.stringContaining(`win-cuda-12-0`),
-          executablePath: expect.stringContaining(`/cortex.exe`),
+          executablePath: expect.stringContaining(`/cortex-server.exe`),
           cudaVisibleDevices: '0',
           vkVisibleDevices: '0',
         })
@@ -361,7 +361,7 @@ describe('test executable cortex file', () => {
       expect(executableCortexFile(settings)).toEqual(
         expect.objectContaining({
           enginePath: expect.stringContaining(`linux-cuda-12-0`),
-          executablePath: expect.stringContaining(`/cortex`),
+          executablePath: expect.stringContaining(`/cortex-server`),
           cudaVisibleDevices: '0',
           vkVisibleDevices: '0',
         })
@@ -401,7 +401,7 @@ describe('test executable cortex file', () => {
       expect(executableCortexFile(settings)).toEqual(
         expect.objectContaining({
           enginePath: expect.stringContaining(`linux-vulkan`),
-          executablePath: expect.stringContaining(`/cortex`),
+          executablePath: expect.stringContaining(`/cortex-server`),
           cudaVisibleDevices: '0',
           vkVisibleDevices: '0',
         })
@@ -429,7 +429,7 @@ describe('test executable cortex file', () => {
           enginePath: expect.stringContaining(`mac-x64`),
           executablePath:
             originalPlatform === 'darwin'
-              ? expect.stringContaining(`/cortex`)
+              ? expect.stringContaining(`/cortex-server`)
               : expect.anything(),
           cudaVisibleDevices: '',
           vkVisibleDevices: '',
diff --git a/extensions/inference-cortex-extension/src/node/execute.ts b/extensions/inference-cortex-extension/src/node/execute.ts
index 0febe8adf..b5f848332 100644
--- a/extensions/inference-cortex-extension/src/node/execute.ts
+++ b/extensions/inference-cortex-extension/src/node/execute.ts
@@ -99,7 +99,7 @@ export const executableCortexFile = (
     .join('-')
   let cudaVisibleDevices = gpuSetting?.gpus_in_use.join(',') ?? ''
   let vkVisibleDevices = gpuSetting?.gpus_in_use.join(',') ?? ''
-  let binaryName = `cortex${extension()}`
+  let binaryName = `cortex-server${extension()}`
 
   return {
     enginePath: path.join(__dirname, '..', 'bin', engineFolder),
diff --git a/web/hooks/useModels.ts b/web/hooks/useModels.ts
index f884a591e..2b5351098 100644
--- a/web/hooks/useModels.ts
+++ b/web/hooks/useModels.ts
@@ -11,6 +11,8 @@ import {
 
 import { useSetAtom } from 'jotai'
 
+import { useDebouncedCallback } from 'use-debounce'
+
 import { isLocalEngine } from '@/utils/modelEngine'
 
 import { extensionManager } from '@/extension'
@@ -53,17 +55,19 @@ const useModels = () => {
     Promise.all([getDownloadedModels(), getExtensionModels()])
   }, [setDownloadedModels, setExtensionModels])
 
+  const reloadData = useDebouncedCallback(() => getData(), 300)
+
   useEffect(() => {
     // Try get data on mount
-    getData()
+    reloadData()
 
     // Listen for model updates
-    events.on(ModelEvent.OnModelsUpdate, async () => getData())
+    events.on(ModelEvent.OnModelsUpdate, async () => reloadData())
     return () => {
       // Remove listener on unmount
       events.off(ModelEvent.OnModelsUpdate, async () => {})
     }
-  }, [getData])
+  }, [reloadData])
 }
 
 const getModels = async (): Promise<Model[]> =>
diff --git a/web/screens/Thread/ThreadCenterPanel/ChatBody/EmptyThread/index.tsx b/web/screens/Thread/ThreadCenterPanel/ChatBody/EmptyThread/index.tsx
index 6fc05d44b..a99e6306f 100644
--- a/web/screens/Thread/ThreadCenterPanel/ChatBody/EmptyThread/index.tsx
+++ b/web/screens/Thread/ThreadCenterPanel/ChatBody/EmptyThread/index.tsx
@@ -8,6 +8,8 @@ import LogoMark from '@/containers/Brand/Logo/Mark'
 
 import { MainViewState } from '@/constants/screens'
 
+import { isLocalEngine } from '@/utils/modelEngine'
+
 import { mainViewStateAtom } from '@/helpers/atoms/App.atom'
 import { downloadedModelsAtom } from '@/helpers/atoms/Model.atom'
 
@@ -15,8 +17,7 @@ const EmptyThread = () => {
   const downloadedModels = useAtomValue(downloadedModelsAtom)
   const setMainViewState = useSetAtom(mainViewStateAtom)
   const showOnboardingStep =
-    downloadedModels.filter((e) => e.engine === InferenceEngine.nitro)
-      .length === 0
+    downloadedModels.filter((e) => isLocalEngine(e.engine)).length === 0
 
   return (
     <div className="mx-auto flex h-full flex-col items-center justify-center text-center">

From 6c0c2a00d6e3585ed78fab9eede435e9d7c7616b Mon Sep 17 00:00:00 2001
From: Louis <louis@jan.ai>
Date: Tue, 22 Oct 2024 20:33:56 +0700
Subject: [PATCH 26/71] test: fix getModels test case that does not work with
 useDebouncedCallback

---
 web/hooks/useModels.ts | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/web/hooks/useModels.ts b/web/hooks/useModels.ts
index 2b5351098..3d6f7609b 100644
--- a/web/hooks/useModels.ts
+++ b/web/hooks/useModels.ts
@@ -59,7 +59,7 @@ const useModels = () => {
 
   useEffect(() => {
     // Try get data on mount
-    reloadData()
+    getData()
 
     // Listen for model updates
     events.on(ModelEvent.OnModelsUpdate, async () => reloadData())
@@ -67,7 +67,7 @@ const useModels = () => {
       // Remove listener on unmount
       events.off(ModelEvent.OnModelsUpdate, async () => {})
     }
-  }, [reloadData])
+  }, [getData, reloadData])
 }
 
 const getModels = async (): Promise<Model[]> =>

From a0e2f16a3bee9abe0628c624d4c10f7c40da3976 Mon Sep 17 00:00:00 2001
From: Louis <louis@jan.ai>
Date: Wed, 23 Oct 2024 15:33:14 +0700
Subject: [PATCH 27/71] chore: align binary naming convention with llama.cpp
 release

---
 .../bin/version.txt                           |  1 +
 .../inference-cortex-extension/download.bat   | 19 +++---
 .../inference-cortex-extension/download.sh    | 26 ++++----
 .../src/node/execute.test.ts                  | 59 ++++++++++++-------
 .../src/node/execute.ts                       | 37 +++++++-----
 .../src/node/index.ts                         | 10 +++-
 web/hooks/useModels.test.ts                   |  3 +
 web/hooks/useModels.ts                        |  2 +-
 8 files changed, 97 insertions(+), 60 deletions(-)
 create mode 100644 extensions/inference-cortex-extension/bin/version.txt

diff --git a/extensions/inference-cortex-extension/bin/version.txt b/extensions/inference-cortex-extension/bin/version.txt
new file mode 100644
index 000000000..7f207341d
--- /dev/null
+++ b/extensions/inference-cortex-extension/bin/version.txt
@@ -0,0 +1 @@
+1.0.1
\ No newline at end of file
diff --git a/extensions/inference-cortex-extension/download.bat b/extensions/inference-cortex-extension/download.bat
index d764b6df8..0b13ee872 100644
--- a/extensions/inference-cortex-extension/download.bat
+++ b/extensions/inference-cortex-extension/download.bat
@@ -8,13 +8,18 @@ set DOWNLOAD_URL=https://github.com/janhq/cortex.llamacpp/releases/download/%VER
 set SUBFOLDERS=win-cuda-12-0 win-cuda-11-7 win-noavx win-avx win-avx2 win-avx512 win-vulkan
 
 call .\node_modules\.bin\download -e --strip 1 -o %BIN_PATH% https://github.com/janhq/cortex/releases/download/v%CORTEX_VERSION%/cortex-%CORTEX_VERSION%-windows-amd64.tar.gz
-call .\node_modules\.bin\download %DOWNLOAD_URL%-avx2-cuda-12-0.tar.gz -e --strip 1 -o %BIN_PATH%/win-cuda-12-0/engines/cortex.llamacpp
-call .\node_modules\.bin\download %DOWNLOAD_URL%-avx2-cuda-11-7.tar.gz -e --strip 1 -o %BIN_PATH%/win-cuda-11-7/engines/cortex.llamacpp
-call .\node_modules\.bin\download %DOWNLOAD_URL%-noavx.tar.gz -e --strip 1 -o %BIN_PATH%/win-noavx/engines/cortex.llamacpp
-call .\node_modules\.bin\download %DOWNLOAD_URL%-avx.tar.gz -e --strip 1 -o %BIN_PATH%/win-avx/engines/cortex.llamacpp
-call .\node_modules\.bin\download %DOWNLOAD_URL%-avx2.tar.gz -e --strip 1 -o %BIN_PATH%/win-avx2/engines/cortex.llamacpp
-call .\node_modules\.bin\download %DOWNLOAD_URL%-avx512.tar.gz -e --strip 1 -o %BIN_PATH%/win-avx512/engines/cortex.llamacpp
-call .\node_modules\.bin\download %DOWNLOAD_URL%-vulkan.tar.gz -e --strip 1 -o %BIN_PATH%/win-vulkan/engines/cortex.llamacpp
+call .\node_modules\.bin\download %DOWNLOAD_URL%-avx2-cuda-12-0.tar.gz -e --strip 1 -o %BIN_PATH%/avx2-cuda-12-0/engines/cortex.llamacpp
+call .\node_modules\.bin\download %DOWNLOAD_URL%-avx2-cuda-11-7.tar.gz -e --strip 1 -o %BIN_PATH%/avx2-cuda-11-7/engines/cortex.llamacpp
+call .\node_modules\.bin\download %DOWNLOAD_URL%-noavx-cuda-12-0.tar.gz -e --strip 1 -o %BIN_PATH%/noavx-cuda-12-0/engines/cortex.llamacpp
+call .\node_modules\.bin\download %DOWNLOAD_URL%-noavx-cuda-11-7.tar.gz -e --strip 1 -o %BIN_PATH%/noavx-cuda-11-7/engines/cortex.llamacpp
+call .\node_modules\.bin\download %DOWNLOAD_URL%-noavx.tar.gz -e --strip 1 -o %BIN_PATH%/noavx/engines/cortex.llamacpp
+call .\node_modules\.bin\download %DOWNLOAD_URL%-avx.tar.gz -e --strip 1 -o %BIN_PATH%/avx/engines/cortex.llamacpp
+call .\node_modules\.bin\download %DOWNLOAD_URL%-avx2.tar.gz -e --strip 1 -o %BIN_PATH%/avx2/engines/cortex.llamacpp
+call .\node_modules\.bin\download %DOWNLOAD_URL%-avx512.tar.gz -e --strip 1 -o %BIN_PATH%/avx512/engines/cortex.llamacpp
+call .\node_modules\.bin\download %DOWNLOAD_URL%-vulkan.tar.gz -e --strip 1 -o %BIN_PATH%/vulkan/engines/cortex.llamacpp
+call .\node_modules\.bin\download %DOWNLOAD_URL%-cuda-12-0-windows-amd64.tar.gz -e --strip 1 -o %BIN_PATH%
+call .\node_modules\.bin\download %DOWNLOAD_URL%-cuda-11-7-windows-amd64.tar.gz -e --strip 1 -o %BIN_PATH%
+
 
 @REM Loop through each folder and move DLLs (excluding engine.dll)
 for %%F in (%SUBFOLDERS%) do (
diff --git a/extensions/inference-cortex-extension/download.sh b/extensions/inference-cortex-extension/download.sh
index fe1f8af9f..d04f0482d 100755
--- a/extensions/inference-cortex-extension/download.sh
+++ b/extensions/inference-cortex-extension/download.sh
@@ -9,26 +9,30 @@ OS_TYPE=$(uname)
 
 if [ "$OS_TYPE" == "Linux" ]; then
     # Linux downloads
-    download "${CORTEX_RELEASE_URL}/v${CORTEX_VERSION}/cortex-${CORTEX_VERSION}-linux-amd64.tar.gz"  -e --strip 1 -o "./bin"
+    download "${CORTEX_RELEASE_URL}/v${CORTEX_VERSION}/cortex-${CORTEX_VERSION}-linux-amd64.tar.gz" -e --strip 1 -o "./bin"
     chmod +x "./bin/cortex"
 
     # Download engines for Linux
-    download "${ENGINE_DOWNLOAD_URL}-linux-amd64-noavx.tar.gz"  -e --strip 1 -o "./bin/linux-noavx/engines/cortex.llamacpp" 1
-    download "${ENGINE_DOWNLOAD_URL}-linux-amd64-avx.tar.gz"  -e --strip 1 -o "./bin/linux-avx/engines/cortex.llamacpp" 1
-    download "${ENGINE_DOWNLOAD_URL}-linux-amd64-avx2.tar.gz"  -e --strip 1 -o "./bin/linux-avx2/engines/cortex.llamacpp" 1
-    download "${ENGINE_DOWNLOAD_URL}-linux-amd64-avx512.tar.gz"  -e --strip 1 -o "./bin/linux-avx512/engines/cortex.llamacpp" 1
-    download "${ENGINE_DOWNLOAD_URL}-linux-amd64-avx2-cuda-12-0.tar.gz"  -e --strip 1 -o "./bin/linux-cuda-12-0/engines/cortex.llamacpp" 1
-    download "${ENGINE_DOWNLOAD_URL}-linux-amd64-avx2-cuda-11-7.tar.gz"  -e --strip 1 -o "./bin/linux-cuda-11-7/engines/cortex.llamacpp" 1
-    download "${ENGINE_DOWNLOAD_URL}-linux-amd64-vulkan.tar.gz"  -e --strip 1 -o "./bin/linux-vulkan/engines/cortex.llamacpp" 1
+    download "${ENGINE_DOWNLOAD_URL}-linux-amd64-noavx.tar.gz" -e --strip 1 -o "./bin/noavx/engines/cortex.llamacpp" 1
+    download "${ENGINE_DOWNLOAD_URL}-linux-amd64-avx.tar.gz" -e --strip 1 -o "./bin/avx/engines/cortex.llamacpp" 1
+    download "${ENGINE_DOWNLOAD_URL}-linux-amd64-avx2.tar.gz" -e --strip 1 -o "./bin/avx2/engines/cortex.llamacpp" 1
+    download "${ENGINE_DOWNLOAD_URL}-linux-amd64-avx512.tar.gz" -e --strip 1 -o "./bin/avx512/engines/cortex.llamacpp" 1
+    download "${ENGINE_DOWNLOAD_URL}-linux-amd64-avx2-cuda-12-0.tar.gz" -e --strip 1 -o "./bin/avx2-cuda-12-0/engines/cortex.llamacpp" 1
+    download "${ENGINE_DOWNLOAD_URL}-linux-amd64-avx2-cuda-11-7.tar.gz" -e --strip 1 -o "./bin/avx2-cuda-11-7/engines/cortex.llamacpp" 1
+    download "${ENGINE_DOWNLOAD_URL}-linux-amd64-noavx-cuda-12-0.tar.gz" -e --strip 1 -o "./bin/noavx-cuda-12-0/engines/cortex.llamacpp" 1
+    download "${ENGINE_DOWNLOAD_URL}-linux-amd64-noavx-cuda-11-7.tar.gz" -e --strip 1 -o "./bin/noavx-cuda-11-7/engines/cortex.llamacpp" 1
+    download "${ENGINE_DOWNLOAD_URL}-linux-amd64-vulkan.tar.gz" -e --strip 1 -o "./bin/vulkan/engines/cortex.llamacpp" 1
+    download "${ENGINE_DOWNLOAD_URL}-cuda-12-0-linux-amd64.tar.gz" -e --strip 1 -o "./bin" 1
+    download "${ENGINE_DOWNLOAD_URL}-cuda-11-7-linux-amd64.tar.gz" -e --strip 1 -o "./bin" 1
 
 elif [ "$OS_TYPE" == "Darwin" ]; then
     # macOS downloads
-    download "${CORTEX_RELEASE_URL}/v${CORTEX_VERSION}/cortex-${CORTEX_VERSION}-mac-universal.tar.gz"  -e --strip 1 -o "./bin" 1
+    download "${CORTEX_RELEASE_URL}/v${CORTEX_VERSION}/cortex-${CORTEX_VERSION}-mac-universal.tar.gz" -e --strip 1 -o "./bin" 1
     chmod +x "./bin/cortex"
 
     # Download engines for macOS
-    download "${ENGINE_DOWNLOAD_URL}-mac-arm64.tar.gz" -e --strip 1 -o ./bin/mac-arm64/engines/cortex.llamacpp
-    download "${ENGINE_DOWNLOAD_URL}-mac-amd64.tar.gz" -e --strip 1 -o ./bin/mac-x64/engines/cortex.llamacpp
+    download "${ENGINE_DOWNLOAD_URL}-mac-arm64.tar.gz" -e --strip 1 -o ./bin/arm64/engines/cortex.llamacpp
+    download "${ENGINE_DOWNLOAD_URL}-mac-amd64.tar.gz" -e --strip 1 -o ./bin/x64/engines/cortex.llamacpp
 
 else
     echo "Unsupported operating system: $OS_TYPE"
diff --git a/extensions/inference-cortex-extension/src/node/execute.test.ts b/extensions/inference-cortex-extension/src/node/execute.test.ts
index a1b5c4ba4..3c0b32df5 100644
--- a/extensions/inference-cortex-extension/src/node/execute.test.ts
+++ b/extensions/inference-cortex-extension/src/node/execute.test.ts
@@ -27,8 +27,8 @@ jest.mock('cpu-instructions', () => ({
     cpuInfo: jest.fn(),
   },
 }))
-let mock = cpuInfo.cpuInfo as jest.Mock
-mock.mockReturnValue([])
+let mockCpuInfo = cpuInfo.cpuInfo as jest.Mock
+mockCpuInfo.mockReturnValue([])
 
 describe('test executable cortex file', () => {
   afterAll(function () {
@@ -46,7 +46,8 @@ describe('test executable cortex file', () => {
     })
     expect(executableCortexFile(testSettings)).toEqual(
       expect.objectContaining({
-        enginePath: expect.stringContaining(`mac-arm64`),
+        enginePath: expect.stringContaining(`/bin/arm64`),
+        binPath: expect.stringContaining(`/bin`),
         executablePath:
           originalPlatform === 'darwin'
             ? expect.stringContaining(`/cortex-server`)
@@ -60,7 +61,8 @@ describe('test executable cortex file', () => {
     })
     expect(executableCortexFile(testSettings)).toEqual(
       expect.objectContaining({
-        enginePath: expect.stringContaining(`mac-x64`),
+        enginePath: expect.stringContaining(`/bin/x64`),
+        binPath: expect.stringContaining(`/bin`),
         executablePath:
           originalPlatform === 'darwin'
             ? expect.stringContaining(`/cortex-server`)
@@ -79,9 +81,11 @@ describe('test executable cortex file', () => {
       ...testSettings,
       run_mode: 'cpu',
     }
+    mockCpuInfo.mockReturnValue(['avx'])
     expect(executableCortexFile(settings)).toEqual(
       expect.objectContaining({
-        enginePath: expect.stringContaining(`win`),
+        enginePath: expect.stringContaining(`/bin/avx`),
+        binPath: expect.stringContaining(`/bin`),
         executablePath: expect.stringContaining(`/cortex-server.exe`),
         cudaVisibleDevices: '',
         vkVisibleDevices: '',
@@ -115,7 +119,8 @@ describe('test executable cortex file', () => {
     }
     expect(executableCortexFile(settings)).toEqual(
       expect.objectContaining({
-        enginePath: expect.stringContaining(`win-cuda-11-7`),
+        enginePath: expect.stringContaining(`cuda-11-7`),
+        binPath: expect.stringContaining(`/bin`),
         executablePath: expect.stringContaining(`/cortex-server.exe`),
         cudaVisibleDevices: '0',
         vkVisibleDevices: '0',
@@ -149,7 +154,8 @@ describe('test executable cortex file', () => {
     }
     expect(executableCortexFile(settings)).toEqual(
       expect.objectContaining({
-        enginePath: expect.stringContaining(`win-cuda-12-0`),
+        enginePath: expect.stringContaining(`cuda-12-0`),
+        binPath: expect.stringContaining(`/bin`),
         executablePath: expect.stringContaining(`/cortex-server.exe`),
         cudaVisibleDevices: '0',
         vkVisibleDevices: '0',
@@ -165,9 +171,10 @@ describe('test executable cortex file', () => {
       ...testSettings,
       run_mode: 'cpu',
     }
+    mockCpuInfo.mockReturnValue(['noavx'])
     expect(executableCortexFile(settings)).toEqual(
       expect.objectContaining({
-        enginePath: expect.stringContaining(`linux`),
+        enginePath: expect.stringContaining(`noavx`),
         executablePath: expect.stringContaining(`/cortex-server`),
         cudaVisibleDevices: '',
         vkVisibleDevices: '',
@@ -201,7 +208,8 @@ describe('test executable cortex file', () => {
     }
     expect(executableCortexFile(settings)).toEqual(
       expect.objectContaining({
-        enginePath: expect.stringContaining(`linux-cuda-11-7`),
+        enginePath: expect.stringContaining(`cuda-11-7`),
+        binPath: expect.stringContaining(`/bin`),
         executablePath: expect.stringContaining(`/cortex-server`),
         cudaVisibleDevices: '0',
         vkVisibleDevices: '0',
@@ -235,7 +243,8 @@ describe('test executable cortex file', () => {
     }
     expect(executableCortexFile(settings)).toEqual(
       expect.objectContaining({
-        enginePath: expect.stringContaining(`linux-cuda-12-0`),
+        enginePath: expect.stringContaining(`cuda-12-0`),
+        binPath: expect.stringContaining(`/bin`),
         executablePath: expect.stringContaining(`/cortex-server`),
         cudaVisibleDevices: '0',
         vkVisibleDevices: '0',
@@ -255,11 +264,12 @@ describe('test executable cortex file', () => {
 
     const cpuInstructions = ['avx512', 'avx2', 'avx', 'noavx']
     cpuInstructions.forEach((instruction) => {
-      mock.mockReturnValue([instruction])
+      mockCpuInfo.mockReturnValue([instruction])
 
       expect(executableCortexFile(settings)).toEqual(
         expect.objectContaining({
-          enginePath: expect.stringContaining(`linux-${instruction}`),
+          enginePath: expect.stringContaining(instruction),
+          binPath: expect.stringContaining(`/bin`),
           executablePath: expect.stringContaining(`/cortex-server`),
 
           cudaVisibleDevices: '',
@@ -279,10 +289,11 @@ describe('test executable cortex file', () => {
     }
     const cpuInstructions = ['avx512', 'avx2', 'avx', 'noavx']
     cpuInstructions.forEach((instruction) => {
-      mock.mockReturnValue([instruction])
+      mockCpuInfo.mockReturnValue([instruction])
       expect(executableCortexFile(settings)).toEqual(
         expect.objectContaining({
-          enginePath: expect.stringContaining(`win-${instruction}`),
+          enginePath: expect.stringContaining(instruction),
+          binPath: expect.stringContaining(`/bin`),
           executablePath: expect.stringContaining(`/cortex-server.exe`),
           cudaVisibleDevices: '',
           vkVisibleDevices: '',
@@ -318,10 +329,11 @@ describe('test executable cortex file', () => {
     }
     const cpuInstructions = ['avx512', 'avx2', 'avx', 'noavx']
     cpuInstructions.forEach((instruction) => {
-      mock.mockReturnValue([instruction])
+      mockCpuInfo.mockReturnValue([instruction])
       expect(executableCortexFile(settings)).toEqual(
         expect.objectContaining({
-          enginePath: expect.stringContaining(`win-cuda-12-0`),
+          enginePath: expect.stringContaining(`cuda-12-0`),
+          binPath: expect.stringContaining(`/bin`),
           executablePath: expect.stringContaining(`/cortex-server.exe`),
           cudaVisibleDevices: '0',
           vkVisibleDevices: '0',
@@ -357,10 +369,11 @@ describe('test executable cortex file', () => {
       ],
     }
     cpuInstructions.forEach((instruction) => {
-      mock.mockReturnValue([instruction])
+      mockCpuInfo.mockReturnValue([instruction])
       expect(executableCortexFile(settings)).toEqual(
         expect.objectContaining({
-          enginePath: expect.stringContaining(`linux-cuda-12-0`),
+          enginePath: expect.stringContaining(`cuda-12-0`),
+          binPath: expect.stringContaining(`/bin`),
           executablePath: expect.stringContaining(`/cortex-server`),
           cudaVisibleDevices: '0',
           vkVisibleDevices: '0',
@@ -397,10 +410,11 @@ describe('test executable cortex file', () => {
       ],
     }
     cpuInstructions.forEach((instruction) => {
-      mock.mockReturnValue([instruction])
+      mockCpuInfo.mockReturnValue([instruction])
       expect(executableCortexFile(settings)).toEqual(
         expect.objectContaining({
-          enginePath: expect.stringContaining(`linux-vulkan`),
+          enginePath: expect.stringContaining(`vulkan`),
+          binPath: expect.stringContaining(`/bin`),
           executablePath: expect.stringContaining(`/cortex-server`),
           cudaVisibleDevices: '0',
           vkVisibleDevices: '0',
@@ -423,10 +437,11 @@ describe('test executable cortex file', () => {
         ...testSettings,
         run_mode: 'cpu',
       }
-      mock.mockReturnValue([])
+      mockCpuInfo.mockReturnValue([])
       expect(executableCortexFile(settings)).toEqual(
         expect.objectContaining({
-          enginePath: expect.stringContaining(`mac-x64`),
+          enginePath: expect.stringContaining(`x64`),
+          binPath: expect.stringContaining(`/bin`),
           executablePath:
             originalPlatform === 'darwin'
               ? expect.stringContaining(`/cortex-server`)
diff --git a/extensions/inference-cortex-extension/src/node/execute.ts b/extensions/inference-cortex-extension/src/node/execute.ts
index b5f848332..18d840fdd 100644
--- a/extensions/inference-cortex-extension/src/node/execute.ts
+++ b/extensions/inference-cortex-extension/src/node/execute.ts
@@ -4,6 +4,7 @@ import { cpuInfo } from 'cpu-instructions'
 
 export interface CortexExecutableOptions {
   enginePath: string
+  binPath: string
   executablePath: string
   cudaVisibleDevices: string
   vkVisibleDevices: string
@@ -36,8 +37,8 @@ const os = (): string => {
     ? 'win'
     : process.platform === 'darwin'
       ? process.arch === 'arm64'
-        ? 'mac-arm64'
-        : 'mac-x64'
+        ? 'arm64'
+        : 'x64'
       : 'linux'
 }
 
@@ -66,7 +67,7 @@ const cudaVersion = (settings?: GpuSetting): '11-7' | '12-0' | undefined => {
  * The CPU instructions that will be set - either 'avx512', 'avx2', 'avx', or 'noavx'.
  * @returns
  */
-const cpuInstructions = () => {
+const cpuInstructions = (): string => {
   if (process.platform === 'darwin') return ''
   return cpuInfo.cpuInfo().some((e) => e.toUpperCase() === 'AVX512')
     ? 'avx512'
@@ -84,26 +85,30 @@ const cpuInstructions = () => {
 export const executableCortexFile = (
   gpuSetting?: GpuSetting
 ): CortexExecutableOptions => {
-  let engineFolder = [
-    os(),
-    ...(gpuSetting?.vulkan
-      ? []
+  const cpuInstruction = cpuInstructions()
+  let engineFolder = gpuSetting?.vulkan
+    ? 'vulkan'
+    : process.platform === 'darwin'
+      ? os()
       : [
-          gpuRunMode(gpuSetting) !== 'cuda' ? cpuInstructions() : '',
+          gpuRunMode(gpuSetting) !== 'cuda' ||
+          cpuInstruction === 'avx' ||
+          cpuInstruction === 'noavx'
+            ? cpuInstruction
+            : '',
           gpuRunMode(gpuSetting),
           cudaVersion(gpuSetting),
-        ]),
-    gpuSetting?.vulkan ? 'vulkan' : undefined,
-  ]
-    .filter((e) => !!e)
-    .join('-')
+        ]
+          .filter((e) => !!e)
+          .join('-')
   let cudaVisibleDevices = gpuSetting?.gpus_in_use.join(',') ?? ''
   let vkVisibleDevices = gpuSetting?.gpus_in_use.join(',') ?? ''
   let binaryName = `cortex-server${extension()}`
-
+  const binPath = path.join(__dirname, '..', 'bin')
   return {
-    enginePath: path.join(__dirname, '..', 'bin', engineFolder),
-    executablePath: path.join(__dirname, '..', 'bin', binaryName),
+    enginePath: path.join(binPath, engineFolder),
+    executablePath: path.join(binPath, binaryName),
+    binPath: binPath,
     cudaVisibleDevices,
     vkVisibleDevices,
   }
diff --git a/extensions/inference-cortex-extension/src/node/index.ts b/extensions/inference-cortex-extension/src/node/index.ts
index 788318c84..c54dae4c0 100644
--- a/extensions/inference-cortex-extension/src/node/index.ts
+++ b/extensions/inference-cortex-extension/src/node/index.ts
@@ -27,17 +27,21 @@ function run(systemInfo?: SystemInformation): Promise<any> {
 
     // Execute the binary
     log(`[CORTEX]:: Spawn cortex at path: ${executableOptions.executablePath}`)
-    log(`[CORTEX]::Debug: Cortex engine path: ${executableOptions.enginePath}`)
+    log(`[CORTEX]:: Cortex engine path: ${executableOptions.enginePath}`)
 
     // Add engine path to the PATH and LD_LIBRARY_PATH
     process.env.PATH = (process.env.PATH || '').concat(
       path.delimiter,
-      executableOptions.enginePath
+      executableOptions.enginePath,
+      path.delimiter,
+      executableOptions.binPath
     )
     log(`[CORTEX] PATH: ${process.env.PATH}`)
     process.env.LD_LIBRARY_PATH = (process.env.LD_LIBRARY_PATH || '').concat(
       path.delimiter,
-      executableOptions.enginePath
+      executableOptions.enginePath,
+      path.delimiter,
+      executableOptions.binPath
     )
 
     const dataFolderPath = getJanDataFolderPath()
diff --git a/web/hooks/useModels.test.ts b/web/hooks/useModels.test.ts
index 2def2b745..0440b5443 100644
--- a/web/hooks/useModels.test.ts
+++ b/web/hooks/useModels.test.ts
@@ -6,6 +6,9 @@ import { extensionManager } from '@/extension'
 // Mock dependencies
 jest.mock('@janhq/core')
 jest.mock('@/extension')
+jest.mock('use-debounce', () => ({
+  useDebouncedCallback: jest.fn().mockImplementation((fn) => fn),
+}))
 
 import useModels from './useModels'
 
diff --git a/web/hooks/useModels.ts b/web/hooks/useModels.ts
index 3d6f7609b..8bdbd6a90 100644
--- a/web/hooks/useModels.ts
+++ b/web/hooks/useModels.ts
@@ -59,7 +59,7 @@ const useModels = () => {
 
   useEffect(() => {
     // Try get data on mount
-    getData()
+    reloadData()
 
     // Listen for model updates
     events.on(ModelEvent.OnModelsUpdate, async () => reloadData())

From dc87f37a9bc52c3e99ba721890cc133c300e35d2 Mon Sep 17 00:00:00 2001
From: Louis <louis@jan.ai>
Date: Wed, 23 Oct 2024 18:08:47 +0700
Subject: [PATCH 28/71] fix: package cortex.cpp engines and cuda on windows

---
 .../inference-cortex-extension/download.bat   |  9 +--
 .../inference-cortex-extension/download.sh    |  7 +-
 .../src/node/execute.test.ts                  | 66 ++++++++++---------
 .../src/node/execute.ts                       |  5 +-
 4 files changed, 45 insertions(+), 42 deletions(-)

diff --git a/extensions/inference-cortex-extension/download.bat b/extensions/inference-cortex-extension/download.bat
index 0b13ee872..e19786971 100644
--- a/extensions/inference-cortex-extension/download.bat
+++ b/extensions/inference-cortex-extension/download.bat
@@ -3,8 +3,9 @@ set BIN_PATH=./bin
 set /p CORTEX_VERSION=<./bin/version.txt
 
 @REM Download cortex.llamacpp binaries
-set VERSION=v0.1.25
-set DOWNLOAD_URL=https://github.com/janhq/cortex.llamacpp/releases/download/%VERSION%/cortex.llamacpp-0.1.34-windows-amd64
+set VERSION=v0.1.35
+set DOWNLOAD_URL=https://github.com/janhq/cortex.llamacpp/releases/download/%VERSION%/cortex.llamacpp-0.1.35-windows-amd64
+set CUDA_DOWNLOAD_URL=https://github.com/janhq/cortex.llamacpp/releases/download/%VERSION%
 set SUBFOLDERS=win-cuda-12-0 win-cuda-11-7 win-noavx win-avx win-avx2 win-avx512 win-vulkan
 
 call .\node_modules\.bin\download -e --strip 1 -o %BIN_PATH% https://github.com/janhq/cortex/releases/download/v%CORTEX_VERSION%/cortex-%CORTEX_VERSION%-windows-amd64.tar.gz
@@ -17,8 +18,8 @@ call .\node_modules\.bin\download %DOWNLOAD_URL%-avx.tar.gz -e --strip 1 -o %BIN
 call .\node_modules\.bin\download %DOWNLOAD_URL%-avx2.tar.gz -e --strip 1 -o %BIN_PATH%/avx2/engines/cortex.llamacpp
 call .\node_modules\.bin\download %DOWNLOAD_URL%-avx512.tar.gz -e --strip 1 -o %BIN_PATH%/avx512/engines/cortex.llamacpp
 call .\node_modules\.bin\download %DOWNLOAD_URL%-vulkan.tar.gz -e --strip 1 -o %BIN_PATH%/vulkan/engines/cortex.llamacpp
-call .\node_modules\.bin\download %DOWNLOAD_URL%-cuda-12-0-windows-amd64.tar.gz -e --strip 1 -o %BIN_PATH%
-call .\node_modules\.bin\download %DOWNLOAD_URL%-cuda-11-7-windows-amd64.tar.gz -e --strip 1 -o %BIN_PATH%
+call .\node_modules\.bin\download %CUDA_DOWNLOAD_URL%-cuda-12-0-windows-amd64.tar.gz -e --strip 1 -o %BIN_PATH%
+call .\node_modules\.bin\download %CUDA_DOWNLOAD_URL%-cuda-11-7-windows-amd64.tar.gz -e --strip 1 -o %BIN_PATH%
 
 
 @REM Loop through each folder and move DLLs (excluding engine.dll)
diff --git a/extensions/inference-cortex-extension/download.sh b/extensions/inference-cortex-extension/download.sh
index d04f0482d..9426bc872 100755
--- a/extensions/inference-cortex-extension/download.sh
+++ b/extensions/inference-cortex-extension/download.sh
@@ -3,7 +3,8 @@
 # Read CORTEX_VERSION
 CORTEX_VERSION=$(cat ./bin/version.txt)
 CORTEX_RELEASE_URL="https://github.com/janhq/cortex/releases/download"
-ENGINE_DOWNLOAD_URL="https://github.com/janhq/cortex.llamacpp/releases/download/v0.1.34/cortex.llamacpp-0.1.34"
+ENGINE_DOWNLOAD_URL="https://github.com/janhq/cortex.llamacpp/releases/download/v0.1.35/cortex.llamacpp-0.1.35"
+CUDA_DOWNLOAD_URL="https://github.com/janhq/cortex.llamacpp/releases/download/v0.1.35"
 # Detect platform
 OS_TYPE=$(uname)
 
@@ -22,8 +23,8 @@ if [ "$OS_TYPE" == "Linux" ]; then
     download "${ENGINE_DOWNLOAD_URL}-linux-amd64-noavx-cuda-12-0.tar.gz" -e --strip 1 -o "./bin/noavx-cuda-12-0/engines/cortex.llamacpp" 1
     download "${ENGINE_DOWNLOAD_URL}-linux-amd64-noavx-cuda-11-7.tar.gz" -e --strip 1 -o "./bin/noavx-cuda-11-7/engines/cortex.llamacpp" 1
     download "${ENGINE_DOWNLOAD_URL}-linux-amd64-vulkan.tar.gz" -e --strip 1 -o "./bin/vulkan/engines/cortex.llamacpp" 1
-    download "${ENGINE_DOWNLOAD_URL}-cuda-12-0-linux-amd64.tar.gz" -e --strip 1 -o "./bin" 1
-    download "${ENGINE_DOWNLOAD_URL}-cuda-11-7-linux-amd64.tar.gz" -e --strip 1 -o "./bin" 1
+    download "${CUDA_DOWNLOAD_URL}-cuda-12-0-linux-amd64.tar.gz" -e --strip 1 -o "./bin" 1
+    download "${CUDA_DOWNLOAD_URL}-cuda-11-7-linux-amd64.tar.gz" -e --strip 1 -o "./bin" 1
 
 elif [ "$OS_TYPE" == "Darwin" ]; then
     # macOS downloads
diff --git a/extensions/inference-cortex-extension/src/node/execute.test.ts b/extensions/inference-cortex-extension/src/node/execute.test.ts
index 3c0b32df5..622eb38af 100644
--- a/extensions/inference-cortex-extension/src/node/execute.test.ts
+++ b/extensions/inference-cortex-extension/src/node/execute.test.ts
@@ -46,11 +46,11 @@ describe('test executable cortex file', () => {
     })
     expect(executableCortexFile(testSettings)).toEqual(
       expect.objectContaining({
-        enginePath: expect.stringContaining(`/bin/arm64`),
-        binPath: expect.stringContaining(`/bin`),
+        enginePath: expect.stringContaining(`arm64`),
+        binPath: expect.stringContaining(`bin`),
         executablePath:
           originalPlatform === 'darwin'
-            ? expect.stringContaining(`/cortex-server`)
+            ? expect.stringContaining(`cortex-server`)
             : expect.anything(),
         cudaVisibleDevices: '',
         vkVisibleDevices: '',
@@ -61,11 +61,11 @@ describe('test executable cortex file', () => {
     })
     expect(executableCortexFile(testSettings)).toEqual(
       expect.objectContaining({
-        enginePath: expect.stringContaining(`/bin/x64`),
-        binPath: expect.stringContaining(`/bin`),
+        enginePath: expect.stringContaining(`x64`),
+        binPath: expect.stringContaining(`bin`),
         executablePath:
           originalPlatform === 'darwin'
-            ? expect.stringContaining(`/cortex-server`)
+            ? expect.stringContaining(`cortex-server`)
             : expect.anything(),
         cudaVisibleDevices: '',
         vkVisibleDevices: '',
@@ -84,9 +84,9 @@ describe('test executable cortex file', () => {
     mockCpuInfo.mockReturnValue(['avx'])
     expect(executableCortexFile(settings)).toEqual(
       expect.objectContaining({
-        enginePath: expect.stringContaining(`/bin/avx`),
-        binPath: expect.stringContaining(`/bin`),
-        executablePath: expect.stringContaining(`/cortex-server.exe`),
+        enginePath: expect.stringContaining(`avx`),
+        binPath: expect.stringContaining(`bin`),
+        executablePath: expect.stringContaining(`cortex-server.exe`),
         cudaVisibleDevices: '',
         vkVisibleDevices: '',
       })
@@ -117,11 +117,12 @@ describe('test executable cortex file', () => {
         },
       ],
     }
+    mockCpuInfo.mockReturnValue(['avx2'])
     expect(executableCortexFile(settings)).toEqual(
       expect.objectContaining({
-        enginePath: expect.stringContaining(`cuda-11-7`),
-        binPath: expect.stringContaining(`/bin`),
-        executablePath: expect.stringContaining(`/cortex-server.exe`),
+        enginePath: expect.stringContaining(`avx2-cuda-11-7`),
+        binPath: expect.stringContaining(`bin`),
+        executablePath: expect.stringContaining(`cortex-server.exe`),
         cudaVisibleDevices: '0',
         vkVisibleDevices: '0',
       })
@@ -152,11 +153,12 @@ describe('test executable cortex file', () => {
         },
       ],
     }
+    mockCpuInfo.mockReturnValue(['noavx'])
     expect(executableCortexFile(settings)).toEqual(
       expect.objectContaining({
-        enginePath: expect.stringContaining(`cuda-12-0`),
-        binPath: expect.stringContaining(`/bin`),
-        executablePath: expect.stringContaining(`/cortex-server.exe`),
+        enginePath: expect.stringContaining(`noavx-cuda-12-0`),
+        binPath: expect.stringContaining(`bin`),
+        executablePath: expect.stringContaining(`cortex-server.exe`),
         cudaVisibleDevices: '0',
         vkVisibleDevices: '0',
       })
@@ -175,7 +177,7 @@ describe('test executable cortex file', () => {
     expect(executableCortexFile(settings)).toEqual(
       expect.objectContaining({
         enginePath: expect.stringContaining(`noavx`),
-        executablePath: expect.stringContaining(`/cortex-server`),
+        executablePath: expect.stringContaining(`cortex-server`),
         cudaVisibleDevices: '',
         vkVisibleDevices: '',
       })
@@ -209,8 +211,8 @@ describe('test executable cortex file', () => {
     expect(executableCortexFile(settings)).toEqual(
       expect.objectContaining({
         enginePath: expect.stringContaining(`cuda-11-7`),
-        binPath: expect.stringContaining(`/bin`),
-        executablePath: expect.stringContaining(`/cortex-server`),
+        binPath: expect.stringContaining(`bin`),
+        executablePath: expect.stringContaining(`cortex-server`),
         cudaVisibleDevices: '0',
         vkVisibleDevices: '0',
       })
@@ -244,8 +246,8 @@ describe('test executable cortex file', () => {
     expect(executableCortexFile(settings)).toEqual(
       expect.objectContaining({
         enginePath: expect.stringContaining(`cuda-12-0`),
-        binPath: expect.stringContaining(`/bin`),
-        executablePath: expect.stringContaining(`/cortex-server`),
+        binPath: expect.stringContaining(`bin`),
+        executablePath: expect.stringContaining(`cortex-server`),
         cudaVisibleDevices: '0',
         vkVisibleDevices: '0',
       })
@@ -269,8 +271,8 @@ describe('test executable cortex file', () => {
       expect(executableCortexFile(settings)).toEqual(
         expect.objectContaining({
           enginePath: expect.stringContaining(instruction),
-          binPath: expect.stringContaining(`/bin`),
-          executablePath: expect.stringContaining(`/cortex-server`),
+          binPath: expect.stringContaining(`bin`),
+          executablePath: expect.stringContaining(`cortex-server`),
 
           cudaVisibleDevices: '',
           vkVisibleDevices: '',
@@ -293,8 +295,8 @@ describe('test executable cortex file', () => {
       expect(executableCortexFile(settings)).toEqual(
         expect.objectContaining({
           enginePath: expect.stringContaining(instruction),
-          binPath: expect.stringContaining(`/bin`),
-          executablePath: expect.stringContaining(`/cortex-server.exe`),
+          binPath: expect.stringContaining(`bin`),
+          executablePath: expect.stringContaining(`cortex-server.exe`),
           cudaVisibleDevices: '',
           vkVisibleDevices: '',
         })
@@ -333,8 +335,8 @@ describe('test executable cortex file', () => {
       expect(executableCortexFile(settings)).toEqual(
         expect.objectContaining({
           enginePath: expect.stringContaining(`cuda-12-0`),
-          binPath: expect.stringContaining(`/bin`),
-          executablePath: expect.stringContaining(`/cortex-server.exe`),
+          binPath: expect.stringContaining(`bin`),
+          executablePath: expect.stringContaining(`cortex-server.exe`),
           cudaVisibleDevices: '0',
           vkVisibleDevices: '0',
         })
@@ -373,8 +375,8 @@ describe('test executable cortex file', () => {
       expect(executableCortexFile(settings)).toEqual(
         expect.objectContaining({
           enginePath: expect.stringContaining(`cuda-12-0`),
-          binPath: expect.stringContaining(`/bin`),
-          executablePath: expect.stringContaining(`/cortex-server`),
+          binPath: expect.stringContaining(`bin`),
+          executablePath: expect.stringContaining(`cortex-server`),
           cudaVisibleDevices: '0',
           vkVisibleDevices: '0',
         })
@@ -414,8 +416,8 @@ describe('test executable cortex file', () => {
       expect(executableCortexFile(settings)).toEqual(
         expect.objectContaining({
           enginePath: expect.stringContaining(`vulkan`),
-          binPath: expect.stringContaining(`/bin`),
-          executablePath: expect.stringContaining(`/cortex-server`),
+          binPath: expect.stringContaining(`bin`),
+          executablePath: expect.stringContaining(`cortex-server`),
           cudaVisibleDevices: '0',
           vkVisibleDevices: '0',
         })
@@ -441,10 +443,10 @@ describe('test executable cortex file', () => {
       expect(executableCortexFile(settings)).toEqual(
         expect.objectContaining({
           enginePath: expect.stringContaining(`x64`),
-          binPath: expect.stringContaining(`/bin`),
+          binPath: expect.stringContaining(`bin`),
           executablePath:
             originalPlatform === 'darwin'
-              ? expect.stringContaining(`/cortex-server`)
+              ? expect.stringContaining(`cortex-server`)
               : expect.anything(),
           cudaVisibleDevices: '',
           vkVisibleDevices: '',
diff --git a/extensions/inference-cortex-extension/src/node/execute.ts b/extensions/inference-cortex-extension/src/node/execute.ts
index 18d840fdd..b8e768bb9 100644
--- a/extensions/inference-cortex-extension/src/node/execute.ts
+++ b/extensions/inference-cortex-extension/src/node/execute.ts
@@ -92,10 +92,9 @@ export const executableCortexFile = (
       ? os()
       : [
           gpuRunMode(gpuSetting) !== 'cuda' ||
-          cpuInstruction === 'avx' ||
-          cpuInstruction === 'noavx'
+          cpuInstruction === 'avx2'
             ? cpuInstruction
-            : '',
+            : 'noavx',
           gpuRunMode(gpuSetting),
           cudaVersion(gpuSetting),
         ]

From 5f075c855452dd4587ca307e792ddd5717fbf9d6 Mon Sep 17 00:00:00 2001
From: Louis <louis@jan.ai>
Date: Wed, 23 Oct 2024 18:30:25 +0700
Subject: [PATCH 29/71] fix: prebundle cudart and cublas

---
 .../inference-cortex-extension/download.bat    |  4 ++--
 .../inference-cortex-extension/download.sh     |  4 ++--
 .../src/node/execute.ts                        | 18 +++++++++---------
 3 files changed, 13 insertions(+), 13 deletions(-)

diff --git a/extensions/inference-cortex-extension/download.bat b/extensions/inference-cortex-extension/download.bat
index e19786971..6d917cf79 100644
--- a/extensions/inference-cortex-extension/download.bat
+++ b/extensions/inference-cortex-extension/download.bat
@@ -18,8 +18,8 @@ call .\node_modules\.bin\download %DOWNLOAD_URL%-avx.tar.gz -e --strip 1 -o %BIN
 call .\node_modules\.bin\download %DOWNLOAD_URL%-avx2.tar.gz -e --strip 1 -o %BIN_PATH%/avx2/engines/cortex.llamacpp
 call .\node_modules\.bin\download %DOWNLOAD_URL%-avx512.tar.gz -e --strip 1 -o %BIN_PATH%/avx512/engines/cortex.llamacpp
 call .\node_modules\.bin\download %DOWNLOAD_URL%-vulkan.tar.gz -e --strip 1 -o %BIN_PATH%/vulkan/engines/cortex.llamacpp
-call .\node_modules\.bin\download %CUDA_DOWNLOAD_URL%-cuda-12-0-windows-amd64.tar.gz -e --strip 1 -o %BIN_PATH%
-call .\node_modules\.bin\download %CUDA_DOWNLOAD_URL%-cuda-11-7-windows-amd64.tar.gz -e --strip 1 -o %BIN_PATH%
+call .\node_modules\.bin\download %CUDA_DOWNLOAD_URL%/cuda-12-0-windows-amd64.tar.gz -e --strip 1 -o %BIN_PATH%
+call .\node_modules\.bin\download %CUDA_DOWNLOAD_URL%/cuda-11-7-windows-amd64.tar.gz -e --strip 1 -o %BIN_PATH%
 
 
 @REM Loop through each folder and move DLLs (excluding engine.dll)
diff --git a/extensions/inference-cortex-extension/download.sh b/extensions/inference-cortex-extension/download.sh
index 9426bc872..3e809765c 100755
--- a/extensions/inference-cortex-extension/download.sh
+++ b/extensions/inference-cortex-extension/download.sh
@@ -23,8 +23,8 @@ if [ "$OS_TYPE" == "Linux" ]; then
     download "${ENGINE_DOWNLOAD_URL}-linux-amd64-noavx-cuda-12-0.tar.gz" -e --strip 1 -o "./bin/noavx-cuda-12-0/engines/cortex.llamacpp" 1
     download "${ENGINE_DOWNLOAD_URL}-linux-amd64-noavx-cuda-11-7.tar.gz" -e --strip 1 -o "./bin/noavx-cuda-11-7/engines/cortex.llamacpp" 1
     download "${ENGINE_DOWNLOAD_URL}-linux-amd64-vulkan.tar.gz" -e --strip 1 -o "./bin/vulkan/engines/cortex.llamacpp" 1
-    download "${CUDA_DOWNLOAD_URL}-cuda-12-0-linux-amd64.tar.gz" -e --strip 1 -o "./bin" 1
-    download "${CUDA_DOWNLOAD_URL}-cuda-11-7-linux-amd64.tar.gz" -e --strip 1 -o "./bin" 1
+    download "${CUDA_DOWNLOAD_URL}/cuda-12-0-linux-amd64.tar.gz" -e --strip 1 -o "./bin" 1
+    download "${CUDA_DOWNLOAD_URL}/cuda-11-7-linux-amd64.tar.gz" -e --strip 1 -o "./bin" 1
 
 elif [ "$OS_TYPE" == "Darwin" ]; then
     # macOS downloads
diff --git a/extensions/inference-cortex-extension/src/node/execute.ts b/extensions/inference-cortex-extension/src/node/execute.ts
index b8e768bb9..74ffb48c6 100644
--- a/extensions/inference-cortex-extension/src/node/execute.ts
+++ b/extensions/inference-cortex-extension/src/node/execute.ts
@@ -91,15 +91,15 @@ export const executableCortexFile = (
     : process.platform === 'darwin'
       ? os()
       : [
-          gpuRunMode(gpuSetting) !== 'cuda' ||
-          cpuInstruction === 'avx2'
-            ? cpuInstruction
-            : 'noavx',
-          gpuRunMode(gpuSetting),
-          cudaVersion(gpuSetting),
-        ]
-          .filter((e) => !!e)
-          .join('-')
+        gpuRunMode(gpuSetting) !== 'cuda' ||
+          cpuInstruction === 'avx2' || cpuInstruction === 'avx512'
+          ? cpuInstruction
+          : 'noavx',
+        gpuRunMode(gpuSetting),
+        cudaVersion(gpuSetting),
+      ]
+        .filter((e) => !!e)
+        .join('-')
   let cudaVisibleDevices = gpuSetting?.gpus_in_use.join(',') ?? ''
   let vkVisibleDevices = gpuSetting?.gpus_in_use.join(',') ?? ''
   let binaryName = `cortex-server${extension()}`

From 8f778ee90f0440192c5098d8d58ec41686b8c66b Mon Sep 17 00:00:00 2001
From: Louis <louis@jan.ai>
Date: Thu, 24 Oct 2024 14:49:18 +0700
Subject: [PATCH 30/71] feat: app supports cortex.cpp model downloader and
 legacy downloader - maintain legacy JSON models

---
 core/src/types/api/index.ts                   | 10 +-
 extensions/model-extension/src/cortex.ts      | 17 +++-
 extensions/model-extension/src/index.ts       | 87 +++++++++++++++--
 .../model-extension/src/legacy/download.ts    | 97 +++++++++++++++++++
 .../src/{ => legacy}/model-json.test.ts       |  0
 .../src/{ => legacy}/model-json.ts            |  2 +-
 web/containers/Providers/EventListener.tsx    | 10 +-
 7 files changed, 197 insertions(+), 26 deletions(-)
 create mode 100644 extensions/model-extension/src/legacy/download.ts
 rename extensions/model-extension/src/{ => legacy}/model-json.test.ts (100%)
 rename extensions/model-extension/src/{ => legacy}/model-json.ts (97%)

diff --git a/core/src/types/api/index.ts b/core/src/types/api/index.ts
index c0de0f5e8..093314a15 100644
--- a/core/src/types/api/index.ts
+++ b/core/src/types/api/index.ts
@@ -69,11 +69,11 @@ export enum DownloadRoute {
 }
 
 export enum DownloadEvent {
-  onFileDownloadUpdate = 'DownloadUpdated',
-  onFileDownloadError = 'DownloadError',
-  onFileDownloadSuccess = 'DownloadSuccess',
-  onFileDownloadStopped = 'DownloadStopped',
-  onFileDownloadStarted = 'DownloadStarted',
+  onFileDownloadUpdate = 'onFileDownloadUpdate',
+  onFileDownloadError = 'onFileDownloadError',
+  onFileDownloadSuccess = 'onFileDownloadSuccess',
+  onFileDownloadStopped = 'onFileDownloadStopped',
+  onFileDownloadStarted = 'onFileDownloadStarted',
   onFileUnzipSuccess = 'onFileUnzipSuccess',
 }
 
diff --git a/extensions/model-extension/src/cortex.ts b/extensions/model-extension/src/cortex.ts
index b0acd6d08..c690f0c16 100644
--- a/extensions/model-extension/src/cortex.ts
+++ b/extensions/model-extension/src/cortex.ts
@@ -25,6 +25,14 @@ type ModelList = {
   data: any[]
 }
 
+enum DownloadTypes { // indexed by the socket message `type` to get the event name to emit
+  DownloadUpdated = 'onFileDownloadUpdate',
+  DownloadError = 'onFileDownloadError',
+  DownloadSuccess = 'onFileDownloadSuccess',
+  DownloadStopped = 'onFileDownloadStopped',
+  DownloadStarted = 'onFileDownloadStarted',
+}
+
 export class CortexAPI implements ICortexAPI {
   queue = new PQueue({ concurrency: 1 })
   socket?: WebSocket = undefined
@@ -159,17 +167,16 @@ export class CortexAPI implements ICortexAPI {
           this.socket.addEventListener('message', (event) => {
             const data = JSON.parse(event.data)
             const transferred = data.task.items.reduce(
-              (accumulator, currentValue) =>
-                accumulator + currentValue.downloadedBytes,
+              (acc, cur) => acc + cur.downloadedBytes,
               0
             )
             const total = data.task.items.reduce(
-              (accumulator, currentValue) => accumulator + currentValue.bytes,
+              (acc, cur) => acc + cur.bytes,
               0
             )
             const percent = (transferred / total || 0) * 100
 
-            events.emit(data.type, {
+            events.emit(DownloadTypes[data.type], {
               modelId: data.task.id,
               percent: percent,
               size: {
@@ -178,7 +185,7 @@ export class CortexAPI implements ICortexAPI {
               },
             })
             // Update models list from Hub
-            if (data.type === DownloadEvent.onFileDownloadSuccess) {
+            if (data.type === DownloadTypes.DownloadSuccess) {
               // Delay for the state update from cortex.cpp
               // Just to be sure
               setTimeout(() => {
diff --git a/extensions/model-extension/src/index.ts b/extensions/model-extension/src/index.ts
index 54e91a6aa..3696acd79 100644
--- a/extensions/model-extension/src/index.ts
+++ b/extensions/model-extension/src/index.ts
@@ -4,9 +4,15 @@ import {
   InferenceEngine,
   joinPath,
   dirName,
+  ModelManager,
+  abortDownload,
+  DownloadState,
+  events,
+  DownloadEvent,
 } from '@janhq/core'
 import { CortexAPI } from './cortex'
-import { scanModelsFolder } from './model-json'
+import { scanModelsFolder } from './legacy/model-json'
+import { downloadModel } from './legacy/download'
 
 declare const SETTINGS: Array<any>
 
@@ -34,6 +40,9 @@ export default class JanModelExtension extends ModelExtension {
     this.getModels().then((models) => {
       this.registerModels(models)
     })
+
+    // Listen to app download events
+    this.handleDesktopEvents()
   }
 
   /**
@@ -48,6 +57,17 @@ export default class JanModelExtension extends ModelExtension {
    * @returns A Promise that resolves when the model is downloaded.
    */
   async pullModel(model: string, id?: string): Promise<void> {
+    if (id) {
+      const model: Model = ModelManager.instance().get(id)
+      // Clip vision model - should not be handled by cortex.cpp
+      // TensorRT model - should not be handled by cortex.cpp
+      if (
+        model.engine === InferenceEngine.nitro_tensorrt_llm ||
+        model.settings.vision_model
+      ) {
+        return downloadModel(model)
+      }
+    }
     /**
      * Sending POST to /models/pull/{id} endpoint to pull the model
      */
@@ -61,10 +81,24 @@ export default class JanModelExtension extends ModelExtension {
    * @returns {Promise<void>} A promise that resolves when the download has been cancelled.
    */
   async cancelModelPull(model: string): Promise<void> {
+    if (model) {
+      const modelDto: Model | undefined = ModelManager.instance().get(model)
+      // Clip vision / TensorRT models: legacy download, abort every source file
+      if (
+        modelDto?.engine === InferenceEngine.nitro_tensorrt_llm ||
+        modelDto?.settings?.vision_model
+      ) {
+        for (const source of modelDto.sources) {
+          const path = await joinPath(['models', modelDto.id, source.filename])
+          await abortDownload(path)
+        }
+        if (modelDto.sources.length > 0) return
+      }
+    }
     /**
      * Sending DELETE to /models/pull/{id} endpoint to cancel a model pull
      */
-    this.cortexAPI.cancelModelPull(model)
+    return this.cortexAPI.cancelModelPull(model)
   }
 
   /**
@@ -87,14 +121,18 @@ export default class JanModelExtension extends ModelExtension {
      * should compare and try import
      */
     let currentModels: Model[] = []
+
+    /**
+     * Legacy models should be supported
+     */
+    let legacyModels = await scanModelsFolder()
+
     try {
       if (!localStorage.getItem(ExtensionEnum.downloadedModels)) {
         // Updated from an older version than 0.5.5
         // Scan through the models folder and import them (Legacy flow)
         // Return models immediately
-        currentModels = await scanModelsFolder().then((models) => {
-          return models ?? []
-        })
+        currentModels = legacyModels
       } else {
         currentModels = JSON.parse(
           localStorage.getItem(ExtensionEnum.downloadedModels)
@@ -116,7 +154,7 @@ export default class JanModelExtension extends ModelExtension {
     await this.cortexAPI.getModels().then((models) => {
       const existingIds = models.map((e) => e.id)
       toImportModels = toImportModels.filter(
-        (e: Model) => !existingIds.includes(e.id)
+        (e: Model) => !existingIds.includes(e.id) && !e.settings?.vision_model
       )
     })
 
@@ -147,13 +185,15 @@ export default class JanModelExtension extends ModelExtension {
     }
 
     /**
-     * All models are imported successfully before
-     * just return models from cortex.cpp
+     * Models are imported successfully before
+     * Now return models from cortex.cpp and merge with legacy models which are not imported
      */
     return (
       this.cortexAPI.getModels().then((models) => {
-        return models
-      }) ?? Promise.resolve([])
+        return models.concat(
+          legacyModels.filter((e) => !models.some((x) => x.id === e.id))
+        )
+      }) ?? Promise.resolve(legacyModels)
     )
   }
 
@@ -175,4 +215,31 @@ export default class JanModelExtension extends ModelExtension {
   async importModel(model: string, modelPath: string): Promise<void> {
     return this.cortexAPI.importModel(model, modelPath)
   }
+
+  /**
+   * Relays download states delivered through `window.electronAPI` by re-emitting
+   * them via `events`, stamping each one with its `downloadState` before emitting
+   * the matching `DownloadEvent`. Does nothing when `window.electronAPI` is absent.
+   */
+  handleDesktopEvents() {
+    // `typeof` check: merely evaluating an undeclared `window` would throw
+    if (typeof window === 'undefined' || !window.electronAPI) return
+    const api = window.electronAPI
+
+    api.onFileDownloadUpdate(
+      async (_event: string, state: DownloadState | undefined) => {
+        if (!state) return
+        state.downloadState = 'downloading'
+        events.emit(DownloadEvent.onFileDownloadUpdate, state)
+      }
+    )
+    api.onFileDownloadError(async (_event: string, state: DownloadState) => {
+      state.downloadState = 'error'
+      events.emit(DownloadEvent.onFileDownloadError, state)
+    })
+    api.onFileDownloadSuccess(async (_event: string, state: DownloadState) => {
+      state.downloadState = 'end'
+      events.emit(DownloadEvent.onFileDownloadSuccess, state)
+    })
+  }
 }
diff --git a/extensions/model-extension/src/legacy/download.ts b/extensions/model-extension/src/legacy/download.ts
new file mode 100644
index 000000000..a1a998daf
--- /dev/null
+++ b/extensions/model-extension/src/legacy/download.ts
@@ -0,0 +1,97 @@
+import {
+  downloadFile,
+  DownloadRequest,
+  fs,
+  GpuSetting,
+  InferenceEngine,
+  joinPath,
+  Model,
+} from '@janhq/core'
+
+/**
+ * Downloads the source files of a legacy (non cortex.cpp) model into
+ * `file://models/<model.id>`; `downloadFile` is called without being awaited.
+ * For TensorRT-LLM models the `<os>` / `<gpuarch>` placeholders of each source
+ * URL are resolved first; when the first GPU in `gpuSettings` is missing or
+ * unsupported, the problem is logged and nothing is downloaded.
+ * @param model - model whose `sources` are fetched; the object is not modified
+ * @param gpuSettings - GPU settings, required for TensorRT-LLM models
+ * @param network - SSL / proxy options passed through to `downloadFile`
+ */
+export const downloadModel = async (
+  model: Model,
+  gpuSettings?: GpuSetting,
+  network?: { ignoreSSL?: boolean; proxy?: string }
+): Promise<void> => {
+  const homedir = 'file://models'
+  const supportedGpuArch = ['ampere', 'ada']
+  // Create corresponding directory
+  const modelDirPath = await joinPath([homedir, model.id])
+  if (!(await fs.existsSync(modelDirPath))) await fs.mkdir(modelDirPath)
+
+  // Resolved on a copy so the caller's (shared) model object stays untouched
+  let sources = model.sources
+  if (model.engine === InferenceEngine.nitro_tensorrt_llm) {
+    if (!gpuSettings || gpuSettings.gpus.length === 0) {
+      console.error('No GPU found. Please check your GPU setting.')
+      return
+    }
+    const firstGpu = gpuSettings.gpus[0]
+    if (!firstGpu.name.toLowerCase().includes('nvidia')) {
+      console.error('No Nvidia GPU found. Please check your GPU setting.')
+      return
+    }
+    const gpuArch = firstGpu.arch
+    if (gpuArch === undefined) {
+      console.error('No GPU architecture found. Please check your GPU setting.')
+      return
+    }
+    if (!supportedGpuArch.includes(gpuArch)) {
+      console.debug(
+        `Your GPU: ${JSON.stringify(firstGpu)} is not supported. Only 30xx, 40xx series are supported.`
+      )
+      return
+    }
+    const os = 'windows' // TODO: remove this hard coded value
+    sources = sources.map((source) => ({
+      ...source,
+      url: source.url.replace(/<os>/g, os).replace(/<gpuarch>/g, gpuArch),
+    }))
+  }
+
+  console.debug(`Download sources: ${JSON.stringify(sources)}`)
+
+  if (sources.length > 1) {
+    for (const source of sources) {
+      // Fall back to the URL-derived name, but always inside the model folder
+      const fileName = source.filename || extractFileName(source.url, '.gguf')
+      const downloadRequest: DownloadRequest = {
+        url: source.url,
+        localPath: await joinPath([modelDirPath, fileName]),
+        modelId: model.id,
+      }
+      downloadFile(downloadRequest, network)
+    }
+  } else {
+    const fileName = extractFileName(sources[0]?.url, '.gguf')
+    const downloadRequest: DownloadRequest = {
+      url: sources[0]?.url,
+      localPath: await joinPath([modelDirPath, fileName]),
+      modelId: model.id,
+    }
+    downloadFile(downloadRequest, network)
+  }
+}
+
+/**
+ * Derives a download's local file name from its source URL: the last path
+ * segment (query string / fragment dropped) with `fileExtension` appended
+ * unless it already ends with it. Returns `fileExtension` for an empty `url`.
+ */
+function extractFileName(url: string, fileExtension: string): string {
+  if (!url) return fileExtension
+  const [pathPart = ''] = url.split(/[?#]/)
+  const lastSegment = pathPart.split('/').pop() ?? ''
+  const hasExtension = lastSegment.toLowerCase().endsWith(fileExtension)
+  return hasExtension ? lastSegment : lastSegment + fileExtension
+}
diff --git a/extensions/model-extension/src/model-json.test.ts b/extensions/model-extension/src/legacy/model-json.test.ts
similarity index 100%
rename from extensions/model-extension/src/model-json.test.ts
rename to extensions/model-extension/src/legacy/model-json.test.ts
diff --git a/extensions/model-extension/src/model-json.ts b/extensions/model-extension/src/legacy/model-json.ts
similarity index 97%
rename from extensions/model-extension/src/model-json.ts
rename to extensions/model-extension/src/legacy/model-json.ts
index 46eee3482..646ae85d7 100644
--- a/extensions/model-extension/src/model-json.ts
+++ b/extensions/model-extension/src/legacy/model-json.ts
@@ -71,7 +71,7 @@ export const scanModelsFolder = async (): Promise<Model[]> => {
                   file.toLowerCase().endsWith('.gguf') || // GGUF
                   file.toLowerCase().endsWith('.engine') // Tensort-LLM
                 )
-              })?.length > 0 // TODO: find better way (can use basename to check the file name with source url)
+              })?.length >= (model.sources?.length ?? 1)
             )
           })
 
diff --git a/web/containers/Providers/EventListener.tsx b/web/containers/Providers/EventListener.tsx
index 5df59b0fd..37711ee0d 100644
--- a/web/containers/Providers/EventListener.tsx
+++ b/web/containers/Providers/EventListener.tsx
@@ -50,7 +50,7 @@ const EventListenerWrapper = ({ children }: PropsWithChildren) => {
         setDownloadState(state)
       }
     },
-    [setDownloadState, setInstallingExtension]
+    [addDownloadingModel, setDownloadState, setInstallingExtension]
   )
 
   const onFileDownloadError = useCallback(
@@ -64,7 +64,7 @@ const EventListenerWrapper = ({ children }: PropsWithChildren) => {
         removeDownloadingModel(state.modelId)
       }
     },
-    [setDownloadState, removeInstallingExtension]
+    [removeInstallingExtension, setDownloadState, removeDownloadingModel]
   )
 
   const onFileDownloadStopped = useCallback(
@@ -79,7 +79,7 @@ const EventListenerWrapper = ({ children }: PropsWithChildren) => {
         removeDownloadingModel(state.modelId)
       }
     },
-    [setDownloadState, removeInstallingExtension]
+    [removeInstallingExtension, setDownloadState, removeDownloadingModel]
   )
 
   const onFileDownloadSuccess = useCallback(
@@ -92,7 +92,7 @@ const EventListenerWrapper = ({ children }: PropsWithChildren) => {
       }
       events.emit(ModelEvent.OnModelsUpdate, {})
     },
-    [setDownloadState]
+    [removeDownloadingModel, setDownloadState]
   )
 
   const onFileUnzipSuccess = useCallback(
@@ -121,7 +121,7 @@ const EventListenerWrapper = ({ children }: PropsWithChildren) => {
       events.off(DownloadEvent.onFileDownloadUpdate, onFileDownloadUpdate)
       events.off(DownloadEvent.onFileDownloadError, onFileDownloadError)
       events.off(DownloadEvent.onFileDownloadSuccess, onFileDownloadSuccess)
-      events.off(DownloadEvent.onFileDownloadSuccess, onFileDownloadSuccess)
+      events.off(DownloadEvent.onFileDownloadStopped, onFileDownloadStopped)
       events.off(DownloadEvent.onFileUnzipSuccess, onFileUnzipSuccess)
     }
   }, [

From 90c7420c3489c9f236703cca3d3deeb232efdec3 Mon Sep 17 00:00:00 2001
From: Louis <louis@jan.ai>
Date: Thu, 24 Oct 2024 15:41:10 +0700
Subject: [PATCH 31/71] chore: add comments and clean unused imports

---
 web/containers/ModelDropdown/index.tsx        |  1 -
 .../ChatBody/EmptyThread/index.tsx            |  1 -
 .../Thread/ThreadCenterPanel/index.tsx        |  2 +-
 web/utils/huggingface.ts                      | 30 +++++++++++++++++++
 web/utils/model.ts                            |  6 ++++
 5 files changed, 37 insertions(+), 3 deletions(-)

diff --git a/web/containers/ModelDropdown/index.tsx b/web/containers/ModelDropdown/index.tsx
index a5874b3de..abd9af247 100644
--- a/web/containers/ModelDropdown/index.tsx
+++ b/web/containers/ModelDropdown/index.tsx
@@ -200,7 +200,6 @@ const ModelDropdown = ({
         if (model)
           updateModelParameter(activeThread, {
             params: modelParams,
-            // modelPath: model.file_path,
             modelId: model.id,
             engine: model.engine,
           })
diff --git a/web/screens/Thread/ThreadCenterPanel/ChatBody/EmptyThread/index.tsx b/web/screens/Thread/ThreadCenterPanel/ChatBody/EmptyThread/index.tsx
index a99e6306f..403370ade 100644
--- a/web/screens/Thread/ThreadCenterPanel/ChatBody/EmptyThread/index.tsx
+++ b/web/screens/Thread/ThreadCenterPanel/ChatBody/EmptyThread/index.tsx
@@ -1,6 +1,5 @@
 import { memo } from 'react'
 
-import { InferenceEngine } from '@janhq/core'
 import { Button } from '@janhq/joi'
 import { useAtomValue, useSetAtom } from 'jotai'
 
diff --git a/web/screens/Thread/ThreadCenterPanel/index.tsx b/web/screens/Thread/ThreadCenterPanel/index.tsx
index c83a38a1a..2440af2c5 100644
--- a/web/screens/Thread/ThreadCenterPanel/index.tsx
+++ b/web/screens/Thread/ThreadCenterPanel/index.tsx
@@ -1,6 +1,6 @@
 /* eslint-disable @typescript-eslint/naming-convention */
 
-import { Fragment, useEffect, useState } from 'react'
+import { useEffect, useState } from 'react'
 
 import { Accept, useDropzone } from 'react-dropzone'
 
diff --git a/web/utils/huggingface.ts b/web/utils/huggingface.ts
index ceddc6867..3e71f3a0f 100644
--- a/web/utils/huggingface.ts
+++ b/web/utils/huggingface.ts
@@ -1,5 +1,13 @@
 import { AllQuantizations, getFileSize, HuggingFaceRepoData } from '@janhq/core'
 
+/**
+ * Fetches data from a Hugging Face repository.
+ *
+ * @param repoId - The ID of the Hugging Face repository.
+ * @param huggingFaceAccessToken - Optional access token for Hugging Face API.
+ * @returns A promise that resolves to the HuggingFaceRepoData.
+ * @throws Will throw an error if the repository is not supported or if there is an error in the response.
+ */
 export const fetchHuggingFaceRepoData = async (
   repoId: string,
   huggingFaceAccessToken?: string
@@ -60,31 +68,53 @@ export const fetchHuggingFaceRepoData = async (
   return data
 }
 
+/**
+ * Converts a repository ID or URL to a valid Hugging Face API URL.
+ *
+ * @param repoId - The repository ID or URL to convert.
+ * @returns A string representing the Hugging Face API URL.
+ * @throws {InvalidHostError} If the URL's host is not huggingface.co or its path lacks a user/repo pair.
+ * @throws {Error} If repoId starts with 'https' but cannot be parsed as a URL.
+ */
 export function toHuggingFaceUrl(repoId: string): string {
   try {
+    // Attempt to create a URL object from the repoId
     const url = new URL(repoId)
+
+    // Check if the host is huggingface.co
     if (url.host !== 'huggingface.co') {
       throw new InvalidHostError(`Invalid Hugging Face repo URL: ${repoId}`)
     }
 
+    // Split the pathname into parts and filter out empty strings
     const paths = url.pathname.split('/').filter((e) => e.trim().length > 0)
+
+    // Ensure there are at least two parts in the path (user/repo)
     if (paths.length < 2) {
       throw new InvalidHostError(`Invalid Hugging Face repo URL: ${repoId}`)
     }
 
+    // Construct and return the API URL
     return `${url.origin}/api/models/${paths[0]}/${paths[1]}`
   } catch (err) {
+    // Re-throw InvalidHostError if it was caught
     if (err instanceof InvalidHostError) {
       throw err
     }
 
+    // If repoId starts with 'https' but couldn't be parsed, throw an error
     if (repoId.startsWith('https')) {
       throw new Error(`Cannot parse url: ${repoId}`)
     }
 
+    // If repoId is not a URL, assume it's a valid repo ID and construct the API URL
     return `https://huggingface.co/api/models/${repoId}`
   }
 }
+
+/**
+ * Error thrown when the host of a URL is invalid or not from huggingface.co.
+ */
 export class InvalidHostError extends Error {
   constructor(message: string) {
     super(message)
diff --git a/web/utils/model.ts b/web/utils/model.ts
index 00efc1155..cb0f0ff31 100644
--- a/web/utils/model.ts
+++ b/web/utils/model.ts
@@ -1,3 +1,9 @@
+/**
+ * Derives a model ID from a download URL by taking its last `/`-separated segment.
+ *
+ * @param downloadUrl - The URL from which to extract the model ID.
+ * @returns The last path segment, falling back to the original URL if none is available.
+ */
 export const normalizeModelId = (downloadUrl: string): string => {
   return downloadUrl.split('/').pop() ?? downloadUrl
 }

From 3643c8866e55a0757f773ce25d0e4d72818e5366 Mon Sep 17 00:00:00 2001
From: Louis <louis@jan.ai>
Date: Fri, 25 Oct 2024 12:33:43 +0700
Subject: [PATCH 32/71] fix: correct model settings on startup and strip down
 irrelevant model parameters

---
 core/src/browser/models/index.ts              |  5 ++
 .../src/browser/models/utils.test.ts          | 47 ++++++++---------
 .../src/browser/models/utils.ts               | 24 +++++----
 core/src/types/model/modelEntity.ts           | 15 +++++-
 .../inference-cortex-extension/src/index.ts   | 52 ++++++++++++++-----
 extensions/model-extension/src/cortex.ts      | 31 ++++-------
 web/containers/Providers/EventHandler.tsx     |  2 +-
 web/hooks/useSendChatMessage.ts               |  5 +-
 web/hooks/useUpdateModelParameters.ts         |  7 +--
 .../LocalServerRightPanel/index.tsx           |  6 +--
 web/screens/Thread/ThreadRightPanel/index.tsx |  6 +--
 11 files changed, 111 insertions(+), 89 deletions(-)
 rename web/utils/modelParam.test.ts => core/src/browser/models/utils.test.ts (87%)
 rename web/utils/modelParam.ts => core/src/browser/models/utils.ts (86%)

diff --git a/core/src/browser/models/index.ts b/core/src/browser/models/index.ts
index c16479b2b..81d37e501 100644
--- a/core/src/browser/models/index.ts
+++ b/core/src/browser/models/index.ts
@@ -3,3 +3,8 @@
  * @module
  */
 export { ModelManager } from './manager'
+
+/**
+ * Export all utils
+ */
+export * from './utils'
diff --git a/web/utils/modelParam.test.ts b/core/src/browser/models/utils.test.ts
similarity index 87%
rename from web/utils/modelParam.test.ts
rename to core/src/browser/models/utils.test.ts
index 97325d277..ac876c3dc 100644
--- a/web/utils/modelParam.test.ts
+++ b/core/src/browser/models/utils.test.ts
@@ -1,7 +1,10 @@
 // web/utils/modelParam.test.ts
-import { normalizeValue, validationRules } from './modelParam'
-import { extractModelLoadParams } from './modelParam';
-import { extractInferenceParams } from './modelParam';
+import {
+  normalizeValue,
+  validationRules,
+  extractModelLoadParams,
+  extractInferenceParams,
+} from './utils'
 
 describe('validationRules', () => {
   it('should validate temperature correctly', () => {
@@ -151,13 +154,12 @@ describe('validationRules', () => {
   })
 })
 
-
-  it('should normalize invalid values for keys not listed in validationRules', () => {
-    expect(normalizeValue('invalid_key', 'invalid')).toBe('invalid')
-    expect(normalizeValue('invalid_key', 123)).toBe(123)
-    expect(normalizeValue('invalid_key', true)).toBe(true)
-    expect(normalizeValue('invalid_key', false)).toBe(false)
-  })
+it('should normalize invalid values for keys not listed in validationRules', () => {
+  expect(normalizeValue('invalid_key', 'invalid')).toBe('invalid')
+  expect(normalizeValue('invalid_key', 123)).toBe(123)
+  expect(normalizeValue('invalid_key', true)).toBe(true)
+  expect(normalizeValue('invalid_key', false)).toBe(false)
+})
 
 describe('normalizeValue', () => {
   it('should normalize ctx_len correctly', () => {
@@ -192,19 +194,16 @@ describe('normalizeValue', () => {
   })
 })
 
+it('should handle invalid values correctly by falling back to originParams', () => {
+  const modelParams = { temperature: 'invalid', token_limit: -1 }
+  const originParams = { temperature: 0.5, token_limit: 100 }
+  expect(extractInferenceParams(modelParams as any, originParams)).toEqual(originParams)
+})
 
-  it('should handle invalid values correctly by falling back to originParams', () => {
-    const modelParams = { temperature: 'invalid', token_limit: -1 };
-    const originParams = { temperature: 0.5, token_limit: 100 };
-    expect(extractInferenceParams(modelParams, originParams)).toEqual(originParams);
-  });
+it('should return an empty object when no modelParams are provided', () => {
+  expect(extractModelLoadParams()).toEqual({})
+})
 
-
-  it('should return an empty object when no modelParams are provided', () => {
-    expect(extractModelLoadParams()).toEqual({});
-  });
-
-
-  it('should return an empty object when no modelParams are provided', () => {
-    expect(extractInferenceParams()).toEqual({});
-  });
+it('should return an empty object when no modelParams are provided', () => {
+  expect(extractInferenceParams()).toEqual({})
+})
diff --git a/web/utils/modelParam.ts b/core/src/browser/models/utils.ts
similarity index 86%
rename from web/utils/modelParam.ts
rename to core/src/browser/models/utils.ts
index 315aeaeb3..0e52441b2 100644
--- a/web/utils/modelParam.ts
+++ b/core/src/browser/models/utils.ts
@@ -1,26 +1,20 @@
 /* eslint-disable @typescript-eslint/no-explicit-any */
 /* eslint-disable @typescript-eslint/naming-convention */
-import { ModelRuntimeParams, ModelSettingParams } from '@janhq/core'
-
-import { ModelParams } from '@/types/model'
+import { ModelParams, ModelRuntimeParams, ModelSettingParams } from '../../types'
 
 /**
  * Validation rules for model parameters
  */
 export const validationRules: { [key: string]: (value: any) => boolean } = {
-  temperature: (value: any) =>
-    typeof value === 'number' && value >= 0 && value <= 2,
+  temperature: (value: any) => typeof value === 'number' && value >= 0 && value <= 2,
   token_limit: (value: any) => Number.isInteger(value) && value >= 0,
   top_k: (value: any) => typeof value === 'number' && value >= 0 && value <= 1,
   top_p: (value: any) => typeof value === 'number' && value >= 0 && value <= 1,
   stream: (value: any) => typeof value === 'boolean',
   max_tokens: (value: any) => Number.isInteger(value) && value >= 0,
-  stop: (value: any) =>
-    Array.isArray(value) && value.every((v) => typeof v === 'string'),
-  frequency_penalty: (value: any) =>
-    typeof value === 'number' && value >= 0 && value <= 1,
-  presence_penalty: (value: any) =>
-    typeof value === 'number' && value >= 0 && value <= 1,
+  stop: (value: any) => Array.isArray(value) && value.every((v) => typeof v === 'string'),
+  frequency_penalty: (value: any) => typeof value === 'number' && value >= 0 && value <= 1,
+  presence_penalty: (value: any) => typeof value === 'number' && value >= 0 && value <= 1,
 
   ctx_len: (value: any) => Number.isInteger(value) && value >= 0,
   ngl: (value: any) => Number.isInteger(value) && value >= 0,
@@ -76,6 +70,7 @@ export const extractInferenceParams = (
     stop: undefined,
     frequency_penalty: undefined,
     presence_penalty: undefined,
+    engine: undefined,
   }
 
   const runtimeParams: ModelRuntimeParams = {}
@@ -119,11 +114,18 @@ export const extractModelLoadParams = (
     embedding: undefined,
     n_parallel: undefined,
     cpu_threads: undefined,
+    pre_prompt: undefined,
+    system_prompt: undefined,
+    ai_prompt: undefined,
+    user_prompt: undefined,
     prompt_template: undefined,
+    model_path: undefined,
     llama_model_path: undefined,
     mmproj: undefined,
+    cont_batching: undefined,
     vision_model: undefined,
     text_model: undefined,
+    engine: undefined,
   }
   const settingParams: ModelSettingParams = {}
 
diff --git a/core/src/types/model/modelEntity.ts b/core/src/types/model/modelEntity.ts
index 25ed95b8d..7b67a8e94 100644
--- a/core/src/types/model/modelEntity.ts
+++ b/core/src/types/model/modelEntity.ts
@@ -15,7 +15,6 @@ export type ModelInfo = {
  * Represents the inference engine.
  * @stored
  */
-
 export enum InferenceEngine {
   anthropic = 'anthropic',
   mistral = 'mistral',
@@ -34,6 +33,7 @@ export enum InferenceEngine {
   cortex_tensorrtllm = 'tensorrt-llm',
 }
 
+// Represents an artifact of a model, including its filename and URL
 export type ModelArtifact = {
   filename: string
   url: string
@@ -105,6 +105,7 @@ export type Model = {
   engine: InferenceEngine
 }
 
+// Represents metadata associated with a model
 export type ModelMetadata = {
   author: string
   tags: string[]
@@ -125,14 +126,20 @@ export type ModelSettingParams = {
   n_parallel?: number
   cpu_threads?: number
   prompt_template?: string
+  pre_prompt?: string
   system_prompt?: string
   ai_prompt?: string
   user_prompt?: string
+  // path param
+  model_path?: string
+  // legacy path param
   llama_model_path?: string
+  // clip model path
   mmproj?: string
   cont_batching?: boolean
   vision_model?: boolean
   text_model?: boolean
+  engine?: boolean
 }
 
 /**
@@ -151,6 +158,12 @@ export type ModelRuntimeParams = {
   engine?: string
 }
 
+// Represents a model that failed to initialize, including the error
 export type ModelInitFailed = Model & {
   error: Error
 }
+
+/**
+ * Either runtime (inference) parameters or model setting (load) parameters
+ */
+export type ModelParams = ModelRuntimeParams | ModelSettingParams
diff --git a/extensions/inference-cortex-extension/src/index.ts b/extensions/inference-cortex-extension/src/index.ts
index 364bfe79c..8143a71cf 100644
--- a/extensions/inference-cortex-extension/src/index.ts
+++ b/extensions/inference-cortex-extension/src/index.ts
@@ -10,11 +10,12 @@ import {
   Model,
   executeOnMain,
   systemInformation,
-  log,
   joinPath,
   dirName,
   LocalOAIEngine,
   InferenceEngine,
+  getJanDataFolderPath,
+  extractModelLoadParams,
 } from '@janhq/core'
 import PQueue from 'p-queue'
 import ky from 'ky'
@@ -62,24 +63,38 @@ export default class JanInferenceCortexExtension extends LocalOAIEngine {
   override async loadModel(
     model: Model & { file_path?: string }
   ): Promise<void> {
-    // Legacy model cache - should import
-    if (model.engine === InferenceEngine.nitro && model.file_path) {
-      // Try importing the model
-      const modelPath = await this.modelPath(model)
-      await this.queue.add(() =>
-        ky
-          .post(`${CORTEX_API_URL}/v1/models/${model.id}`, {
-            json: { model: model.id, modelPath: modelPath },
-          })
-          .json()
-          .catch((e) => log(e.message ?? e ?? ''))
-      )
+    if (
+      model.engine === InferenceEngine.nitro &&
+      model.settings.llama_model_path
+    ) {
+      // Legacy chat model support
+      model.settings = {
+        ...model.settings,
+        llama_model_path: await getModelFilePath(
+          model.id,
+          model.settings.llama_model_path
+        ),
+      }
+    } else {
+      const { llama_model_path, ...settings } = model.settings
+      model.settings = settings
+    }
+
+    if (model.engine === InferenceEngine.nitro && model.settings.mmproj) {
+      // Legacy clip vision model support
+      model.settings = {
+        ...model.settings,
+        mmproj: await getModelFilePath(model.id, model.settings.mmproj),
+      }
+    } else {
+      const { mmproj, ...settings } = model.settings
+      model.settings = settings
     }
 
     return await ky
       .post(`${CORTEX_API_URL}/v1/models/start`, {
         json: {
-          ...model.settings,
+          ...extractModelLoadParams(model.settings),
           model: model.id,
           engine:
             model.engine === InferenceEngine.nitro // Legacy model cache
@@ -131,3 +146,12 @@ export default class JanInferenceCortexExtension extends LocalOAIEngine {
       .then(() => {})
   }
 }
+
+/// Legacy: resolves `<Jan data folder>/models/<id>/<file>`
+export const getModelFilePath = async (
+  id: string,
+  file: string
+): Promise<string> => {
+  return joinPath([await getJanDataFolderPath(), 'models', id, file])
+}
+///
diff --git a/extensions/model-extension/src/cortex.ts b/extensions/model-extension/src/cortex.ts
index c690f0c16..7f48f10ec 100644
--- a/extensions/model-extension/src/cortex.ts
+++ b/extensions/model-extension/src/cortex.ts
@@ -1,13 +1,7 @@
 import PQueue from 'p-queue'
 import ky from 'ky'
-import {
-  DownloadEvent,
-  events,
-  Model,
-  ModelEvent,
-  ModelRuntimeParams,
-  ModelSettingParams,
-} from '@janhq/core'
+import { events, extractModelLoadParams, Model, ModelEvent } from '@janhq/core'
+import { extractInferenceParams } from '@janhq/core'
 /**
  * cortex.cpp Model APIs interface
  */
@@ -204,20 +198,17 @@ export class CortexAPI implements ICortexAPI {
    * @returns
    */
   private transformModel(model: any) {
-    model.parameters = setParameters<ModelRuntimeParams>(model)
-    model.settings = setParameters<ModelSettingParams>(model)
-    model.metadata = {
+    model.parameters = {
+      ...extractInferenceParams(model),
+      ...model.parameters,
+    }
+    model.settings = {
+      ...extractModelLoadParams(model),
+      ...model.settings,
+    }
+    model.metadata = model.metadata ?? {
       tags: [],
     }
     return model as Model
   }
 }
-
-type FilteredParams<T> = {
-  [K in keyof T]: T[K]
-}
-
-function setParameters<T>(params: T): T {
-  const filteredParams: FilteredParams<T> = { ...params }
-  return filteredParams
-}
diff --git a/web/containers/Providers/EventHandler.tsx b/web/containers/Providers/EventHandler.tsx
index 72d35aad3..0f5cf389d 100644
--- a/web/containers/Providers/EventHandler.tsx
+++ b/web/containers/Providers/EventHandler.tsx
@@ -15,6 +15,7 @@ import {
   Thread,
   EngineManager,
   InferenceEngine,
+  extractInferenceParams,
 } from '@janhq/core'
 import { useAtomValue, useSetAtom } from 'jotai'
 import { ulid } from 'ulidx'
@@ -22,7 +23,6 @@ import { ulid } from 'ulidx'
 import { activeModelAtom, stateModelAtom } from '@/hooks/useActiveModel'
 
 import { isLocalEngine } from '@/utils/modelEngine'
-import { extractInferenceParams } from '@/utils/modelParam'
 
 import { extensionManager } from '@/extension'
 import {
diff --git a/web/hooks/useSendChatMessage.ts b/web/hooks/useSendChatMessage.ts
index 4bc91cad2..cda53b24a 100644
--- a/web/hooks/useSendChatMessage.ts
+++ b/web/hooks/useSendChatMessage.ts
@@ -12,6 +12,7 @@ import {
   ToolManager,
   ChatCompletionMessage,
 } from '@janhq/core'
+import { extractInferenceParams, extractModelLoadParams } from '@janhq/core'
 import { atom, useAtom, useAtomValue, useSetAtom } from 'jotai'
 
 import {
@@ -23,10 +24,6 @@ import {
 import { Stack } from '@/utils/Stack'
 import { compressImage, getBase64 } from '@/utils/base64'
 import { MessageRequestBuilder } from '@/utils/messageRequestBuilder'
-import {
-  extractInferenceParams,
-  extractModelLoadParams,
-} from '@/utils/modelParam'
 
 import { ThreadMessageBuilder } from '@/utils/threadMessageBuilder'
 
diff --git a/web/hooks/useUpdateModelParameters.ts b/web/hooks/useUpdateModelParameters.ts
index 2af6e3323..6eb7c3c5a 100644
--- a/web/hooks/useUpdateModelParameters.ts
+++ b/web/hooks/useUpdateModelParameters.ts
@@ -6,15 +6,12 @@ import {
   InferenceEngine,
   Thread,
   ThreadAssistantInfo,
+  extractInferenceParams,
+  extractModelLoadParams,
 } from '@janhq/core'
 
 import { useAtom, useAtomValue, useSetAtom } from 'jotai'
 
-import {
-  extractInferenceParams,
-  extractModelLoadParams,
-} from '@/utils/modelParam'
-
 import { extensionManager } from '@/extension'
 import { selectedModelAtom } from '@/helpers/atoms/Model.atom'
 import {
diff --git a/web/screens/LocalServer/LocalServerRightPanel/index.tsx b/web/screens/LocalServer/LocalServerRightPanel/index.tsx
index 628a61512..0d2fe0f7c 100644
--- a/web/screens/LocalServer/LocalServerRightPanel/index.tsx
+++ b/web/screens/LocalServer/LocalServerRightPanel/index.tsx
@@ -1,5 +1,6 @@
 import { useCallback, useEffect, useMemo, useState } from 'react'
 
+import { extractInferenceParams, extractModelLoadParams } from '@janhq/core'
 import { Accordion, AccordionItem, Input, Tooltip } from '@janhq/joi'
 import { useAtomValue, useSetAtom } from 'jotai'
 import { AlertTriangleIcon, CheckIcon, CopyIcon, InfoIcon } from 'lucide-react'
@@ -16,11 +17,6 @@ import { useClipboard } from '@/hooks/useClipboard'
 
 import { getConfigurationsData } from '@/utils/componentSettings'
 
-import {
-  extractInferenceParams,
-  extractModelLoadParams,
-} from '@/utils/modelParam'
-
 import { serverEnabledAtom } from '@/helpers/atoms/LocalServer.atom'
 import { selectedModelAtom } from '@/helpers/atoms/Model.atom'
 
diff --git a/web/screens/Thread/ThreadRightPanel/index.tsx b/web/screens/Thread/ThreadRightPanel/index.tsx
index 5a8fd3ebb..674c97766 100644
--- a/web/screens/Thread/ThreadRightPanel/index.tsx
+++ b/web/screens/Thread/ThreadRightPanel/index.tsx
@@ -4,6 +4,8 @@ import {
   InferenceEngine,
   SettingComponentProps,
   SliderComponentProps,
+  extractInferenceParams,
+  extractModelLoadParams,
 } from '@janhq/core'
 import {
   Tabs,
@@ -31,10 +33,6 @@ import useUpdateModelParameters from '@/hooks/useUpdateModelParameters'
 
 import { getConfigurationsData } from '@/utils/componentSettings'
 import { isLocalEngine } from '@/utils/modelEngine'
-import {
-  extractInferenceParams,
-  extractModelLoadParams,
-} from '@/utils/modelParam'
 
 import PromptTemplateSetting from './PromptTemplateSetting'
 import Tools from './Tools'

From 2c11caf87e7f34e18b8e939e3bb23f51f7bc937b Mon Sep 17 00:00:00 2001
From: Louis <louis@jan.ai>
Date: Mon, 28 Oct 2024 16:36:58 +0700
Subject: [PATCH 33/71] chore: shared cuda dependencies

---
 .husky/pre-commit                             |  2 +-
 electron/package.json                         |  6 ++-
 electron/shared/.gitkeep                      |  0
 .../inference-cortex-extension/download.bat   |  5 ++-
 .../inference-cortex-extension/download.sh    |  4 +-
 .../src/node/index.ts                         | 41 +++++++++++--------
 extensions/model-extension/src/index.ts       |  3 +-
 7 files changed, 36 insertions(+), 25 deletions(-)
 create mode 100644 electron/shared/.gitkeep

diff --git a/.husky/pre-commit b/.husky/pre-commit
index 177cd4216..53c4e577e 100644
--- a/.husky/pre-commit
+++ b/.husky/pre-commit
@@ -1 +1 @@
-oxlint --fix || npm run lint --fix
\ No newline at end of file
+npx oxlint@latest --fix
\ No newline at end of file
diff --git a/electron/package.json b/electron/package.json
index feaee5e16..273062139 100644
--- a/electron/package.json
+++ b/electron/package.json
@@ -18,7 +18,8 @@
       "docs/**/*",
       "scripts/**/*",
       "icons/**/*",
-      "themes"
+      "themes",
+      "shared"
     ],
     "asarUnpack": [
       "pre-install",
@@ -26,7 +27,8 @@
       "docs",
       "scripts",
       "icons",
-      "themes"
+      "themes",
+      "shared"
     ],
     "publish": [
       {
diff --git a/electron/shared/.gitkeep b/electron/shared/.gitkeep
new file mode 100644
index 000000000..e69de29bb
diff --git a/extensions/inference-cortex-extension/download.bat b/extensions/inference-cortex-extension/download.bat
index 6d917cf79..9b43459ce 100644
--- a/extensions/inference-cortex-extension/download.bat
+++ b/extensions/inference-cortex-extension/download.bat
@@ -1,5 +1,6 @@
 @echo off
 set BIN_PATH=./bin
+set SHARED_PATH=./../../electron/shared
 set /p CORTEX_VERSION=<./bin/version.txt
 
 @REM Download cortex.llamacpp binaries
@@ -18,8 +19,8 @@ call .\node_modules\.bin\download %DOWNLOAD_URL%-avx.tar.gz -e --strip 1 -o %BIN
 call .\node_modules\.bin\download %DOWNLOAD_URL%-avx2.tar.gz -e --strip 1 -o %BIN_PATH%/avx2/engines/cortex.llamacpp
 call .\node_modules\.bin\download %DOWNLOAD_URL%-avx512.tar.gz -e --strip 1 -o %BIN_PATH%/avx512/engines/cortex.llamacpp
 call .\node_modules\.bin\download %DOWNLOAD_URL%-vulkan.tar.gz -e --strip 1 -o %BIN_PATH%/vulkan/engines/cortex.llamacpp
-call .\node_modules\.bin\download %CUDA_DOWNLOAD_URL%/cuda-12-0-windows-amd64.tar.gz -e --strip 1 -o %BIN_PATH%
-call .\node_modules\.bin\download %CUDA_DOWNLOAD_URL%/cuda-11-7-windows-amd64.tar.gz -e --strip 1 -o %BIN_PATH%
+call .\node_modules\.bin\download %CUDA_DOWNLOAD_URL%/cuda-12-0-windows-amd64.tar.gz -e --strip 1 -o %SHARED_PATH%
+call .\node_modules\.bin\download %CUDA_DOWNLOAD_URL%/cuda-11-7-windows-amd64.tar.gz -e --strip 1 -o %SHARED_PATH%
 
 
 @REM Loop through each folder and move DLLs (excluding engine.dll)
diff --git a/extensions/inference-cortex-extension/download.sh b/extensions/inference-cortex-extension/download.sh
index 3e809765c..be1f67f2d 100755
--- a/extensions/inference-cortex-extension/download.sh
+++ b/extensions/inference-cortex-extension/download.sh
@@ -23,8 +23,8 @@ if [ "$OS_TYPE" == "Linux" ]; then
     download "${ENGINE_DOWNLOAD_URL}-linux-amd64-noavx-cuda-12-0.tar.gz" -e --strip 1 -o "./bin/noavx-cuda-12-0/engines/cortex.llamacpp" 1
     download "${ENGINE_DOWNLOAD_URL}-linux-amd64-noavx-cuda-11-7.tar.gz" -e --strip 1 -o "./bin/noavx-cuda-11-7/engines/cortex.llamacpp" 1
     download "${ENGINE_DOWNLOAD_URL}-linux-amd64-vulkan.tar.gz" -e --strip 1 -o "./bin/vulkan/engines/cortex.llamacpp" 1
-    download "${CUDA_DOWNLOAD_URL}/cuda-12-0-linux-amd64.tar.gz" -e --strip 1 -o "./bin" 1
-    download "${CUDA_DOWNLOAD_URL}/cuda-11-7-linux-amd64.tar.gz" -e --strip 1 -o "./bin" 1
+    download "${CUDA_DOWNLOAD_URL}/cuda-12-0-linux-amd64.tar.gz" -e --strip 1 -o "../../electron/shared" 1
+    download "${CUDA_DOWNLOAD_URL}/cuda-11-7-linux-amd64.tar.gz" -e --strip 1 -o "../../electron/shared" 1
 
 elif [ "$OS_TYPE" == "Darwin" ]; then
     # macOS downloads
diff --git a/extensions/inference-cortex-extension/src/node/index.ts b/extensions/inference-cortex-extension/src/node/index.ts
index c54dae4c0..a52de20bb 100644
--- a/extensions/inference-cortex-extension/src/node/index.ts
+++ b/extensions/inference-cortex-extension/src/node/index.ts
@@ -2,6 +2,7 @@ import path from 'path'
 import { getJanDataFolderPath, log, SystemInformation } from '@janhq/core/node'
 import { executableCortexFile } from './execute'
 import { ProcessWatchdog } from './watchdog'
+import { appResourcePath } from '@janhq/core/node'
 
 // The HOST address to use for the Nitro subprocess
 const LOCAL_PORT = '39291'
@@ -19,9 +20,9 @@ function run(systemInfo?: SystemInformation): Promise<any> {
       // If ngl is not set or equal to 0, run on CPU with correct instructions
       systemInfo?.gpuSetting
         ? {
-            ...systemInfo.gpuSetting,
-            run_mode: systemInfo.gpuSetting.run_mode,
-          }
+          ...systemInfo.gpuSetting,
+          run_mode: systemInfo.gpuSetting.run_mode,
+        }
         : undefined
     )
 
@@ -29,20 +30,9 @@ function run(systemInfo?: SystemInformation): Promise<any> {
     log(`[CORTEX]:: Spawn cortex at path: ${executableOptions.executablePath}`)
     log(`[CORTEX]:: Cortex engine path: ${executableOptions.enginePath}`)
 
-    // Add engine path to the PATH and LD_LIBRARY_PATH
-    process.env.PATH = (process.env.PATH || '').concat(
-      path.delimiter,
-      executableOptions.enginePath,
-      path.delimiter,
-      executableOptions.binPath
-    )
-    log(`[CORTEX] PATH: ${process.env.PATH}`)
-    process.env.LD_LIBRARY_PATH = (process.env.LD_LIBRARY_PATH || '').concat(
-      path.delimiter,
-      executableOptions.enginePath,
-      path.delimiter,
-      executableOptions.binPath
-    )
+    addEnvPaths(path.join(appResourcePath(), 'shared'))
+    addEnvPaths(executableOptions.binPath)
+    addEnvPaths(executableOptions.enginePath)
 
     const dataFolderPath = getJanDataFolderPath()
     watchdog = new ProcessWatchdog(
@@ -83,6 +73,23 @@ function dispose() {
   watchdog?.terminate()
 }
 
+function addEnvPaths(dest: string) {
+  // Appends `dest` to PATH on Windows, or to LD_LIBRARY_PATH on any other
+  // platform. Unset/empty values are dropped before joining so the result
+  // never starts with a delimiter (an empty entry means the cwd to ld.so).
+  if (process.platform === 'win32') {
+    process.env.PATH = [process.env.PATH, dest]
+      .filter(Boolean)
+      .join(path.delimiter)
+    log(`[CORTEX] PATH: ${process.env.PATH}`)
+  } else {
+    process.env.LD_LIBRARY_PATH = [process.env.LD_LIBRARY_PATH, dest]
+      .filter(Boolean)
+      .join(path.delimiter)
+    log(`[CORTEX] LD_LIBRARY_PATH: ${process.env.LD_LIBRARY_PATH}`)
+  }
+}
+
 /**
  * Cortex process info
  */
diff --git a/extensions/model-extension/src/index.ts b/extensions/model-extension/src/index.ts
index 3696acd79..3e0af0172 100644
--- a/extensions/model-extension/src/index.ts
+++ b/extensions/model-extension/src/index.ts
@@ -13,6 +13,7 @@ import {
 import { CortexAPI } from './cortex'
 import { scanModelsFolder } from './legacy/model-json'
 import { downloadModel } from './legacy/download'
+import { systemInformation } from '@janhq/core'
 
 declare const SETTINGS: Array<any>
 
@@ -65,7 +66,7 @@ export default class JanModelExtension extends ModelExtension {
         model.engine === InferenceEngine.nitro_tensorrt_llm ||
         model.settings.vision_model
       ) {
-        return downloadModel(model)
+        return downloadModel(model, (await systemInformation()).gpuSetting)
       }
     }
     /**

From a466bbca38d04cc4746cd61a80d64338c8ac36c3 Mon Sep 17 00:00:00 2001
From: Louis <louis@jan.ai>
Date: Mon, 28 Oct 2024 19:08:32 +0700
Subject: [PATCH 34/71] chore: update legacy tensorrt-llm download and run

---
 .../extensions/engines/LocalOAIEngine.ts      | 41 +++++++++++++++++--
 .../inference-cortex-extension/src/index.ts   | 13 ------
 extensions/model-extension/src/index.ts       | 18 +++++---
 .../model-extension/src/legacy/delete.ts      | 18 ++++++++
 .../model-extension/src/legacy/model-json.ts  |  4 +-
 web/containers/Providers/EventListener.tsx    |  3 +-
 web/hooks/useDownloadState.ts                 | 17 +++-----
 7 files changed, 78 insertions(+), 36 deletions(-)
 create mode 100644 extensions/model-extension/src/legacy/delete.ts

diff --git a/core/src/browser/extensions/engines/LocalOAIEngine.ts b/core/src/browser/extensions/engines/LocalOAIEngine.ts
index e8bd8cdf2..b54f8fbde 100644
--- a/core/src/browser/extensions/engines/LocalOAIEngine.ts
+++ b/core/src/browser/extensions/engines/LocalOAIEngine.ts
@@ -1,4 +1,4 @@
-import { executeOnMain, systemInformation, dirName } from '../../core'
+import { executeOnMain, systemInformation, dirName, joinPath, getJanDataFolderPath } from '../../core'
 import { events } from '../../events'
 import { Model, ModelEvent } from '../../../types'
 import { OAIEngine } from './OAIEngine'
@@ -29,13 +29,46 @@ export abstract class LocalOAIEngine extends OAIEngine {
   /**
    * Load the model.
    */
-  override async loadModel(model: Model): Promise<void> {
-    return Promise.resolve()
+  override async loadModel(model: Model & { file_path?: string }): Promise<void> {
+    if (model.engine.toString() !== this.provider) return
+    const modelFolder = 'file_path' in model && model.file_path ? await dirName(model.file_path) : await this.getModelFilePath(model.id)
+    const systemInfo = await systemInformation()
+    const res = await executeOnMain(
+      this.nodeModule,
+      this.loadModelFunctionName,
+      {
+        modelFolder,
+        model,
+      },
+      systemInfo
+    )
+
+    if (res?.error) {
+      events.emit(ModelEvent.OnModelFail, { error: res.error })
+      return Promise.reject(res.error)
+    } else {
+      this.loadedModel = model
+      events.emit(ModelEvent.OnModelReady, model)
+      return Promise.resolve()
+    }
   }
   /**
    * Stops the model.
    */
   override async unloadModel(model?: Model) {
-    return Promise.resolve()
+    if (model?.engine && model.engine?.toString() !== this.provider) return Promise.resolve()
+
+    this.loadedModel = undefined
+    await executeOnMain(this.nodeModule, this.unloadModelFunctionName).then(() => {
+      events.emit(ModelEvent.OnModelStopped, {})
+    })
   }
+
+  /// Legacy
+  private getModelFilePath = async (
+    id: string,
+  ): Promise<string> => {
+    return joinPath([await getJanDataFolderPath(), 'models', id])
+  }
+  ///
 }
diff --git a/extensions/inference-cortex-extension/src/index.ts b/extensions/inference-cortex-extension/src/index.ts
index 8143a71cf..45f0e5fe0 100644
--- a/extensions/inference-cortex-extension/src/index.ts
+++ b/extensions/inference-cortex-extension/src/index.ts
@@ -118,19 +118,6 @@ export default class JanInferenceCortexExtension extends LocalOAIEngine {
       .then()
   }
 
-  private async modelPath(
-    model: Model & { file_path?: string }
-  ): Promise<string> {
-    if (!model.file_path) return model.id
-    return await joinPath([
-      await dirName(model.file_path),
-      model.sources[0]?.filename ??
-        model.settings?.llama_model_path ??
-        model.sources[0]?.url.split('/').pop() ??
-        model.id,
-    ])
-  }
-
   /**
    * Do health check on cortex.cpp
    * @returns
diff --git a/extensions/model-extension/src/index.ts b/extensions/model-extension/src/index.ts
index 3e0af0172..a42fc2a52 100644
--- a/extensions/model-extension/src/index.ts
+++ b/extensions/model-extension/src/index.ts
@@ -14,6 +14,7 @@ import { CortexAPI } from './cortex'
 import { scanModelsFolder } from './legacy/model-json'
 import { downloadModel } from './legacy/download'
 import { systemInformation } from '@janhq/core'
+import { deleteModelFiles } from './legacy/delete'
 
 declare const SETTINGS: Array<any>
 
@@ -50,7 +51,7 @@ export default class JanModelExtension extends ModelExtension {
    * Called when the extension is unloaded.
    * @override
    */
-  async onUnload() {}
+  async onUnload() { }
 
   /**
    * Downloads a machine learning model.
@@ -92,7 +93,7 @@ export default class JanModelExtension extends ModelExtension {
       ) {
         for (const source of modelDto.sources) {
           const path = await joinPath(['models', modelDto.id, source.filename])
-          return abortDownload(path)
+          await abortDownload(path)
         }
       }
     }
@@ -108,7 +109,14 @@ export default class JanModelExtension extends ModelExtension {
    * @returns A Promise that resolves when the model is deleted.
    */
   async deleteModel(model: string): Promise<void> {
+    const modelDto: Model = ModelManager.instance().get(model)
     return this.cortexAPI.deleteModel(model)
+      .catch(e => console.debug(e))
+      .finally(async () => {
+        // Delete legacy model files
+        await deleteModelFiles(modelDto)
+          .catch(e => console.debug(e))
+      })
   }
 
   /**
@@ -174,9 +182,9 @@ export default class JanModelExtension extends ModelExtension {
             await joinPath([
               await dirName(model.file_path),
               model.sources[0]?.filename ??
-                model.settings?.llama_model_path ??
-                model.sources[0]?.url.split('/').pop() ??
-                model.id,
+              model.settings?.llama_model_path ??
+              model.sources[0]?.url.split('/').pop() ??
+              model.id,
             ])
           )
         )
diff --git a/extensions/model-extension/src/legacy/delete.ts b/extensions/model-extension/src/legacy/delete.ts
new file mode 100644
index 000000000..a46d90ea5
--- /dev/null
+++ b/extensions/model-extension/src/legacy/delete.ts
@@ -0,0 +1,18 @@
+import { fs, joinPath, Model } from "@janhq/core"
+
+export const deleteModelFiles = async (model: Model) => {
+    try {
+        const dirPath = await joinPath(['file://models', model.id])
+
+        // remove all files under dirPath except model.json
+        const files = await fs.readdirSync(dirPath)
+        const deletePromises = files.map(async (fileName: string) => {
+            if (fileName !== 'model.json') {
+                return fs.unlinkSync(await joinPath([dirPath, fileName]))
+            }
+        })
+        await Promise.allSettled(deletePromises)
+    } catch (err) {
+        console.error(err)
+    }
+}
\ No newline at end of file
diff --git a/extensions/model-extension/src/legacy/model-json.ts b/extensions/model-extension/src/legacy/model-json.ts
index 646ae85d7..c47b7c661 100644
--- a/extensions/model-extension/src/legacy/model-json.ts
+++ b/extensions/model-extension/src/legacy/model-json.ts
@@ -1,4 +1,4 @@
-import { Model, fs, joinPath } from '@janhq/core'
+import { InferenceEngine, Model, fs, joinPath } from '@janhq/core'
 //// LEGACY MODEL FOLDER ////
 /**
  * Scan through models folder and return downloaded models
@@ -71,7 +71,7 @@ export const scanModelsFolder = async (): Promise<Model[]> => {
                   file.toLowerCase().endsWith('.gguf') || // GGUF
                   file.toLowerCase().endsWith('.engine') // Tensort-LLM
                 )
-              })?.length >= (model.sources?.length ?? 1)
+              })?.length >= (model.engine === InferenceEngine.nitro_tensorrt_llm ? 1 : (model.sources?.length ?? 1))
             )
           })
 
diff --git a/web/containers/Providers/EventListener.tsx b/web/containers/Providers/EventListener.tsx
index 37711ee0d..af91b6027 100644
--- a/web/containers/Providers/EventListener.tsx
+++ b/web/containers/Providers/EventListener.tsx
@@ -88,7 +88,8 @@ const EventListenerWrapper = ({ children }: PropsWithChildren) => {
       if (state.downloadType !== 'extension') {
         state.downloadState = 'end'
         setDownloadState(state)
-        removeDownloadingModel(state.modelId)
+        if (state.percent !== 0)
+          removeDownloadingModel(state.modelId)
       }
       events.emit(ModelEvent.OnModelsUpdate, {})
     },
diff --git a/web/hooks/useDownloadState.ts b/web/hooks/useDownloadState.ts
index 59267749e..9aaa00bc4 100644
--- a/web/hooks/useDownloadState.ts
+++ b/web/hooks/useDownloadState.ts
@@ -108,6 +108,7 @@ export const setDownloadStateAtom = atom(
         )
 
         modelDownloadState.children = updatedChildren
+
         if (isAnyChildDownloadNotReady) {
           // just update the children
           currentState[state.modelId] = modelDownloadState
@@ -115,23 +116,17 @@ export const setDownloadStateAtom = atom(
           return
         }
 
-        const parentTotalSize = modelDownloadState.size.total
-        if (parentTotalSize === 0) {
-          // calculate the total size of the parent by sum all children total size
-          const totalSize = updatedChildren.reduce(
-            (acc, m) => acc + m.size.total,
-            0
-          )
-
-          modelDownloadState.size.total = totalSize
-        }
-
+        const parentTotalSize = updatedChildren.reduce(
+          (acc, m) => acc + m.size.total,
+          0
+        )
         // calculate the total transferred size by sum all children transferred size
         const transferredSize = updatedChildren.reduce(
           (acc, m) => acc + m.size.transferred,
           0
         )
         modelDownloadState.size.transferred = transferredSize
+
         modelDownloadState.percent =
           parentTotalSize === 0 ? 0 : transferredSize / parentTotalSize
         currentState[state.modelId] = modelDownloadState

From e5f5d887e3cbc33ae9b7dc222767ff3dc3e371c0 Mon Sep 17 00:00:00 2001
From: Louis <louis@jan.ai>
Date: Mon, 28 Oct 2024 19:25:45 +0700
Subject: [PATCH 35/71] fix: persist model.json on download (legacy models)

---
 extensions/model-extension/src/legacy/download.ts | 8 ++++++++
 1 file changed, 8 insertions(+)

diff --git a/extensions/model-extension/src/legacy/download.ts b/extensions/model-extension/src/legacy/download.ts
index a1a998daf..d4d6c62d9 100644
--- a/extensions/model-extension/src/legacy/download.ts
+++ b/extensions/model-extension/src/legacy/download.ts
@@ -19,6 +19,14 @@ export const downloadModel = async (
   const modelDirPath = await joinPath([homedir, model.id])
   if (!(await fs.existsSync(modelDirPath))) await fs.mkdir(modelDirPath)
 
+  const jsonFilePath = await joinPath([modelDirPath, 'model.json'])
+  // Write model.json on download
+  if (!(await fs.existsSync(jsonFilePath)))
+    await fs.writeFileSync(
+      jsonFilePath,
+      JSON.stringify(model, null, 2)
+    )
+
   if (model.engine === InferenceEngine.nitro_tensorrt_llm) {
     if (!gpuSettings || gpuSettings.gpus.length === 0) {
       console.error('No GPU found. Please check your GPU setting.')

From 83edc1fbc729aa1ec4e5dd24a5385d8b49e3aa7c Mon Sep 17 00:00:00 2001
From: Louis <louis@jan.ai>
Date: Tue, 29 Oct 2024 14:30:21 +0700
Subject: [PATCH 36/71] chore: linter and test

---
 .../extensions/engines/LocalOAIEngine.test.ts | 43 ++++++++++++++++---
 web/containers/Providers/EventListener.tsx    |  3 +-
 2 files changed, 39 insertions(+), 7 deletions(-)

diff --git a/core/src/browser/extensions/engines/LocalOAIEngine.test.ts b/core/src/browser/extensions/engines/LocalOAIEngine.test.ts
index 8a7722f3a..e4296468f 100644
--- a/core/src/browser/extensions/engines/LocalOAIEngine.test.ts
+++ b/core/src/browser/extensions/engines/LocalOAIEngine.test.ts
@@ -43,22 +43,55 @@ describe('LocalOAIEngine', () => {
   })
 
   it('should load model correctly', async () => {
-    const model: Model = { engine: 'testProvider', file_path: 'path/to/model' } as any
+    const model: ModelFile = { engine: 'testProvider', file_path: 'path/to/model' } as any
+    const modelFolder = 'path/to'
+    const systemInfo = { os: 'testOS' }
+    const res = { error: null }
 
-    expect(engine.loadModel(model)).toBeTruthy()
+    ;(dirName as jest.Mock).mockResolvedValue(modelFolder)
+    ;(systemInformation as jest.Mock).mockResolvedValue(systemInfo)
+    ;(executeOnMain as jest.Mock).mockResolvedValue(res)
+
+    await engine.loadModel(model)
+
+    expect(dirName).toHaveBeenCalledWith(model.file_path)
+    expect(systemInformation).toHaveBeenCalled()
+    expect(executeOnMain).toHaveBeenCalledWith(
+      engine.nodeModule,
+      engine.loadModelFunctionName,
+      { modelFolder, model },
+      systemInfo
+    )
+    expect(events.emit).toHaveBeenCalledWith(ModelEvent.OnModelReady, model)
+  })
+
+  it('should handle load model error', async () => {
+    const model: any = { engine: 'testProvider', file_path: 'path/to/model' } as any
+    const modelFolder = 'path/to'
+    const systemInfo = { os: 'testOS' }
+    const res = { error: 'load error' }
+
+    ;(dirName as jest.Mock).mockResolvedValue(modelFolder)
+    ;(systemInformation as jest.Mock).mockResolvedValue(systemInfo)
+    ;(executeOnMain as jest.Mock).mockResolvedValue(res)
+
+    await expect(engine.loadModel(model)).rejects.toEqual('load error')
+
+    expect(events.emit).toHaveBeenCalledWith(ModelEvent.OnModelFail, { error: res.error })
   })
 
   it('should unload model correctly', async () => {
     const model: Model = { engine: 'testProvider' } as any
 
-    expect(engine.unloadModel(model)).toBeTruthy()
+    await engine.unloadModel(model)
+
+    expect(executeOnMain).toHaveBeenCalledWith(engine.nodeModule, engine.unloadModelFunctionName)
+    expect(events.emit).toHaveBeenCalledWith(ModelEvent.OnModelStopped, {})
   })
 
   it('should not unload model if engine does not match', async () => {
     const model: Model = { engine: 'otherProvider' } as any
-
     await engine.unloadModel(model)
-
     expect(executeOnMain).not.toHaveBeenCalled()
     expect(events.emit).not.toHaveBeenCalledWith(ModelEvent.OnModelStopped, {})
   })
diff --git a/web/containers/Providers/EventListener.tsx b/web/containers/Providers/EventListener.tsx
index af91b6027..9535bbfa6 100644
--- a/web/containers/Providers/EventListener.tsx
+++ b/web/containers/Providers/EventListener.tsx
@@ -88,8 +88,7 @@ const EventListenerWrapper = ({ children }: PropsWithChildren) => {
       if (state.downloadType !== 'extension') {
         state.downloadState = 'end'
         setDownloadState(state)
-        if (state.percent !== 0)
-          removeDownloadingModel(state.modelId)
+        if (state.percent !== 0) removeDownloadingModel(state.modelId)
       }
       events.emit(ModelEvent.OnModelsUpdate, {})
     },

From 61f72e677517870732a71cc62b8143988be1107b Mon Sep 17 00:00:00 2001
From: Louis <louis@jan.ai>
Date: Tue, 29 Oct 2024 14:32:13 +0700
Subject: [PATCH 37/71] chore: bump cortex-cpp v1.0.2-rc1

---
 .../browser/extensions/engines/LocalOAIEngine.test.ts  |  2 +-
 extensions/inference-cortex-extension/bin/version.txt  |  2 +-
 extensions/inference-cortex-extension/download.bat     |  3 +++
 extensions/inference-cortex-extension/download.sh      | 10 ++++++++--
 4 files changed, 13 insertions(+), 4 deletions(-)

diff --git a/core/src/browser/extensions/engines/LocalOAIEngine.test.ts b/core/src/browser/extensions/engines/LocalOAIEngine.test.ts
index e4296468f..08fd947da 100644
--- a/core/src/browser/extensions/engines/LocalOAIEngine.test.ts
+++ b/core/src/browser/extensions/engines/LocalOAIEngine.test.ts
@@ -43,7 +43,7 @@ describe('LocalOAIEngine', () => {
   })
 
   it('should load model correctly', async () => {
-    const model: ModelFile = { engine: 'testProvider', file_path: 'path/to/model' } as any
+    const model: any = { engine: 'testProvider', file_path: 'path/to/model' } as any
     const modelFolder = 'path/to'
     const systemInfo = { os: 'testOS' }
     const res = { error: null }
diff --git a/extensions/inference-cortex-extension/bin/version.txt b/extensions/inference-cortex-extension/bin/version.txt
index 7f207341d..a9d40871b 100644
--- a/extensions/inference-cortex-extension/bin/version.txt
+++ b/extensions/inference-cortex-extension/bin/version.txt
@@ -1 +1 @@
-1.0.1
\ No newline at end of file
+1.0.2-rc1
\ No newline at end of file
diff --git a/extensions/inference-cortex-extension/download.bat b/extensions/inference-cortex-extension/download.bat
index 9b43459ce..e4d777ea2 100644
--- a/extensions/inference-cortex-extension/download.bat
+++ b/extensions/inference-cortex-extension/download.bat
@@ -22,6 +22,9 @@ call .\node_modules\.bin\download %DOWNLOAD_URL%-vulkan.tar.gz -e --strip 1 -o %
 call .\node_modules\.bin\download %CUDA_DOWNLOAD_URL%/cuda-12-0-windows-amd64.tar.gz -e --strip 1 -o %SHARED_PATH%
 call .\node_modules\.bin\download %CUDA_DOWNLOAD_URL%/cuda-11-7-windows-amd64.tar.gz -e --strip 1 -o %SHARED_PATH%
 
+move %BIN_PATH%\cortex-server-beta.exe %BIN_PATH%\cortex-server.exe
+del %BIN_PATH%\cortex-beta.exe
+del %BIN_PATH%\cortex.exe
 
 @REM Loop through each folder and move DLLs (excluding engine.dll)
 for %%F in (%SUBFOLDERS%) do (
diff --git a/extensions/inference-cortex-extension/download.sh b/extensions/inference-cortex-extension/download.sh
index be1f67f2d..902a31e51 100755
--- a/extensions/inference-cortex-extension/download.sh
+++ b/extensions/inference-cortex-extension/download.sh
@@ -11,7 +11,10 @@ OS_TYPE=$(uname)
 if [ "$OS_TYPE" == "Linux" ]; then
     # Linux downloads
     download "${CORTEX_RELEASE_URL}/v${CORTEX_VERSION}/cortex-${CORTEX_VERSION}-linux-amd64.tar.gz" -e --strip 1 -o "./bin"
-    chmod +x "./bin/cortex"
+    mv ./bin/cortex-server-beta ./bin/cortex-server
+    rm -rf ./bin/cortex
+    rm -rf ./bin/cortex-beta
+    chmod +x "./bin/cortex-server"
 
     # Download engines for Linux
     download "${ENGINE_DOWNLOAD_URL}-linux-amd64-noavx.tar.gz" -e --strip 1 -o "./bin/noavx/engines/cortex.llamacpp" 1
@@ -29,7 +32,10 @@ if [ "$OS_TYPE" == "Linux" ]; then
 elif [ "$OS_TYPE" == "Darwin" ]; then
     # macOS downloads
     download "${CORTEX_RELEASE_URL}/v${CORTEX_VERSION}/cortex-${CORTEX_VERSION}-mac-universal.tar.gz" -e --strip 1 -o "./bin" 1
-    chmod +x "./bin/cortex"
+    mv ./bin/cortex-server-beta ./bin/cortex-server
+    rm -rf ./bin/cortex
+    rm -rf ./bin/cortex-beta
+    chmod +x "./bin/cortex-server"
 
     # Download engines for macOS
     download "${ENGINE_DOWNLOAD_URL}-mac-arm64.tar.gz" -e --strip 1 -o ./bin/arm64/engines/cortex.llamacpp

From 1ab02b706f313b6c275c85c1e0bb54d354558e69 Mon Sep 17 00:00:00 2001
From: Louis <louis@jan.ai>
Date: Tue, 29 Oct 2024 21:28:25 +0700
Subject: [PATCH 38/71] fix: model import symlink

---
 .../inference-cortex-extension/src/index.ts   | 12 +++--
 extensions/model-extension/src/cortex.ts      |  2 +-
 extensions/model-extension/src/index.ts       | 46 +++++++++++--------
 web/hooks/useDownloadState.ts                 |  1 +
 web/hooks/useModels.ts                        |  2 +-
 web/utils/converter.ts                        |  4 +-
 web/utils/modelEngine.ts                      | 14 +++---
 7 files changed, 46 insertions(+), 35 deletions(-)

diff --git a/extensions/inference-cortex-extension/src/index.ts b/extensions/inference-cortex-extension/src/index.ts
index 45f0e5fe0..fb2ee9a46 100644
--- a/extensions/inference-cortex-extension/src/index.ts
+++ b/extensions/inference-cortex-extension/src/index.ts
@@ -71,7 +71,7 @@ export default class JanInferenceCortexExtension extends LocalOAIEngine {
       model.settings = {
         ...model.settings,
         llama_model_path: await getModelFilePath(
-          model.id,
+          model,
           model.settings.llama_model_path
         ),
       }
@@ -84,7 +84,7 @@ export default class JanInferenceCortexExtension extends LocalOAIEngine {
       // Legacy clip vision model support
       model.settings = {
         ...model.settings,
-        mmproj: await getModelFilePath(model.id, model.settings.mmproj),
+        mmproj: await getModelFilePath(model, model.settings.mmproj),
       }
     } else {
       const { mmproj, ...settings } = model.settings
@@ -136,9 +136,13 @@ export default class JanInferenceCortexExtension extends LocalOAIEngine {
 
 /// Legacy
 export const getModelFilePath = async (
-  id: string,
+  model: Model,
   file: string
 ): Promise<string> => {
-  return joinPath([await getJanDataFolderPath(), 'models', id, file])
+  // Symlink to the model file
+  if (!model.sources[0]?.url.startsWith('http')) {
+    return model.sources[0]?.url
+  }
+  return joinPath([await getJanDataFolderPath(), 'models', model.id, file])
 }
 ///
diff --git a/extensions/model-extension/src/cortex.ts b/extensions/model-extension/src/cortex.ts
index 7f48f10ec..ca9c2b921 100644
--- a/extensions/model-extension/src/cortex.ts
+++ b/extensions/model-extension/src/cortex.ts
@@ -168,7 +168,7 @@ export class CortexAPI implements ICortexAPI {
               (acc, cur) => acc + cur.bytes,
               0
             )
-            const percent = (transferred / total || 0) * 100
+            const percent = total > 0 ? transferred / total : 0
 
             events.emit(DownloadTypes[data.type], {
               modelId: data.task.id,
diff --git a/extensions/model-extension/src/index.ts b/extensions/model-extension/src/index.ts
index a42fc2a52..439481bc4 100644
--- a/extensions/model-extension/src/index.ts
+++ b/extensions/model-extension/src/index.ts
@@ -51,7 +51,7 @@ export default class JanModelExtension extends ModelExtension {
    * Called when the extension is unloaded.
    * @override
    */
-  async onUnload() { }
+  async onUnload() {}
 
   /**
    * Downloads a machine learning model.
@@ -64,8 +64,9 @@ export default class JanModelExtension extends ModelExtension {
       // Clip vision model - should not be handled by cortex.cpp
       // TensorRT model - should not be handled by cortex.cpp
       if (
-        model.engine === InferenceEngine.nitro_tensorrt_llm ||
-        model.settings.vision_model
+        model &&
+        (model.engine === InferenceEngine.nitro_tensorrt_llm ||
+          model.settings.vision_model)
       ) {
         return downloadModel(model, (await systemInformation()).gpuSetting)
       }
@@ -88,8 +89,9 @@ export default class JanModelExtension extends ModelExtension {
       // Clip vision model - should not be handled by cortex.cpp
       // TensorRT model - should not be handled by cortex.cpp
       if (
-        modelDto.engine === InferenceEngine.nitro_tensorrt_llm ||
-        modelDto.settings.vision_model
+        modelDto &&
+        (modelDto.engine === InferenceEngine.nitro_tensorrt_llm ||
+          modelDto.settings.vision_model)
       ) {
         for (const source of modelDto.sources) {
           const path = await joinPath(['models', modelDto.id, source.filename])
@@ -110,12 +112,13 @@ export default class JanModelExtension extends ModelExtension {
    */
   async deleteModel(model: string): Promise<void> {
     const modelDto: Model = ModelManager.instance().get(model)
-    return this.cortexAPI.deleteModel(model)
-      .catch(e => console.debug(e))
+    return this.cortexAPI
+      .deleteModel(model)
+      .catch((e) => console.debug(e))
       .finally(async () => {
         // Delete legacy model files
-        await deleteModelFiles(modelDto)
-          .catch(e => console.debug(e))
+        if (modelDto)
+          await deleteModelFiles(modelDto).catch((e) => console.debug(e))
       })
   }
 
@@ -179,13 +182,15 @@ export default class JanModelExtension extends ModelExtension {
         toImportModels.map(async (model: Model & { file_path: string }) =>
           this.importModel(
             model.id,
-            await joinPath([
-              await dirName(model.file_path),
-              model.sources[0]?.filename ??
-              model.settings?.llama_model_path ??
-              model.sources[0]?.url.split('/').pop() ??
-              model.id,
-            ])
+            model.sources[0].url.startsWith('http')
+              ? await joinPath([
+                  await dirName(model.file_path),
+                  model.sources[0]?.filename ??
+                    model.settings?.llama_model_path ??
+                    model.sources[0]?.url.split('/').pop() ??
+                    model.id,
+                ]) // Copied models
+              : model.sources[0].url // Symlink models
           )
         )
       )
@@ -197,13 +202,14 @@ export default class JanModelExtension extends ModelExtension {
      * Models are imported successfully before
      * Now return models from cortex.cpp and merge with legacy models which are not imported
      */
-    return (
-      this.cortexAPI.getModels().then((models) => {
+    return await this.cortexAPI
+      .getModels()
+      .then((models) => {
         return models.concat(
           legacyModels.filter((e) => !models.some((x) => x.id === e.id))
         )
-      }) ?? Promise.resolve(legacyModels)
-    )
+      })
+      .catch(() => Promise.resolve(legacyModels))
   }
 
   /**
diff --git a/web/hooks/useDownloadState.ts b/web/hooks/useDownloadState.ts
index 9aaa00bc4..b6d9ec49a 100644
--- a/web/hooks/useDownloadState.ts
+++ b/web/hooks/useDownloadState.ts
@@ -125,6 +125,7 @@ export const setDownloadStateAtom = atom(
           (acc, m) => acc + m.size.transferred,
           0
         )
+        modelDownloadState.size.total = parentTotalSize
         modelDownloadState.size.transferred = transferredSize
 
         modelDownloadState.percent =
diff --git a/web/hooks/useModels.ts b/web/hooks/useModels.ts
index 8bdbd6a90..0c898119c 100644
--- a/web/hooks/useModels.ts
+++ b/web/hooks/useModels.ts
@@ -34,7 +34,7 @@ const useModels = () => {
     const getDownloadedModels = async () => {
       const localModels = (await getModels()).map((e) => ({
         ...e,
-        name: ModelManager.instance().models.get(e.id)?.name ?? e.name,
+        name: ModelManager.instance().models.get(e.id)?.name ?? e.id,
         metadata:
           ModelManager.instance().models.get(e.id)?.metadata ?? e.metadata,
       }))
diff --git a/web/utils/converter.ts b/web/utils/converter.ts
index a0b05c9dd..9991fb5d7 100644
--- a/web/utils/converter.ts
+++ b/web/utils/converter.ts
@@ -18,8 +18,8 @@ export const formatDownloadPercentage = (
   input: number,
   options?: { hidePercentage?: boolean }
 ) => {
-  if (options?.hidePercentage) return input * 100
-  return (input * 100).toFixed(2) + '%'
+  if (options?.hidePercentage) return input <= 1 ? input * 100 : input
+  return (input <= 1 ? input * 100 : input).toFixed(2) + '%'
 }
 
 export const formatDownloadSpeed = (input: number | undefined) => {
diff --git a/web/utils/modelEngine.ts b/web/utils/modelEngine.ts
index 33b3ec3e1..2ac4a1acd 100644
--- a/web/utils/modelEngine.ts
+++ b/web/utils/modelEngine.ts
@@ -2,15 +2,16 @@ import { EngineManager, InferenceEngine, LocalOAIEngine } from '@janhq/core'
 
 export const getLogoEngine = (engine: InferenceEngine) => {
   switch (engine) {
-    case InferenceEngine.anthropic:
-      return 'images/ModelProvider/anthropic.svg'
-    case InferenceEngine.nitro_tensorrt_llm:
     case InferenceEngine.nitro:
-      return 'images/ModelProvider/nitro.svg'
     case InferenceEngine.cortex_llamacpp:
     case InferenceEngine.cortex_onnx:
     case InferenceEngine.cortex_tensorrtllm:
       return 'images/ModelProvider/cortex.svg'
+    case InferenceEngine.anthropic:
+      return 'images/ModelProvider/anthropic.svg'
+    case InferenceEngine.nitro_tensorrt_llm:
+      return 'images/ModelProvider/nitro.svg'
+
     case InferenceEngine.mistral:
       return 'images/ModelProvider/mistral.svg'
     case InferenceEngine.martian:
@@ -49,11 +50,10 @@ export const isLocalEngine = (engine: string) => {
 export const getTitleByEngine = (engine: InferenceEngine) => {
   switch (engine) {
     case InferenceEngine.nitro:
-      return 'Llama.cpp (Nitro)'
-    case InferenceEngine.nitro_tensorrt_llm:
-      return 'TensorRT-LLM (Nitro)'
     case InferenceEngine.cortex_llamacpp:
       return 'Llama.cpp (Cortex)'
+    case InferenceEngine.nitro_tensorrt_llm:
+      return 'TensorRT-LLM (Nitro)'
     case InferenceEngine.cortex_onnx:
       return 'Onnx (Cortex)'
     case InferenceEngine.cortex_tensorrtllm:

From 8837b872af90fa440ff75413219095367078c112 Mon Sep 17 00:00:00 2001
From: Louis <louis@jan.ai>
Date: Tue, 29 Oct 2024 22:17:01 +0700
Subject: [PATCH 39/71] test: fix chore

---
 web/utils/modelEngine.test.ts | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/web/utils/modelEngine.test.ts b/web/utils/modelEngine.test.ts
index 738e04c2a..04001f726 100644
--- a/web/utils/modelEngine.test.ts
+++ b/web/utils/modelEngine.test.ts
@@ -71,7 +71,7 @@ describe('isLocalEngine', () => {
   describe('getTitleByEngine', () => {
     it('should return correct title for InferenceEngine.nitro', () => {
       const result = getTitleByEngine(InferenceEngine.nitro)
-      expect(result).toBe('Llama.cpp (Nitro)')
+      expect(result).toBe('Llama.cpp (Cortex)')
     })
 
     it('should return correct title for InferenceEngine.nitro_tensorrt_llm', () => {

From 8c759676d9ceebb339e80a47d5818dff7fa7da87 Mon Sep 17 00:00:00 2001
From: Louis <louis@jan.ai>
Date: Wed, 30 Oct 2024 10:34:42 +0700
Subject: [PATCH 40/71] chore: fix responsiveness issue when importing models
 (cortex.cpp does not support download in parallel yet)

---
 web/containers/ModelDropdown/index.tsx        |  9 +++-
 web/hooks/useImportModel.ts                   | 41 ++++++++++---------
 .../ModelDownloadRow/index.tsx                | 11 +++--
 web/screens/Settings/MyModels/index.tsx       |  7 +++-
 web/utils/converter.ts                        |  2 +-
 5 files changed, 44 insertions(+), 26 deletions(-)

diff --git a/web/containers/ModelDropdown/index.tsx b/web/containers/ModelDropdown/index.tsx
index abd9af247..4ff8edac5 100644
--- a/web/containers/ModelDropdown/index.tsx
+++ b/web/containers/ModelDropdown/index.tsx
@@ -261,8 +261,13 @@ const ModelDropdown = ({
   }, [])
 
   const findByEngine = filteredDownloadedModels
-    .filter((x) => !inActiveEngineProvider.includes(x.engine))
-    .map((x) => x.engine)
+    .map((x) => {
+      // Legacy engine support - they will be grouped under Cortex LlamaCPP
+      if (x.engine === InferenceEngine.nitro)
+        return InferenceEngine.cortex_llamacpp
+      return x.engine
+    })
+    .filter((x) => !inActiveEngineProvider.includes(x))
 
   const groupByEngine = findByEngine
     .filter(function (item, index) {
diff --git a/web/hooks/useImportModel.ts b/web/hooks/useImportModel.ts
index 5650c73bd..951e93bef 100644
--- a/web/hooks/useImportModel.ts
+++ b/web/hooks/useImportModel.ts
@@ -18,7 +18,11 @@ import { snackbar } from '@/containers/Toast'
 import { FilePathWithSize } from '@/utils/file'
 
 import { extensionManager } from '@/extension'
-import { importingModelsAtom } from '@/helpers/atoms/Model.atom'
+import {
+  addDownloadingModelAtom,
+  importingModelsAtom,
+  removeDownloadingModelAtom,
+} from '@/helpers/atoms/Model.atom'
 
 export type ImportModelStage =
   | 'NONE'
@@ -49,11 +53,25 @@ export type ModelUpdate = {
 const useImportModel = () => {
   const setImportModelStage = useSetAtom(setImportModelStageAtom)
   const setImportingModels = useSetAtom(importingModelsAtom)
+  const addDownloadingModel = useSetAtom(addDownloadingModelAtom)
+  const removeDownloadingModel = useSetAtom(removeDownloadingModelAtom)
 
   const importModels = useCallback(
-    (models: ImportingModel[], optionType: OptionType) =>
-      localImportModels(models, optionType),
-    []
+    (models: ImportingModel[], optionType: OptionType) => {
+      models
+        .filter((e) => !!e.modelId)
+        .map((model) => {
+          if (model.modelId) {
+            const modelId = model.modelId
+            addDownloadingModel(modelId)
+            extensionManager
+              .get<ModelExtension>(ExtensionTypeEnum.Model)
+              ?.importModel(model.modelId, model.path)
+              .finally(() => removeDownloadingModel(modelId))
+          }
+        })
+    },
+    [addDownloadingModel, removeDownloadingModel]
   )
 
   const updateModelInfo = useCallback(
@@ -101,21 +119,6 @@ const useImportModel = () => {
   return { importModels, updateModelInfo, sanitizeFilePaths }
 }
 
-const localImportModels = async (
-  models: ImportingModel[],
-  // TODO: @louis - We will set this option when cortex.cpp supports it
-  optionType: OptionType
-): Promise<void> => {
-  await models
-    .filter((e) => !!e.modelId)
-    .map((model) => {
-      if (model.modelId)
-        extensionManager
-          .get<ModelExtension>(ExtensionTypeEnum.Model)
-          ?.importModel(model.modelId, model.path)
-    })
-}
-
 const localUpdateModelInfo = async (
   modelInfo: Partial<Model>
 ): Promise<Model | undefined> =>
diff --git a/web/screens/Settings/HuggingFaceRepoDetailModal/ModelDownloadRow/index.tsx b/web/screens/Settings/HuggingFaceRepoDetailModal/ModelDownloadRow/index.tsx
index ccb966829..bd9f67ebb 100644
--- a/web/screens/Settings/HuggingFaceRepoDetailModal/ModelDownloadRow/index.tsx
+++ b/web/screens/Settings/HuggingFaceRepoDetailModal/ModelDownloadRow/index.tsx
@@ -21,7 +21,10 @@ import { mainViewStateAtom } from '@/helpers/atoms/App.atom'
 import { assistantsAtom } from '@/helpers/atoms/Assistant.atom'
 
 import { importHuggingFaceModelStageAtom } from '@/helpers/atoms/HuggingFace.atom'
-import { downloadedModelsAtom } from '@/helpers/atoms/Model.atom'
+import {
+  downloadedModelsAtom,
+  getDownloadingModelAtom,
+} from '@/helpers/atoms/Model.atom'
 
 type Props = {
   index: number
@@ -42,11 +45,13 @@ const ModelDownloadRow: React.FC<Props> = ({
   const { downloadModel, abortModelDownload } = useDownloadModel()
   const allDownloadStates = useAtomValue(modelDownloadStateAtom)
   const downloadState: DownloadState | undefined = allDownloadStates[fileName]
+  const downloadingModels = useAtomValue(getDownloadingModelAtom)
 
   const { requestCreateNewThread } = useCreateNewThread()
   const setMainViewState = useSetAtom(mainViewStateAtom)
   const assistants = useAtomValue(assistantsAtom)
   const downloadedModel = downloadedModels.find((md) => md.id === fileName)
+  const isDownloading = downloadingModels.some((md) => md === fileName)
 
   const setHfImportingStage = useSetAtom(importHuggingFaceModelStageAtom)
 
@@ -114,7 +119,7 @@ const ModelDownloadRow: React.FC<Props> = ({
         >
           Use
         </Button>
-      ) : downloadState != null ? (
+      ) : isDownloading ? (
         <Button variant="soft">
           <div className="flex items-center space-x-2">
             <span className="inline-block" onClick={onAbortDownloadClick}>
@@ -129,7 +134,7 @@ const ModelDownloadRow: React.FC<Props> = ({
               }
             />
             <span className="tabular-nums">
-              {formatDownloadPercentage(downloadState.percent)}
+              {formatDownloadPercentage(downloadState?.percent)}
             </span>
           </div>
         </Button>
diff --git a/web/screens/Settings/MyModels/index.tsx b/web/screens/Settings/MyModels/index.tsx
index 547e6153b..ba42d12c2 100644
--- a/web/screens/Settings/MyModels/index.tsx
+++ b/web/screens/Settings/MyModels/index.tsx
@@ -116,7 +116,12 @@ const MyModels = () => {
     getAllSettings()
   }, [])
 
-  const findByEngine = filteredDownloadedModels.map((x) => x.engine)
+  const findByEngine = filteredDownloadedModels.map((x) => {
+    // Legacy engine support - they will be grouped under Cortex LlamaCPP
+    if (x.engine === InferenceEngine.nitro)
+      return InferenceEngine.cortex_llamacpp
+    return x.engine
+  })
   const groupByEngine = findByEngine
     .filter(function (item, index) {
       if (findByEngine.indexOf(item) === index) return item
diff --git a/web/utils/converter.ts b/web/utils/converter.ts
index 9991fb5d7..017a05d1a 100644
--- a/web/utils/converter.ts
+++ b/web/utils/converter.ts
@@ -19,7 +19,7 @@ export const formatDownloadPercentage = (
   options?: { hidePercentage?: boolean }
 ) => {
   if (options?.hidePercentage) return input <= 1 ? input * 100 : input
-  return (input <= 1 ? input * 100 : input).toFixed(2) + '%'
+  return (input <= 1 ? input * 100 : (input ?? 0)).toFixed(2) + '%'
 }
 
 export const formatDownloadSpeed = (input: number | undefined) => {

From 2a0d87a393d3d1e1167ea9c6bcbafc80c939ea70 Mon Sep 17 00:00:00 2001
From: Louis <louis@jan.ai>
Date: Wed, 30 Oct 2024 16:05:13 +0700
Subject: [PATCH 41/71] fix: inconsistent models from dropdown and hub

---
 web/containers/ModelDropdown/index.tsx              | 7 ++++++-
 web/screens/Settings/MyModels/MyModelList/index.tsx | 4 ++--
 web/screens/Settings/MyModels/index.tsx             | 7 ++++++-
 3 files changed, 14 insertions(+), 4 deletions(-)

diff --git a/web/containers/ModelDropdown/index.tsx b/web/containers/ModelDropdown/index.tsx
index 4ff8edac5..a58febabf 100644
--- a/web/containers/ModelDropdown/index.tsx
+++ b/web/containers/ModelDropdown/index.tsx
@@ -509,7 +509,12 @@ const ModelDropdown = ({
 
                     <ul className="pb-2">
                       {filteredDownloadedModels
-                        .filter((x) => x.engine === engine)
+                        .filter(
+                          (x) =>
+                            x.engine === engine ||
+                            (x.engine === InferenceEngine.nitro &&
+                              engine === InferenceEngine.cortex_llamacpp)
+                        )
                         .filter((y) => {
                           if (isLocalEngine(y.engine) && !searchText.length) {
                             return downloadedModels.find((c) => c.id === y.id)
diff --git a/web/screens/Settings/MyModels/MyModelList/index.tsx b/web/screens/Settings/MyModels/MyModelList/index.tsx
index 756520107..26dd26b6c 100644
--- a/web/screens/Settings/MyModels/MyModelList/index.tsx
+++ b/web/screens/Settings/MyModels/MyModelList/index.tsx
@@ -54,14 +54,14 @@ const MyModelList = ({ model }: Props) => {
             <h6
               className={twMerge(
                 'font-medium lg:line-clamp-1 lg:min-w-[280px] lg:max-w-[280px]',
-                model.engine !== InferenceEngine.nitro &&
+                !isLocalEngine(model.engine) &&
                   'max-w-none text-[hsla(var(--text-secondary))]'
               )}
               title={model.name}
             >
               {model.name}
             </h6>
-            {model.engine === InferenceEngine.nitro && (
+            {!isLocalEngine(model.engine) && (
               <div className="flex gap-x-8">
                 <p
                   className="line-clamp-1 text-[hsla(var(--text-secondary))] lg:min-w-[160px] lg:max-w-[160px] xl:max-w-none"
diff --git a/web/screens/Settings/MyModels/index.tsx b/web/screens/Settings/MyModels/index.tsx
index ba42d12c2..218f8cb62 100644
--- a/web/screens/Settings/MyModels/index.tsx
+++ b/web/screens/Settings/MyModels/index.tsx
@@ -250,7 +250,12 @@ const MyModels = () => {
                     <div className="mt-2">
                       {filteredDownloadedModels
                         ? filteredDownloadedModels
-                            .filter((x) => x.engine === engine)
+                            .filter(
+                              (x) =>
+                                x.engine === engine ||
+                                (x.engine === InferenceEngine.nitro &&
+                                  engine === InferenceEngine.cortex_llamacpp)
+                            )
                             .map((model) => {
                               if (!showModel) return null
                               return (

From 5ddbf5fb342af8fae9f7e684328e7e5b6dfd8d65 Mon Sep 17 00:00:00 2001
From: Louis <louis@jan.ai>
Date: Wed, 30 Oct 2024 16:08:13 +0700
Subject: [PATCH 42/71] fix: unlink the entire model folder on delete

---
 .../model-extension/src/legacy/delete.ts      | 25 +++++++------------
 1 file changed, 9 insertions(+), 16 deletions(-)

diff --git a/extensions/model-extension/src/legacy/delete.ts b/extensions/model-extension/src/legacy/delete.ts
index a46d90ea5..039eab4cf 100644
--- a/extensions/model-extension/src/legacy/delete.ts
+++ b/extensions/model-extension/src/legacy/delete.ts
@@ -1,18 +1,11 @@
-import { fs, joinPath, Model } from "@janhq/core"
+import { fs, joinPath, Model } from '@janhq/core'
 
 export const deleteModelFiles = async (model: Model) => {
-    try {
-        const dirPath = await joinPath(['file://models', model.id])
-
-        // remove all files under dirPath except model.json
-        const files = await fs.readdirSync(dirPath)
-        const deletePromises = files.map(async (fileName: string) => {
-            if (fileName !== 'model.json') {
-                return fs.unlinkSync(await joinPath([dirPath, fileName]))
-            }
-        })
-        await Promise.allSettled(deletePromises)
-    } catch (err) {
-        console.error(err)
-    }
-}
\ No newline at end of file
+  try {
+    const dirPath = await joinPath(['file://models', model.id])
+    // remove model folder directory
+    await fs.unlinkSync(dirPath)
+  } catch (err) {
+    console.error(err)
+  }
+}

From d0ffe6c6117cd0aea7a17005fa4b045bd8e676f1 Mon Sep 17 00:00:00 2001
From: Louis <louis@jan.ai>
Date: Wed, 30 Oct 2024 16:41:47 +0700
Subject: [PATCH 43/71] chore: update electron notarize version

---
 electron/package.json | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/electron/package.json b/electron/package.json
index 273062139..4768ae5b7 100644
--- a/electron/package.json
+++ b/electron/package.json
@@ -113,7 +113,7 @@
     "@kirillvakalov/nut-tree__nut-js": "4.2.1-2"
   },
   "devDependencies": {
-    "@electron/notarize": "^2.1.0",
+    "@electron/notarize": "^2.3.2",
     "@playwright/test": "^1.38.1",
     "@types/npmcli__arborist": "^5.6.4",
     "@types/pacote": "^11.1.7",

From a986c6de2dc745d5975341b7712d1f4d7b3bea4c Mon Sep 17 00:00:00 2001
From: Louis <louis@jan.ai>
Date: Fri, 1 Nov 2024 16:35:55 +0700
Subject: [PATCH 44/71] chore: decide model name on pull and import

---
 core/src/browser/extensions/model.ts             |  4 ++--
 core/src/types/model/modelInterface.ts           |  4 ++--
 .../inference-cortex-extension/bin/version.txt   |  2 +-
 extensions/model-extension/src/cortex.ts         | 12 ++++++------
 extensions/model-extension/src/index.ts          | 16 +++++++++-------
 extensions/model-extension/src/legacy/delete.ts  |  8 ++++----
 web/hooks/useDownloadModel.test.ts               |  6 ++++--
 web/hooks/useDownloadModel.ts                    |  8 ++++----
 web/hooks/useImportModel.test.ts                 |  4 ++--
 web/hooks/useImportModel.ts                      |  2 +-
 web/screens/Hub/ModelList/ModelHeader/index.tsx  |  2 +-
 .../ModelDownloadRow/index.tsx                   |  6 +++++-
 .../ChatBody/OnDeviceStarterScreen/index.tsx     |  6 ++++--
 13 files changed, 45 insertions(+), 35 deletions(-)

diff --git a/core/src/browser/extensions/model.ts b/core/src/browser/extensions/model.ts
index f3609b3b2..b237fad9d 100644
--- a/core/src/browser/extensions/model.ts
+++ b/core/src/browser/extensions/model.ts
@@ -13,9 +13,9 @@ export abstract class ModelExtension extends BaseExtension implements ModelInter
   }
 
   abstract getModels(): Promise<Model[]>
-  abstract pullModel(model: string, id?: string): Promise<void>
+  abstract pullModel(model: string, id?: string, name?: string): Promise<void>
   abstract cancelModelPull(modelId: string): Promise<void>
-  abstract importModel(model: string, modePath: string): Promise<void>
+  abstract importModel(model: string, modePath: string, name?: string): Promise<void>
   abstract updateModel(modelInfo: Partial<Model>): Promise<Model>
   abstract deleteModel(model: string): Promise<void>
 }
diff --git a/core/src/types/model/modelInterface.ts b/core/src/types/model/modelInterface.ts
index b676db949..c35bae9ce 100644
--- a/core/src/types/model/modelInterface.ts
+++ b/core/src/types/model/modelInterface.ts
@@ -9,7 +9,7 @@ export interface ModelInterface {
    * @param model - The model to download.
    * @returns A Promise that resolves when the model has been downloaded.
    */
-  pullModel(model: string, id?: string): Promise<void>
+  pullModel(model: string, id?: string, name?: string): Promise<void>
 
   /**
    * Cancels the download of a specific model.
@@ -43,5 +43,5 @@ export interface ModelInterface {
    * @param model id of the model to import
    * @param modelPath - path of the model file
    */
-  importModel(model: string, modePath: string): Promise<void>
+  importModel(model: string, modePath: string, name?: string): Promise<void>
 }
diff --git a/extensions/inference-cortex-extension/bin/version.txt b/extensions/inference-cortex-extension/bin/version.txt
index a9d40871b..57d77db55 100644
--- a/extensions/inference-cortex-extension/bin/version.txt
+++ b/extensions/inference-cortex-extension/bin/version.txt
@@ -1 +1 @@
-1.0.2-rc1
\ No newline at end of file
+1.0.2-rc2
\ No newline at end of file
diff --git a/extensions/model-extension/src/cortex.ts b/extensions/model-extension/src/cortex.ts
index ca9c2b921..50eace5e5 100644
--- a/extensions/model-extension/src/cortex.ts
+++ b/extensions/model-extension/src/cortex.ts
@@ -8,8 +8,8 @@ import { extractInferenceParams } from '@janhq/core'
 interface ICortexAPI {
   getModel(model: string): Promise<Model>
   getModels(): Promise<Model[]>
-  pullModel(model: string, id?: string): Promise<void>
-  importModel(path: string, modelPath: string): Promise<void>
+  pullModel(model: string, id?: string, name?: string): Promise<void>
+  importModel(path: string, modelPath: string, name?: string): Promise<void>
   deleteModel(model: string): Promise<void>
   updateModel(model: object): Promise<void>
   cancelModelPull(model: string): Promise<void>
@@ -68,10 +68,10 @@ export class CortexAPI implements ICortexAPI {
    * @param model
    * @returns
    */
-  pullModel(model: string, id?: string): Promise<void> {
+  pullModel(model: string, id?: string, name?: string): Promise<void> {
     return this.queue.add(() =>
       ky
-        .post(`${API_URL}/v1/models/pull`, { json: { model, id } })
+        .post(`${API_URL}/v1/models/pull`, { json: { model, id, name } })
         .json()
         .catch(async (e) => {
           throw (await e.response?.json()) ?? e
@@ -85,10 +85,10 @@ export class CortexAPI implements ICortexAPI {
    * @param model
    * @returns
    */
-  importModel(model: string, modelPath: string): Promise<void> {
+  importModel(model: string, modelPath: string, name?: string): Promise<void> {
     return this.queue.add(() =>
       ky
-        .post(`${API_URL}/v1/models/import`, { json: { model, modelPath } })
+        .post(`${API_URL}/v1/models/import`, { json: { model, modelPath, name } })
         .json()
         .catch((e) => console.debug(e)) // Ignore error
         .then()
diff --git a/extensions/model-extension/src/index.ts b/extensions/model-extension/src/index.ts
index 439481bc4..17c00263d 100644
--- a/extensions/model-extension/src/index.ts
+++ b/extensions/model-extension/src/index.ts
@@ -58,7 +58,7 @@ export default class JanModelExtension extends ModelExtension {
    * @param model - The model to download.
    * @returns A Promise that resolves when the model is downloaded.
    */
-  async pullModel(model: string, id?: string): Promise<void> {
+  async pullModel(model: string, id?: string, name?: string): Promise<void> {
     if (id) {
       const model: Model = ModelManager.instance().get(id)
       // Clip vision model - should not be handled by cortex.cpp
@@ -74,7 +74,7 @@ export default class JanModelExtension extends ModelExtension {
     /**
      * Sending POST to /models/pull/{id} endpoint to pull the model
      */
-    return this.cortexAPI.pullModel(model, id)
+    return this.cortexAPI.pullModel(model, id, name)
   }
 
   /**
@@ -111,14 +111,12 @@ export default class JanModelExtension extends ModelExtension {
    * @returns A Promise that resolves when the model is deleted.
    */
   async deleteModel(model: string): Promise<void> {
-    const modelDto: Model = ModelManager.instance().get(model)
     return this.cortexAPI
       .deleteModel(model)
       .catch((e) => console.debug(e))
       .finally(async () => {
         // Delete legacy model files
-        if (modelDto)
-          await deleteModelFiles(modelDto).catch((e) => console.debug(e))
+        await deleteModelFiles(model).catch((e) => console.debug(e))
       })
   }
 
@@ -227,8 +225,12 @@ export default class JanModelExtension extends ModelExtension {
    * @param model
    * @param optionType
    */
-  async importModel(model: string, modelPath: string): Promise<void> {
-    return this.cortexAPI.importModel(model, modelPath)
+  async importModel(
+    model: string,
+    modelPath: string,
+    name?: string
+  ): Promise<void> {
+    return this.cortexAPI.importModel(model, modelPath, name)
   }
 
   /**
diff --git a/extensions/model-extension/src/legacy/delete.ts b/extensions/model-extension/src/legacy/delete.ts
index 039eab4cf..5288e30ee 100644
--- a/extensions/model-extension/src/legacy/delete.ts
+++ b/extensions/model-extension/src/legacy/delete.ts
@@ -1,10 +1,10 @@
-import { fs, joinPath, Model } from '@janhq/core'
+import { fs, joinPath } from '@janhq/core'
 
-export const deleteModelFiles = async (model: Model) => {
+export const deleteModelFiles = async (id: string) => {
   try {
-    const dirPath = await joinPath(['file://models', model.id])
+    const dirPath = await joinPath(['file://models', id])
     // remove model folder directory
-    await fs.unlinkSync(dirPath)
+    await fs.rm(dirPath)
   } catch (err) {
     console.error(err)
   }
diff --git a/web/hooks/useDownloadModel.test.ts b/web/hooks/useDownloadModel.test.ts
index ff75fbcd8..7e9d7b518 100644
--- a/web/hooks/useDownloadModel.test.ts
+++ b/web/hooks/useDownloadModel.test.ts
@@ -40,7 +40,8 @@ describe('useDownloadModel', () => {
 
     expect(mockExtension.pullModel).toHaveBeenCalledWith(
       mockModel.sources[0].url,
-      mockModel.id
+      mockModel.id,
+      undefined
     )
   })
 
@@ -87,7 +88,8 @@ describe('useDownloadModel', () => {
 
     expect(mockExtension.pullModel).toHaveBeenCalledWith(
       mockModel.sources[0].url,
-      mockModel.id
+      mockModel.id,
+      undefined
     )
   })
 })
diff --git a/web/hooks/useDownloadModel.ts b/web/hooks/useDownloadModel.ts
index 3b25cb86f..bbf03e2e7 100644
--- a/web/hooks/useDownloadModel.ts
+++ b/web/hooks/useDownloadModel.ts
@@ -18,9 +18,9 @@ export default function useDownloadModel() {
   const addDownloadingModel = useSetAtom(addDownloadingModelAtom)
 
   const downloadModel = useCallback(
-    async (model: string, id?: string) => {
+    async (model: string, id?: string, name?: string) => {
       addDownloadingModel(id ?? model)
-      downloadLocalModel(model, id).catch((error) => {
+      downloadLocalModel(model, id, name).catch((error) => {
         if (error.message) {
           toaster({
             title: 'Download failed',
@@ -45,10 +45,10 @@ export default function useDownloadModel() {
   }
 }
 
-const downloadLocalModel = async (model: string, id?: string) =>
+const downloadLocalModel = async (model: string, id?: string, name?: string) =>
   extensionManager
     .get<ModelExtension>(ExtensionTypeEnum.Model)
-    ?.pullModel(model, id)
+    ?.pullModel(model, id, name)
 
 const cancelModelDownload = async (model: string) =>
   extensionManager
diff --git a/web/hooks/useImportModel.test.ts b/web/hooks/useImportModel.test.ts
index d37e4a853..9b623226d 100644
--- a/web/hooks/useImportModel.test.ts
+++ b/web/hooks/useImportModel.test.ts
@@ -34,8 +34,8 @@ describe('useImportModel', () => {
       await result.current.importModels(models, 'local' as any)
     })
 
-    expect(mockImportModels).toHaveBeenCalledWith('1', '/path/to/model1')
-    expect(mockImportModels).toHaveBeenCalledWith('2', '/path/to/model2')
+    expect(mockImportModels).toHaveBeenCalledWith('1', '/path/to/model1', undefined)
+    expect(mockImportModels).toHaveBeenCalledWith('2', '/path/to/model2', undefined)
   })
 
   it('should update model info successfully', async () => {
diff --git a/web/hooks/useImportModel.ts b/web/hooks/useImportModel.ts
index 951e93bef..b8f64db98 100644
--- a/web/hooks/useImportModel.ts
+++ b/web/hooks/useImportModel.ts
@@ -66,7 +66,7 @@ const useImportModel = () => {
             addDownloadingModel(modelId)
             extensionManager
               .get<ModelExtension>(ExtensionTypeEnum.Model)
-              ?.importModel(model.modelId, model.path)
+              ?.importModel(model.modelId, model.path, model.name)
               .finally(() => removeDownloadingModel(modelId))
           }
         })
diff --git a/web/screens/Hub/ModelList/ModelHeader/index.tsx b/web/screens/Hub/ModelList/ModelHeader/index.tsx
index 725b0216a..da98e41e3 100644
--- a/web/screens/Hub/ModelList/ModelHeader/index.tsx
+++ b/web/screens/Hub/ModelList/ModelHeader/index.tsx
@@ -64,7 +64,7 @@ const ModelItemHeader = ({ model, onClick, open }: Props) => {
   const assistants = useAtomValue(assistantsAtom)
 
   const onDownloadClick = useCallback(() => {
-    downloadModel(model.sources[0].url, model.id)
+    downloadModel(model.sources[0].url, model.id, model.name)
   }, [model, downloadModel])
 
   const isDownloaded = downloadedModels.find((md) => md.id === model.id) != null
diff --git a/web/screens/Settings/HuggingFaceRepoDetailModal/ModelDownloadRow/index.tsx b/web/screens/Settings/HuggingFaceRepoDetailModal/ModelDownloadRow/index.tsx
index bd9f67ebb..dbd2798b7 100644
--- a/web/screens/Settings/HuggingFaceRepoDetailModal/ModelDownloadRow/index.tsx
+++ b/web/screens/Settings/HuggingFaceRepoDetailModal/ModelDownloadRow/index.tsx
@@ -63,7 +63,11 @@ const ModelDownloadRow: React.FC<Props> = ({
 
   const onDownloadClick = useCallback(async () => {
     if (downloadUrl) {
-      downloadModel(downloadUrl, normalizeModelId(downloadUrl))
+      downloadModel(
+        downloadUrl,
+        normalizeModelId(downloadUrl),
+        normalizeModelId(downloadUrl)
+      )
     }
   }, [downloadUrl, downloadModel])
 
diff --git a/web/screens/Thread/ThreadCenterPanel/ChatBody/OnDeviceStarterScreen/index.tsx b/web/screens/Thread/ThreadCenterPanel/ChatBody/OnDeviceStarterScreen/index.tsx
index 366575a40..0b999c19d 100644
--- a/web/screens/Thread/ThreadCenterPanel/ChatBody/OnDeviceStarterScreen/index.tsx
+++ b/web/screens/Thread/ThreadCenterPanel/ChatBody/OnDeviceStarterScreen/index.tsx
@@ -170,7 +170,8 @@ const OnDeviceStarterScreen = ({ extensionHasSettings }: Props) => {
                                   onClick={() =>
                                     downloadModel(
                                       model.sources[0].url,
-                                      model.id
+                                      model.id,
+                                      model.name
                                     )
                                   }
                                 />
@@ -261,7 +262,8 @@ const OnDeviceStarterScreen = ({ extensionHasSettings }: Props) => {
                             onClick={() =>
                               downloadModel(
                                 featModel.sources[0].url,
-                                featModel.id
+                                featModel.id,
+                                featModel.name
                               )
                             }
                           >

From b913af9f88428b90a81a12b038560cfdcd2ac1e9 Mon Sep 17 00:00:00 2001
From: Louis <louis@jan.ai>
Date: Fri, 1 Nov 2024 17:09:56 +0700
Subject: [PATCH 45/71] chore: model id is optional on import

---
 web/hooks/useImportModel.ts | 27 +++++++++++++++------------
 1 file changed, 15 insertions(+), 12 deletions(-)

diff --git a/web/hooks/useImportModel.ts b/web/hooks/useImportModel.ts
index b8f64db98..d84690715 100644
--- a/web/hooks/useImportModel.ts
+++ b/web/hooks/useImportModel.ts
@@ -4,8 +4,10 @@ import {
   ExtensionTypeEnum,
   ImportingModel,
   Model,
+  ModelEvent,
   ModelExtension,
   OptionType,
+  events,
   fs,
 } from '@janhq/core'
 
@@ -58,18 +60,19 @@ const useImportModel = () => {
 
   const importModels = useCallback(
     (models: ImportingModel[], optionType: OptionType) => {
-      models
-        .filter((e) => !!e.modelId)
-        .map((model) => {
-          if (model.modelId) {
-            const modelId = model.modelId
-            addDownloadingModel(modelId)
-            extensionManager
-              .get<ModelExtension>(ExtensionTypeEnum.Model)
-              ?.importModel(model.modelId, model.path, model.name)
-              .finally(() => removeDownloadingModel(modelId))
-          }
-        })
+      models.map((model) => {
+        const modelId = model.modelId ?? model.path.split('/').pop()
+        if (modelId) {
+          addDownloadingModel(modelId)
+          extensionManager
+            .get<ModelExtension>(ExtensionTypeEnum.Model)
+            ?.importModel(modelId, model.path, model.name)
+            .finally(() => {
+              removeDownloadingModel(modelId)
+              events.emit(ModelEvent.OnModelsUpdate, {})
+            })
+        }
+      })
     },
     [addDownloadingModel, removeDownloadingModel]
   )

From 46d5faf59fbe159f4a9d1648b53d44b88e9c3c84 Mon Sep 17 00:00:00 2001
From: Louis <louis@jan.ai>
Date: Mon, 4 Nov 2024 20:36:04 +0700
Subject: [PATCH 46/71] chore: new cortex-cpp binary - model import option and
 model size

---
 core/src/browser/extensions/model.ts             |  2 +-
 core/src/types/model/modelImport.ts              |  2 +-
 core/src/types/model/modelInterface.ts           |  8 +++++++-
 .../inference-cortex-extension/bin/version.txt   |  2 +-
 extensions/model-extension/src/cortex.ts         | 14 +++++++++++---
 extensions/model-extension/src/index.ts          |  6 ++++--
 web/hooks/useImportModel.ts                      |  8 ++++++--
 web/hooks/useModels.ts                           | 16 ++++++++++++++--
 .../Settings/ImportModelOptionModal/index.tsx    |  4 ++--
 9 files changed, 47 insertions(+), 15 deletions(-)

diff --git a/core/src/browser/extensions/model.ts b/core/src/browser/extensions/model.ts
index b237fad9d..1fb94fba3 100644
--- a/core/src/browser/extensions/model.ts
+++ b/core/src/browser/extensions/model.ts
@@ -15,7 +15,7 @@ export abstract class ModelExtension extends BaseExtension implements ModelInter
   abstract getModels(): Promise<Model[]>
   abstract pullModel(model: string, id?: string, name?: string): Promise<void>
   abstract cancelModelPull(modelId: string): Promise<void>
-  abstract importModel(model: string, modePath: string, name?: string): Promise<void>
+  abstract importModel(model: string, modePath: string, name?: string, optionType?: OptionType): Promise<void>
   abstract updateModel(modelInfo: Partial<Model>): Promise<Model>
   abstract deleteModel(model: string): Promise<void>
 }
diff --git a/core/src/types/model/modelImport.ts b/core/src/types/model/modelImport.ts
index 7c72a691b..3f0ddab10 100644
--- a/core/src/types/model/modelImport.ts
+++ b/core/src/types/model/modelImport.ts
@@ -1,4 +1,4 @@
-export type OptionType = 'SYMLINK' | 'MOVE_BINARY_FILE'
+export type OptionType = 'symlink' | 'copy'
 
 export type ModelImportOption = {
   type: OptionType
diff --git a/core/src/types/model/modelInterface.ts b/core/src/types/model/modelInterface.ts
index c35bae9ce..7ad1b136c 100644
--- a/core/src/types/model/modelInterface.ts
+++ b/core/src/types/model/modelInterface.ts
@@ -1,4 +1,5 @@
 import { Model } from './modelEntity'
+import { OptionType } from './modelImport'
 
 /**
  * Model extension for managing models.
@@ -43,5 +44,10 @@ export interface ModelInterface {
    * @param model id of the model to import
    * @param modelPath - path of the model file
    */
-  importModel(model: string, modePath: string, name?: string): Promise<void>
+  importModel(
+    model: string,
+    modePath: string,
+    name?: string,
+    optionType?: OptionType
+  ): Promise<void>
 }
diff --git a/extensions/inference-cortex-extension/bin/version.txt b/extensions/inference-cortex-extension/bin/version.txt
index 57d77db55..c89636bcf 100644
--- a/extensions/inference-cortex-extension/bin/version.txt
+++ b/extensions/inference-cortex-extension/bin/version.txt
@@ -1 +1 @@
-1.0.2-rc2
\ No newline at end of file
+1.0.2-rc4
\ No newline at end of file
diff --git a/extensions/model-extension/src/cortex.ts b/extensions/model-extension/src/cortex.ts
index 50eace5e5..024aa2223 100644
--- a/extensions/model-extension/src/cortex.ts
+++ b/extensions/model-extension/src/cortex.ts
@@ -9,7 +9,7 @@ interface ICortexAPI {
   getModel(model: string): Promise<Model>
   getModels(): Promise<Model[]>
   pullModel(model: string, id?: string, name?: string): Promise<void>
-  importModel(path: string, modelPath: string, name?: string): Promise<void>
+  importModel(path: string, modelPath: string, name?: string, option?: string): Promise<void>
   deleteModel(model: string): Promise<void>
   updateModel(model: object): Promise<void>
   cancelModelPull(model: string): Promise<void>
@@ -85,10 +85,17 @@ export class CortexAPI implements ICortexAPI {
    * @param model
    * @returns
    */
-  importModel(model: string, modelPath: string, name?: string): Promise<void> {
+  importModel(
+    model: string,
+    modelPath: string,
+    name?: string,
+    option?: string
+  ): Promise<void> {
     return this.queue.add(() =>
       ky
-        .post(`${API_URL}/v1/models/import`, { json: { model, modelPath, name } })
+        .post(`${API_URL}/v1/models/import`, {
+          json: { model, modelPath, name, option },
+        })
         .json()
         .catch((e) => console.debug(e)) // Ignore error
         .then()
@@ -208,6 +215,7 @@ export class CortexAPI implements ICortexAPI {
     }
     model.metadata = model.metadata ?? {
       tags: [],
+      size: model.size ?? model.metadata?.size ?? 0
     }
     return model as Model
   }
diff --git a/extensions/model-extension/src/index.ts b/extensions/model-extension/src/index.ts
index 17c00263d..e62e5b2ee 100644
--- a/extensions/model-extension/src/index.ts
+++ b/extensions/model-extension/src/index.ts
@@ -9,6 +9,7 @@ import {
   DownloadState,
   events,
   DownloadEvent,
+  OptionType
 } from '@janhq/core'
 import { CortexAPI } from './cortex'
 import { scanModelsFolder } from './legacy/model-json'
@@ -228,9 +229,10 @@ export default class JanModelExtension extends ModelExtension {
   async importModel(
     model: string,
     modelPath: string,
-    name?: string
+    name?: string,
+    option?: OptionType
   ): Promise<void> {
-    return this.cortexAPI.importModel(model, modelPath, name)
+    return this.cortexAPI.importModel(model, modelPath, name, option)
   }
 
   /**
diff --git a/web/hooks/useImportModel.ts b/web/hooks/useImportModel.ts
index d84690715..093385f0d 100644
--- a/web/hooks/useImportModel.ts
+++ b/web/hooks/useImportModel.ts
@@ -3,6 +3,7 @@ import { useCallback } from 'react'
 import {
   ExtensionTypeEnum,
   ImportingModel,
+  LocalImportModelEvent,
   Model,
   ModelEvent,
   ModelExtension,
@@ -66,10 +67,13 @@ const useImportModel = () => {
           addDownloadingModel(modelId)
           extensionManager
             .get<ModelExtension>(ExtensionTypeEnum.Model)
-            ?.importModel(modelId, model.path, model.name)
+            ?.importModel(modelId, model.path, model.name, optionType)
             .finally(() => {
               removeDownloadingModel(modelId)
-              events.emit(ModelEvent.OnModelsUpdate, {})
+              events.emit(LocalImportModelEvent.onLocalImportModelSuccess, {
+                importId: model.importId,
+                modelId: modelId,
+              })
             })
         }
       })
diff --git a/web/hooks/useModels.ts b/web/hooks/useModels.ts
index 0c898119c..f3004f823 100644
--- a/web/hooks/useModels.ts
+++ b/web/hooks/useModels.ts
@@ -43,7 +43,19 @@ const useModels = () => {
         .models.values()
         .toArray()
         .filter((e) => !isLocalEngine(e.engine))
-      setDownloadedModels([...localModels, ...remoteModels])
+      const toUpdate = [...localModels, ...remoteModels]
+      setDownloadedModels(toUpdate)
+
+      let isUpdated = false
+      toUpdate.forEach((model) => {
+        if (!ModelManager.instance().models.has(model.id)) {
+          ModelManager.instance().models.set(model.id, model)
+          isUpdated = true
+        }
+      })
+      if (isUpdated) {
+        getExtensionModels()
+      }
     }
 
     const getExtensionModels = async () => {
@@ -52,7 +64,7 @@ const useModels = () => {
     }
 
     // Fetch all data
-    Promise.all([getDownloadedModels(), getExtensionModels()])
+    getExtensionModels().then(getDownloadedModels)
   }, [setDownloadedModels, setExtensionModels])
 
   const reloadData = useDebouncedCallback(() => getData(), 300)
diff --git a/web/screens/Settings/ImportModelOptionModal/index.tsx b/web/screens/Settings/ImportModelOptionModal/index.tsx
index 5a2af2335..f185b9015 100644
--- a/web/screens/Settings/ImportModelOptionModal/index.tsx
+++ b/web/screens/Settings/ImportModelOptionModal/index.tsx
@@ -15,13 +15,13 @@ import { importingModelsAtom } from '@/helpers/atoms/Model.atom'
 
 const importOptions: ModelImportOption[] = [
   {
-    type: 'SYMLINK',
+    type: 'symlink',
     title: 'Keep Original Files & Symlink',
     description:
       'You maintain your model files outside of Jan. Keeping your files where they are, and Jan will create a smart link to them.',
   },
   {
-    type: 'MOVE_BINARY_FILE',
+    type: 'copy',
     title: 'Move model binary file',
     description:
       'Jan will move your model binary file from your current folder into Jan Data Folder.',

From 92906ea2fa790be7e06e43e1a2dace893319c9c5 Mon Sep 17 00:00:00 2001
From: Faisal Amir <urmauur@gmail.com>
Date: Mon, 4 Nov 2024 20:36:25 +0700
Subject: [PATCH 47/71] ui: system monitor should not cover input box (#3942)

---
 .../Layout/BottomPanel/SystemMonitor/index.tsx         |  2 +-
 web/screens/Thread/ThreadCenterPanel/index.tsx         | 10 +++++++++-
 2 files changed, 10 insertions(+), 2 deletions(-)

diff --git a/web/containers/Layout/BottomPanel/SystemMonitor/index.tsx b/web/containers/Layout/BottomPanel/SystemMonitor/index.tsx
index 7fdc598ec..ae91cfa9d 100644
--- a/web/containers/Layout/BottomPanel/SystemMonitor/index.tsx
+++ b/web/containers/Layout/BottomPanel/SystemMonitor/index.tsx
@@ -94,7 +94,7 @@ const SystemMonitor = () => {
         <div
           ref={setElementExpand}
           className={twMerge(
-            'fixed bottom-9 left-[49px] z-50 flex w-[calc(100%-48px)] flex-shrink-0 flex-col border-t border-[hsla(var(--app-border))] bg-[hsla(var(--app-bg))]',
+            'fixed bottom-9 left-[49px] z-50 flex h-[200px] w-[calc(100%-48px)] flex-shrink-0 flex-col border-t border-[hsla(var(--app-border))] bg-[hsla(var(--app-bg))]',
             showFullScreen && 'h-[calc(100%-63px)]',
             reduceTransparent && 'w-[calc(100%-48px)] rounded-none'
           )}
diff --git a/web/screens/Thread/ThreadCenterPanel/index.tsx b/web/screens/Thread/ThreadCenterPanel/index.tsx
index b12f859bd..5213a016a 100644
--- a/web/screens/Thread/ThreadCenterPanel/index.tsx
+++ b/web/screens/Thread/ThreadCenterPanel/index.tsx
@@ -25,6 +25,7 @@ import ChatBody from '@/screens/Thread/ThreadCenterPanel/ChatBody'
 import ChatInput from './ChatInput'
 import RequestDownloadModel from './RequestDownloadModel'
 
+import { showSystemMonitorPanelAtom } from '@/helpers/atoms/App.atom'
 import { experimentalFeatureEnabledAtom } from '@/helpers/atoms/AppConfig.atom'
 import { activeThreadAtom } from '@/helpers/atoms/Thread.atom'
 
@@ -144,6 +145,8 @@ const ThreadCenterPanel = () => {
 
   const isGeneratingResponse = useAtomValue(isGeneratingResponseAtom)
 
+  const showSystemMonitorPanel = useAtomValue(showSystemMonitorPanelAtom)
+
   return (
     <CenterPanelContainer>
       <div
@@ -188,7 +191,12 @@ const ThreadCenterPanel = () => {
             </div>
           </div>
         )}
-        <div className="flex h-full w-full flex-col justify-between">
+        <div
+          className={twMerge(
+            'flex h-full w-full flex-col justify-between',
+            showSystemMonitorPanel && 'h-[calc(100%-200px)]'
+          )}
+        >
           {activeThread ? (
             <div className="flex h-full w-full overflow-x-hidden">
               <ChatBody />

From d2fa38f0812ca898c8967e85debfcfd60fefc8f6 Mon Sep 17 00:00:00 2001
From: Louis <louis@jan.ai>
Date: Tue, 5 Nov 2024 08:33:58 +0700
Subject: [PATCH 48/71] test: correct tests

---
 web/hooks/useImportModel.test.ts | 6 +++---
 web/hooks/useModels.test.ts      | 2 ++
 web/hooks/useModels.ts           | 6 +++---
 3 files changed, 8 insertions(+), 6 deletions(-)

diff --git a/web/hooks/useImportModel.test.ts b/web/hooks/useImportModel.test.ts
index 9b623226d..571947903 100644
--- a/web/hooks/useImportModel.test.ts
+++ b/web/hooks/useImportModel.test.ts
@@ -31,11 +31,11 @@ describe('useImportModel', () => {
     ] as any
 
     await act(async () => {
-      await result.current.importModels(models, 'local' as any)
+      await result.current.importModels(models, 'copy')
     })
 
-    expect(mockImportModels).toHaveBeenCalledWith('1', '/path/to/model1', undefined)
-    expect(mockImportModels).toHaveBeenCalledWith('2', '/path/to/model2', undefined)
+    expect(mockImportModels).toHaveBeenCalledWith('1', '/path/to/model1', undefined, 'copy')
+    expect(mockImportModels).toHaveBeenCalledWith('2', '/path/to/model2', undefined, 'copy')
   })
 
   it('should update model info successfully', async () => {
diff --git a/web/hooks/useModels.test.ts b/web/hooks/useModels.test.ts
index 0440b5443..9b6b898ad 100644
--- a/web/hooks/useModels.test.ts
+++ b/web/hooks/useModels.test.ts
@@ -35,6 +35,8 @@ describe('useModels', () => {
           }),
         }),
         get: () => undefined,
+        has: () => true,
+        // set: () => {}
       },
     })
 
diff --git a/web/hooks/useModels.ts b/web/hooks/useModels.ts
index f3004f823..c856f6e3c 100644
--- a/web/hooks/useModels.ts
+++ b/web/hooks/useModels.ts
@@ -58,13 +58,13 @@ const useModels = () => {
       }
     }
 
-    const getExtensionModels = async () => {
+    const getExtensionModels = () => {
       const models = ModelManager.instance().models.values().toArray()
       setExtensionModels(models)
     }
-
     // Fetch all data
-    getExtensionModels().then(getDownloadedModels)
+    getExtensionModels()
+    getDownloadedModels()
   }, [setDownloadedModels, setExtensionModels])
 
   const reloadData = useDebouncedCallback(() => getData(), 300)

From 2c8c76afa6784c7308b49de154dcf805ba7c4cf8 Mon Sep 17 00:00:00 2001
From: Louis <louis@jan.ai>
Date: Tue, 5 Nov 2024 14:36:18 +0700
Subject: [PATCH 49/71] chore: fix model ID display in my models

---
 web/screens/Settings/MyModels/MyModelList/index.tsx | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/web/screens/Settings/MyModels/MyModelList/index.tsx b/web/screens/Settings/MyModels/MyModelList/index.tsx
index 26dd26b6c..9b2301f8b 100644
--- a/web/screens/Settings/MyModels/MyModelList/index.tsx
+++ b/web/screens/Settings/MyModels/MyModelList/index.tsx
@@ -61,7 +61,7 @@ const MyModelList = ({ model }: Props) => {
             >
               {model.name}
             </h6>
-            {!isLocalEngine(model.engine) && (
+            {isLocalEngine(model.engine) && (
               <div className="flex gap-x-8">
                 <p
                   className="line-clamp-1 text-[hsla(var(--text-secondary))] lg:min-w-[160px] lg:max-w-[160px] xl:max-w-none"

From 23764caedf71b906c5e665bc8a1701e5574abe4a Mon Sep 17 00:00:00 2001
From: Faisal Amir <urmauur@gmail.com>
Date: Tue, 5 Nov 2024 16:14:35 +0700
Subject: [PATCH 50/71] fix: controlling word breaks (#3952)

---
 .../Thread/ThreadCenterPanel/SimpleTextMessage/index.tsx        | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/web/screens/Thread/ThreadCenterPanel/SimpleTextMessage/index.tsx b/web/screens/Thread/ThreadCenterPanel/SimpleTextMessage/index.tsx
index 6afd4b652..12bcf7a4d 100644
--- a/web/screens/Thread/ThreadCenterPanel/SimpleTextMessage/index.tsx
+++ b/web/screens/Thread/ThreadCenterPanel/SimpleTextMessage/index.tsx
@@ -283,7 +283,7 @@ const SimpleTextMessage: React.FC<ThreadMessage> = (props) => {
           ) : (
             <div
               className={twMerge(
-                'message max-width-[100%] flex flex-col gap-y-2 overflow-auto leading-relaxed'
+                'message max-width-[100%] flex flex-col gap-y-2 overflow-auto break-all leading-relaxed'
               )}
               dangerouslySetInnerHTML={{ __html: parsedText }}
             />

From 1ad897782cb764abd46a54720993594ea2bf6a74 Mon Sep 17 00:00:00 2001
From: hiento09 <136591877+hiento09@users.noreply.github.com>
Date: Tue, 5 Nov 2024 19:40:19 +0700
Subject: [PATCH 51/71] feat: new nightly app channel (#3948)

Co-authored-by: Hien To <tominhhien97@gmail.com>
---
 .github/scripts/rename-app-beta.sh            | 47 --------------
 .github/scripts/rename-app.sh                 | 55 ++++++++++++++++
 ...nstaller-beta.sh => rename-uninstaller.sh} | 10 ++-
 ...-workspace-beta.sh => rename-workspace.sh} |  4 +-
 .../workflows/jan-electron-build-nightly.yml  | 64 +++++++++----------
 .../workflows/template-build-linux-x64.yml    | 17 +++--
 .../workflows/template-build-macos-arm64.yml  | 19 ++++--
 .../workflows/template-build-macos-x64.yml    | 19 ++++--
 .../workflows/template-build-windows-x64.yml  | 25 ++++++--
 electron/package.json                         |  2 +-
 10 files changed, 156 insertions(+), 106 deletions(-)
 delete mode 100644 .github/scripts/rename-app-beta.sh
 create mode 100644 .github/scripts/rename-app.sh
 rename .github/scripts/{rename-uninstaller-beta.sh => rename-uninstaller.sh} (61%)
 rename .github/scripts/{rename-workspace-beta.sh => rename-workspace.sh} (74%)

diff --git a/.github/scripts/rename-app-beta.sh b/.github/scripts/rename-app-beta.sh
deleted file mode 100644
index a12d1d635..000000000
--- a/.github/scripts/rename-app-beta.sh
+++ /dev/null
@@ -1,47 +0,0 @@
-#!/bin/bash
-
-# Check if the correct number of arguments is provided
-if [ "$#" -ne 1 ]; then
-    echo "Usage: $0 <path_to_json_input_file>"
-    exit 1
-fi
-
-INPUT_JSON_FILE="$1"
-
-# Check if the input file exists
-if [ ! -f "$INPUT_JSON_FILE" ]; then
-    echo "Input file not found: $INPUT_JSON_FILE"
-    exit 1
-fi
-
-# Use jq to transform the content
-jq '
-    .name = "jan-beta" |
-    .productName = "Jan-beta" |
-    .build.appId = "jan-beta.ai.app" |
-    .build.productName = "Jan-beta" |
-    .build.appId = "jan-beta.ai.app" |
-    .build.protocols[0].name = "Jan-beta" |
-    .build.protocols[0].schemes = ["jan-beta"] |
-    .build.artifactName = "jan-beta-${os}-${arch}-${version}.${ext}" |
-    .build.publish[0].channel = "beta"
-' "$INPUT_JSON_FILE" > ./package.json.tmp
-
-cat ./package.json.tmp
-
-rm $INPUT_JSON_FILE
-mv ./package.json.tmp $INPUT_JSON_FILE
-
-# Update the layout file
-LAYOUT_FILE_PATH="web/app/layout.tsx"
-
-if [ ! -f "$LAYOUT_FILE_PATH" ]; then
-    echo "File does not exist: $LAYOUT_FILE_PATH"
-    exit 1
-fi
-
-# Perform the replacements
-sed -i -e "s#Jan#Jan-beta#g" "$LAYOUT_FILE_PATH"
-
-# Notify completion
-echo "File has been updated: $LAYOUT_FILE_PATH"
\ No newline at end of file
diff --git a/.github/scripts/rename-app.sh b/.github/scripts/rename-app.sh
new file mode 100644
index 000000000..7c2ad6ef3
--- /dev/null
+++ b/.github/scripts/rename-app.sh
@@ -0,0 +1,55 @@
+#!/bin/bash
+
+# Check if the correct number of arguments is provided
+if [ "$#" -ne 2 ]; then
+    echo "Usage: $0 <path_to_json_input_file> <channel>"
+    exit 1
+fi
+
+INPUT_JSON_FILE="$1"
+
+CHANNEL="$2"
+
+if [ "$CHANNEL" == "nightly" ]; then
+    UPDATER="latest"
+else
+    UPDATER="beta"
+fi
+
+# Check if the input file exists
+if [ ! -f "$INPUT_JSON_FILE" ]; then
+    echo "Input file not found: $INPUT_JSON_FILE"
+    exit 1
+fi
+
+# Use jq to transform the content; stop on failure so the input file is never
+# replaced by an empty or partial result
+jq --arg channel "$CHANNEL" --arg updater "$UPDATER" '
+    .name = "jan-\($channel)" |
+    .productName = "Jan-\($channel)" |
+    .build.appId = "jan-\($channel).ai.app" |
+    .build.productName = "Jan-\($channel)" |
+    .build.protocols[0].name = "Jan-\($channel)" |
+    .build.protocols[0].schemes = ["jan-\($channel)"] |
+    .build.artifactName = "jan-\($channel)-${os}-${arch}-${version}.${ext}" |
+    .build.publish[0].channel = $updater
+' "$INPUT_JSON_FILE" > ./package.json.tmp || exit 1
+
+cat ./package.json.tmp
+
+# mv overwrites the destination, so no separate rm is needed
+mv -f ./package.json.tmp "$INPUT_JSON_FILE"
+
+# Update the layout file
+LAYOUT_FILE_PATH="web/app/layout.tsx"
+
+if [ ! -f "$LAYOUT_FILE_PATH" ]; then
+    echo "File does not exist: $LAYOUT_FILE_PATH"
+    exit 1
+fi
+
+# Perform the replacements
+sed -i -e "s#Jan#Jan-$CHANNEL#g" "$LAYOUT_FILE_PATH"
+
+# Notify completion
+echo "File has been updated: $LAYOUT_FILE_PATH"
diff --git a/.github/scripts/rename-uninstaller-beta.sh b/.github/scripts/rename-uninstaller.sh
similarity index 61%
rename from .github/scripts/rename-uninstaller-beta.sh
rename to .github/scripts/rename-uninstaller.sh
index c322825da..7d3992fd0 100644
--- a/.github/scripts/rename-uninstaller-beta.sh
+++ b/.github/scripts/rename-uninstaller.sh
@@ -3,6 +3,14 @@
 # File path to be modified
 FILE_PATH="electron/scripts/uninstaller.nsh"
 
+# Check if the correct number of arguments is provided
+if [ "$#" -ne 1 ]; then
+    echo "Usage: $0 <channel>"
+    exit 1
+fi
+
+CHANNEL="$1"
+
 # Check if the file exists
 if [ ! -f "$FILE_PATH" ]; then
     echo "File does not exist: $FILE_PATH"
@@ -10,7 +18,7 @@ if [ ! -f "$FILE_PATH" ]; then
 fi
 
 # Perform the replacements
-sed -i -e "s#jan#jan-beta#g" "$FILE_PATH"
+sed -i -e "s#jan#jan-$CHANNEL#g" "$FILE_PATH"
 
 # Notify completion
 echo "File has been updated: $FILE_PATH"
\ No newline at end of file
diff --git a/.github/scripts/rename-workspace-beta.sh b/.github/scripts/rename-workspace.sh
similarity index 74%
rename from .github/scripts/rename-workspace-beta.sh
rename to .github/scripts/rename-workspace.sh
index 6286d1889..420042e2c 100644
--- a/.github/scripts/rename-workspace-beta.sh
+++ b/.github/scripts/rename-workspace.sh
@@ -3,6 +3,8 @@
 # File path to be modified
 FILE_PATH="$1"
 
+CHANNEL="${2:?Usage: $0 <path_to_file> <channel>}"
+
 # Check if the file exists
 if [ ! -f "$FILE_PATH" ]; then
     echo "File does not exist: $FILE_PATH"
@@ -10,7 +12,7 @@ if [ ! -f "$FILE_PATH" ]; then
 fi
 
 # Perform the replacements
-sed -i -e 's/yarn workspace jan/yarn workspace jan-beta/g' "$FILE_PATH"
+sed -i -e "s/yarn workspace jan/yarn workspace jan-$CHANNEL/g" "$FILE_PATH"
 
 # Notify completion
 echo "File has been updated: $FILE_PATH"
\ No newline at end of file
diff --git a/.github/workflows/jan-electron-build-nightly.yml b/.github/workflows/jan-electron-build-nightly.yml
index d79080990..1b29b84af 100644
--- a/.github/workflows/jan-electron-build-nightly.yml
+++ b/.github/workflows/jan-electron-build-nightly.yml
@@ -114,8 +114,8 @@ jobs:
       - name: Upload latest-mac.yml
         if: ${{ needs.set-public-provider.outputs.public_provider == 'aws-s3' }}
         run: |
-          aws s3 cp ./latest-mac.yml "s3://${{ secrets.DELTA_AWS_S3_BUCKET_NAME }}/temp-latest/latest-mac.yml"
-          aws s3 sync s3://${{ secrets.DELTA_AWS_S3_BUCKET_NAME }}/temp-latest/ s3://${{ secrets.DELTA_AWS_S3_BUCKET_NAME }}/latest/
+          aws s3 cp ./latest-mac.yml "s3://${{ secrets.DELTA_AWS_S3_BUCKET_NAME }}/temp-nightly/latest-mac.yml"
+          aws s3 sync s3://${{ secrets.DELTA_AWS_S3_BUCKET_NAME }}/temp-nightly/ s3://${{ secrets.DELTA_AWS_S3_BUCKET_NAME }}/nightly/
         env:
           AWS_ACCESS_KEY_ID: ${{ secrets.DELTA_AWS_ACCESS_KEY_ID }}
           AWS_SECRET_ACCESS_KEY: ${{ secrets.DELTA_AWS_SECRET_ACCESS_KEY }}
@@ -123,35 +123,35 @@ jobs:
           AWS_EC2_METADATA_DISABLED: "true"
 
     
-  noti-discord-nightly-and-update-url-readme:
-    needs: [build-macos-x64, build-macos-arm64, build-windows-x64, build-linux-x64, get-update-version, set-public-provider, combine-latest-mac-yml]
-    secrets: inherit
-    if: github.event_name == 'schedule'
-    uses: ./.github/workflows/template-noti-discord-and-update-url-readme.yml
-    with:
-      ref: refs/heads/dev
-      build_reason: Nightly
-      push_to_branch: dev
-      new_version: ${{ needs.get-update-version.outputs.new_version }}
+  # noti-discord-nightly-and-update-url-readme:
+  #   needs: [build-macos-x64, build-macos-arm64, build-windows-x64, build-linux-x64, get-update-version, set-public-provider, combine-latest-mac-yml]
+  #   secrets: inherit
+  #   if: github.event_name == 'schedule'
+  #   uses: ./.github/workflows/template-noti-discord-and-update-url-readme.yml
+  #   with:
+  #     ref: refs/heads/dev
+  #     build_reason: Nightly
+  #     push_to_branch: dev
+  #     new_version: ${{ needs.get-update-version.outputs.new_version }}
 
-  noti-discord-pre-release-and-update-url-readme:
-    needs: [build-macos-x64, build-macos-arm64, build-windows-x64, build-linux-x64, get-update-version, set-public-provider, combine-latest-mac-yml]
-    secrets: inherit
-    if: github.event_name == 'push'
-    uses: ./.github/workflows/template-noti-discord-and-update-url-readme.yml
-    with:
-      ref: refs/heads/dev
-      build_reason: Pre-release
-      push_to_branch: dev
-      new_version: ${{ needs.get-update-version.outputs.new_version }}
+  # noti-discord-pre-release-and-update-url-readme:
+  #   needs: [build-macos-x64, build-macos-arm64, build-windows-x64, build-linux-x64, get-update-version, set-public-provider, combine-latest-mac-yml]
+  #   secrets: inherit
+  #   if: github.event_name == 'push'
+  #   uses: ./.github/workflows/template-noti-discord-and-update-url-readme.yml
+  #   with:
+  #     ref: refs/heads/dev
+  #     build_reason: Pre-release
+  #     push_to_branch: dev
+  #     new_version: ${{ needs.get-update-version.outputs.new_version }}
 
-  noti-discord-manual-and-update-url-readme:
-    needs: [build-macos-x64, build-macos-arm64, build-windows-x64, build-linux-x64, get-update-version, set-public-provider, combine-latest-mac-yml]
-    secrets: inherit
-    if: github.event_name == 'workflow_dispatch' && github.event.inputs.public_provider == 'aws-s3'
-    uses: ./.github/workflows/template-noti-discord-and-update-url-readme.yml
-    with:
-      ref: refs/heads/dev
-      build_reason: Manual
-      push_to_branch: dev
-      new_version: ${{ needs.get-update-version.outputs.new_version }}
+  # noti-discord-manual-and-update-url-readme:
+  #   needs: [build-macos-x64, build-macos-arm64, build-windows-x64, build-linux-x64, get-update-version, set-public-provider, combine-latest-mac-yml]
+  #   secrets: inherit
+  #   if: github.event_name == 'workflow_dispatch' && github.event.inputs.public_provider == 'aws-s3'
+  #   uses: ./.github/workflows/template-noti-discord-and-update-url-readme.yml
+  #   with:
+  #     ref: refs/heads/dev
+  #     build_reason: Manual
+  #     push_to_branch: dev
+  #     new_version: ${{ needs.get-update-version.outputs.new_version }}
diff --git a/.github/workflows/template-build-linux-x64.yml b/.github/workflows/template-build-linux-x64.yml
index 496d153ae..afd5f6647 100644
--- a/.github/workflows/template-build-linux-x64.yml
+++ b/.github/workflows/template-build-linux-x64.yml
@@ -60,18 +60,25 @@ jobs:
           mv /tmp/package.json electron/package.json
           jq --arg version "${{ inputs.new_version }}" '.version = $version' web/package.json > /tmp/package.json
           mv /tmp/package.json web/package.json
-          jq '.build.publish = [{"provider": "generic", "url": "${{ secrets.CLOUDFLARE_R2_PUBLIC_URL }}", "channel": "latest"}, {"provider": "s3", "acl": null, "bucket": "${{ secrets.DELTA_AWS_S3_BUCKET_NAME }}", "region": "${{ secrets.DELTA_AWS_REGION}}", "path": "temp-latest", "channel": "latest"}]' electron/package.json > /tmp/package.json
+          jq '.build.publish = [{"provider": "generic", "url": "https://delta.jan.ai/nightly", "channel": "latest"}, {"provider": "s3", "acl": null, "bucket": "${{ secrets.DELTA_AWS_S3_BUCKET_NAME }}", "region": "${{ secrets.DELTA_AWS_REGION}}", "path": "temp-nightly", "channel": "latest"}]' electron/package.json > /tmp/package.json
           mv /tmp/package.json electron/package.json
           cat electron/package.json
+          chmod +x .github/scripts/rename-app.sh
+          .github/scripts/rename-app.sh ./electron/package.json nightly
+          chmod +x .github/scripts/rename-workspace.sh
+          .github/scripts/rename-workspace.sh ./package.json nightly
+          echo "------------------------"
+          cat ./electron/package.json
+          echo "------------------------"
 
       - name: Change App Name for beta version
         if: inputs.beta == true
         shell: bash
         run: |
-          chmod +x .github/scripts/rename-app-beta.sh
-          .github/scripts/rename-app-beta.sh ./electron/package.json
-          chmod +x .github/scripts/rename-workspace-beta.sh
-          .github/scripts/rename-workspace-beta.sh ./package.json
+          chmod +x .github/scripts/rename-app.sh
+          .github/scripts/rename-app.sh ./electron/package.json beta
+          chmod +x .github/scripts/rename-workspace.sh
+          .github/scripts/rename-workspace.sh ./package.json beta
           echo "------------------------"
           cat ./electron/package.json
           echo "------------------------"
diff --git a/.github/workflows/template-build-macos-arm64.yml b/.github/workflows/template-build-macos-arm64.yml
index 40cdda627..46f884473 100644
--- a/.github/workflows/template-build-macos-arm64.yml
+++ b/.github/workflows/template-build-macos-arm64.yml
@@ -72,22 +72,29 @@ jobs:
           jq --arg version "${{ inputs.new_version }}" '.version = $version' web/package.json > /tmp/package.json
           mv /tmp/package.json web/package.json
 
-          jq '.build.publish = [{"provider": "generic", "url": "${{ secrets.CLOUDFLARE_R2_PUBLIC_URL }}", "channel": "latest"}, {"provider": "s3", "acl": null, "bucket": "${{ secrets.DELTA_AWS_S3_BUCKET_NAME }}", "region": "${{ secrets.DELTA_AWS_REGION}}", "path": "temp-latest", "channel": "latest"}]' electron/package.json > /tmp/package.json
+          jq '.build.publish = [{"provider": "generic", "url": "https://delta.jan.ai/nightly", "channel": "latest"}, {"provider": "s3", "acl": null, "bucket": "${{ secrets.DELTA_AWS_S3_BUCKET_NAME }}", "region": "${{ secrets.DELTA_AWS_REGION}}", "path": "temp-nightly", "channel": "latest"}]' electron/package.json > /tmp/package.json
           mv /tmp/package.json electron/package.json
 
           jq --arg teamid "${{ secrets.APPLE_TEAM_ID }}" '.build.mac.notarize.teamId = $teamid' electron/package.json > /tmp/package.json
           mv /tmp/package.json electron/package.json
 
           cat electron/package.json
+          chmod +x .github/scripts/rename-app.sh
+          .github/scripts/rename-app.sh ./electron/package.json nightly
+          chmod +x .github/scripts/rename-workspace.sh
+          .github/scripts/rename-workspace.sh ./package.json nightly
+          echo "------------------------"
+          cat ./electron/package.json
+          echo "------------------------"
 
       - name: Change App Name for beta version
         if: inputs.beta == true
         shell: bash
         run: |
-          chmod +x .github/scripts/rename-app-beta.sh
-          .github/scripts/rename-app-beta.sh ./electron/package.json
-          chmod +x .github/scripts/rename-workspace-beta.sh
-          .github/scripts/rename-workspace-beta.sh ./package.json
+          chmod +x .github/scripts/rename-app.sh
+          .github/scripts/rename-app.sh ./electron/package.json beta
+          chmod +x .github/scripts/rename-workspace.sh
+          .github/scripts/rename-workspace.sh ./package.json beta
           echo "------------------------"
           cat ./electron/package.json
           echo "------------------------"
@@ -186,7 +193,7 @@ jobs:
         uses: actions/upload-artifact@v4
         with:
           name: jan-mac-arm64-${{ inputs.new_version }}
-          path: ./electron/dist/jan-mac-arm64-${{ inputs.new_version }}.dmg
+          path: ./electron/dist/*.dmg
 
       - name: Upload Artifact
         if: inputs.beta == false
diff --git a/.github/workflows/template-build-macos-x64.yml b/.github/workflows/template-build-macos-x64.yml
index f139797af..7781eb630 100644
--- a/.github/workflows/template-build-macos-x64.yml
+++ b/.github/workflows/template-build-macos-x64.yml
@@ -72,22 +72,29 @@ jobs:
           jq --arg version "${{ inputs.new_version }}" '.version = $version' web/package.json > /tmp/package.json
           mv /tmp/package.json web/package.json
 
-          jq '.build.publish = [{"provider": "generic", "url": "${{ secrets.CLOUDFLARE_R2_PUBLIC_URL }}", "channel": "latest"}, {"provider": "s3", "acl": null, "bucket": "${{ secrets.DELTA_AWS_S3_BUCKET_NAME }}", "region": "${{ secrets.DELTA_AWS_REGION}}", "path": "temp-latest", "channel": "latest"}]' electron/package.json > /tmp/package.json
+          jq '.build.publish = [{"provider": "generic", "url": "https://delta.jan.ai/nightly", "channel": "latest"}, {"provider": "s3", "acl": null, "bucket": "${{ secrets.DELTA_AWS_S3_BUCKET_NAME }}", "region": "${{ secrets.DELTA_AWS_REGION}}", "path": "temp-nightly", "channel": "latest"}]' electron/package.json > /tmp/package.json
           mv /tmp/package.json electron/package.json
 
           jq --arg teamid "${{ secrets.APPLE_TEAM_ID }}" '.build.mac.notarize.teamId = $teamid' electron/package.json > /tmp/package.json
           mv /tmp/package.json electron/package.json
 
           cat electron/package.json
+          chmod +x .github/scripts/rename-app.sh
+          .github/scripts/rename-app.sh ./electron/package.json nightly
+          chmod +x .github/scripts/rename-workspace.sh
+          .github/scripts/rename-workspace.sh ./package.json nightly
+          echo "------------------------"
+          cat ./electron/package.json
+          echo "------------------------"
 
       - name: Change App Name for beta version
         if: inputs.beta == true
         shell: bash
         run: |
-          chmod +x .github/scripts/rename-app-beta.sh
-          .github/scripts/rename-app-beta.sh ./electron/package.json
-          chmod +x .github/scripts/rename-workspace-beta.sh
-          .github/scripts/rename-workspace-beta.sh ./package.json
+          chmod +x .github/scripts/rename-app.sh
+          .github/scripts/rename-app.sh ./electron/package.json beta
+          chmod +x .github/scripts/rename-workspace.sh
+          .github/scripts/rename-workspace.sh ./package.json beta
           echo "------------------------"
           cat ./electron/package.json
           echo "------------------------"
@@ -186,7 +193,7 @@ jobs:
         uses: actions/upload-artifact@v4
         with:
           name: jan-mac-x64-${{ inputs.new_version }}
-          path: ./electron/dist/jan-mac-x64-${{ inputs.new_version }}.dmg
+          path: ./electron/dist/*.dmg
 
       - name: Upload Artifact
         if: inputs.beta == false
diff --git a/.github/workflows/template-build-windows-x64.yml b/.github/workflows/template-build-windows-x64.yml
index ffe94fecc..76db4aadc 100644
--- a/.github/workflows/template-build-windows-x64.yml
+++ b/.github/workflows/template-build-windows-x64.yml
@@ -73,23 +73,34 @@ jobs:
           jq --arg version "${{ inputs.new_version }}" '.version = $version' web/package.json > /tmp/package.json
           mv /tmp/package.json web/package.json
 
-          jq '.build.publish = [{"provider": "generic", "url": "${{ secrets.CLOUDFLARE_R2_PUBLIC_URL }}", "channel": "latest"}, {"provider": "s3", "acl": null, "bucket": "${{ secrets.DELTA_AWS_S3_BUCKET_NAME }}", "region": "${{ secrets.DELTA_AWS_REGION}}", "path": "temp-latest", "channel": "latest"}]' electron/package.json > /tmp/package.json
+          jq '.build.publish = [{"provider": "generic", "url": "https://delta.jan.ai/nightly", "channel": "latest"}, {"provider": "s3", "acl": null, "bucket": "${{ secrets.DELTA_AWS_S3_BUCKET_NAME }}", "region": "${{ secrets.DELTA_AWS_REGION}}", "path": "temp-nightly", "channel": "latest"}]' electron/package.json > /tmp/package.json
           mv /tmp/package.json electron/package.json
 
           jq '.build.win.sign = "./sign.js"' electron/package.json > /tmp/package.json
           mv /tmp/package.json electron/package.json
           cat electron/package.json
+          chmod +x .github/scripts/rename-app.sh
+          .github/scripts/rename-app.sh ./electron/package.json nightly
+          chmod +x .github/scripts/rename-workspace.sh
+          .github/scripts/rename-workspace.sh ./package.json nightly
+          chmod +x .github/scripts/rename-uninstaller.sh
+          .github/scripts/rename-uninstaller.sh nightly
+          echo "------------------------"
+          cat ./electron/package.json
+          echo "------------------------"
+          cat ./package.json
+          echo "------------------------"
 
       - name: Change App Name for beta version
         if: inputs.beta == true
         shell: bash
         run: |
-          chmod +x .github/scripts/rename-app-beta.sh
-          .github/scripts/rename-app-beta.sh ./electron/package.json
-          chmod +x .github/scripts/rename-workspace-beta.sh
-          .github/scripts/rename-workspace-beta.sh ./package.json
-          chmod +x .github/scripts/rename-uninstaller-beta.sh
-          .github/scripts/rename-uninstaller-beta.sh
+          chmod +x .github/scripts/rename-app.sh
+          .github/scripts/rename-app.sh ./electron/package.json beta
+          chmod +x .github/scripts/rename-workspace.sh
+          .github/scripts/rename-workspace.sh ./package.json beta
+          chmod +x .github/scripts/rename-uninstaller.sh
+          .github/scripts/rename-uninstaller.sh beta
           echo "------------------------"
           cat ./electron/package.json
           echo "------------------------"
diff --git a/electron/package.json b/electron/package.json
index feaee5e16..f9824b895 100644
--- a/electron/package.json
+++ b/electron/package.json
@@ -111,7 +111,7 @@
     "@kirillvakalov/nut-tree__nut-js": "4.2.1-2"
   },
   "devDependencies": {
-    "@electron/notarize": "^2.1.0",
+    "@electron/notarize": "^2.5.0",
     "@playwright/test": "^1.38.1",
     "@types/npmcli__arborist": "^5.6.4",
     "@types/pacote": "^11.1.7",

From 964269dc467b44f1780f368451c59d717ed0e52d Mon Sep 17 00:00:00 2001
From: Louis <louis@jan.ai>
Date: Wed, 6 Nov 2024 09:20:54 +0700
Subject: [PATCH 52/71] fix: 3911 - inconsistency between download progress and
 model hub

---
 core/src/node/api/processors/download.ts          |  5 -----
 core/src/types/file/index.ts                      |  4 ++--
 .../Layout/BottomPanel/DownloadingState/index.tsx |  9 +++++++--
 web/containers/ModalCancelDownload/index.tsx      | 13 +++++++------
 web/hooks/useDownloadModel.ts                     | 15 ++++++++++++++-
 web/hooks/useDownloadState.ts                     | 10 ++++++++++
 .../Settings/MyModels/MyModelList/index.tsx       |  2 +-
 7 files changed, 41 insertions(+), 17 deletions(-)

diff --git a/core/src/node/api/processors/download.ts b/core/src/node/api/processors/download.ts
index 5db18a53a..20b87294b 100644
--- a/core/src/node/api/processors/download.ts
+++ b/core/src/node/api/processors/download.ts
@@ -50,11 +50,6 @@ export class Downloader implements Processor {
     const initialDownloadState: DownloadState = {
       modelId,
       fileName,
-      time: {
-        elapsed: 0,
-        remaining: 0,
-      },
-      speed: 0,
       percent: 0,
       size: {
         total: 0,
diff --git a/core/src/types/file/index.ts b/core/src/types/file/index.ts
index 9f3e32b3e..87d83c51d 100644
--- a/core/src/types/file/index.ts
+++ b/core/src/types/file/index.ts
@@ -6,8 +6,8 @@ export type FileStat = {
 export type DownloadState = {
   modelId: string // TODO: change to download id
   fileName: string
-  time: DownloadTime
-  speed: number
+  time?: DownloadTime
+  speed?: number
 
   percent: number
   size: DownloadSize
diff --git a/web/containers/Layout/BottomPanel/DownloadingState/index.tsx b/web/containers/Layout/BottomPanel/DownloadingState/index.tsx
index 8eb16f549..dc9ffca89 100644
--- a/web/containers/Layout/BottomPanel/DownloadingState/index.tsx
+++ b/web/containers/Layout/BottomPanel/DownloadingState/index.tsx
@@ -2,15 +2,19 @@ import { Fragment } from 'react'
 
 import { Progress, Modal, Button } from '@janhq/joi'
 
-import { useAtomValue } from 'jotai'
+import { useAtomValue, useSetAtom } from 'jotai'
 
 import useDownloadModel from '@/hooks/useDownloadModel'
-import { modelDownloadStateAtom } from '@/hooks/useDownloadState'
+import {
+  modelDownloadStateAtom,
+  removeDownloadStateAtom,
+} from '@/hooks/useDownloadState'
 
 import { formatDownloadPercentage } from '@/utils/converter'
 
 export default function DownloadingState() {
   const downloadStates = useAtomValue(modelDownloadStateAtom)
+  const removeDownloadState = useSetAtom(removeDownloadStateAtom)
   const { abortModelDownload } = useDownloadModel()
 
   const totalCurrentProgress = Object.values(downloadStates)
@@ -73,6 +77,7 @@ export default function DownloadingState() {
                       theme="destructive"
                       onClick={() => {
                         if (item?.modelId) {
+                          removeDownloadState(item?.modelId)
                           abortModelDownload(item?.modelId)
                         }
                       }}
diff --git a/web/containers/ModalCancelDownload/index.tsx b/web/containers/ModalCancelDownload/index.tsx
index 8a92c9279..1826c78a7 100644
--- a/web/containers/ModalCancelDownload/index.tsx
+++ b/web/containers/ModalCancelDownload/index.tsx
@@ -8,12 +8,13 @@ import { useAtomValue, useSetAtom } from 'jotai'
 
 import useDownloadModel from '@/hooks/useDownloadModel'
 
-import { modelDownloadStateAtom } from '@/hooks/useDownloadState'
+import {
+  modelDownloadStateAtom,
+  removeDownloadStateAtom,
+} from '@/hooks/useDownloadState'
 
 import { formatDownloadPercentage } from '@/utils/converter'
 
-import { removeDownloadingModelAtom } from '@/helpers/atoms/Model.atom'
-
 type Props = {
   model: Model
   isFromList?: boolean
@@ -21,16 +22,16 @@ type Props = {
 
 const ModalCancelDownload = ({ model, isFromList }: Props) => {
   const { abortModelDownload } = useDownloadModel()
-  const removeModelDownload = useSetAtom(removeDownloadingModelAtom)
+  const removeDownloadState = useSetAtom(removeDownloadStateAtom)
   const allDownloadStates = useAtomValue(modelDownloadStateAtom)
   const downloadState = allDownloadStates[model.id]
 
   const cancelText = `Cancel ${formatDownloadPercentage(downloadState?.percent ?? 0)}`
 
   const onAbortDownloadClick = useCallback(() => {
-    removeModelDownload(model.id)
+    removeDownloadState(model.id)
     abortModelDownload(downloadState?.modelId ?? model.id)
-  }, [downloadState, abortModelDownload, removeModelDownload, model])
+  }, [downloadState, abortModelDownload, removeDownloadState, model])
 
   return (
     <Modal
diff --git a/web/hooks/useDownloadModel.ts b/web/hooks/useDownloadModel.ts
index bbf03e2e7..c616f8769 100644
--- a/web/hooks/useDownloadModel.ts
+++ b/web/hooks/useDownloadModel.ts
@@ -6,6 +6,8 @@ import { useSetAtom } from 'jotai'
 
 import { toaster } from '@/containers/Toast'
 
+import { setDownloadStateAtom } from './useDownloadState'
+
 import { extensionManager } from '@/extension/ExtensionManager'
 
 import {
@@ -16,10 +18,21 @@ import {
 export default function useDownloadModel() {
   const removeDownloadingModel = useSetAtom(removeDownloadingModelAtom)
   const addDownloadingModel = useSetAtom(addDownloadingModelAtom)
+  const setDownloadStates = useSetAtom(setDownloadStateAtom)
 
   const downloadModel = useCallback(
     async (model: string, id?: string, name?: string) => {
       addDownloadingModel(id ?? model)
+      setDownloadStates({
+        modelId: id ?? model,
+        downloadState: 'downloading',
+        fileName: id ?? model,
+        size: {
+          total: 0,
+          transferred: 0,
+        },
+        percent: 0,
+      })
       downloadLocalModel(model, id, name).catch((error) => {
         if (error.message) {
           toaster({
@@ -32,7 +45,7 @@ export default function useDownloadModel() {
         removeDownloadingModel(model)
       })
     },
-    [removeDownloadingModel, addDownloadingModel]
+    [removeDownloadingModel, addDownloadingModel, setDownloadStates]
   )
 
   const abortModelDownload = useCallback(async (model: string) => {
diff --git a/web/hooks/useDownloadState.ts b/web/hooks/useDownloadState.ts
index b6d9ec49a..32a9d3255 100644
--- a/web/hooks/useDownloadState.ts
+++ b/web/hooks/useDownloadState.ts
@@ -10,8 +10,18 @@ import {
 } from '@/helpers/atoms/Model.atom'
 
 // download states
+
 export const modelDownloadStateAtom = atom<Record<string, DownloadState>>({})
 
+/**
+ * Remove the download state for a model id and also set removeDownloadingModelAtom with that id.
+ */
+export const removeDownloadStateAtom = atom(null, (get, set, id: string) => {
+  const currentState = { ...get(modelDownloadStateAtom) }
+  delete currentState[id]
+  set(modelDownloadStateAtom, currentState)
+  set(removeDownloadingModelAtom, id)
+})
 /**
  * Used to set the download state for a particular model.
  */
diff --git a/web/screens/Settings/MyModels/MyModelList/index.tsx b/web/screens/Settings/MyModels/MyModelList/index.tsx
index 9b2301f8b..2e87f3080 100644
--- a/web/screens/Settings/MyModels/MyModelList/index.tsx
+++ b/web/screens/Settings/MyModels/MyModelList/index.tsx
@@ -1,6 +1,6 @@
 import { memo, useState } from 'react'
 
-import { InferenceEngine, Model } from '@janhq/core'
+import { Model } from '@janhq/core'
 import { Badge, Button, Tooltip, useClickOutside } from '@janhq/joi'
 import { useAtom } from 'jotai'
 import {

From ff123d50f21591050450d7b4aee83b7d681b1ce4 Mon Sep 17 00:00:00 2001
From: Faisal Amir <urmauur@gmail.com>
Date: Wed, 6 Nov 2024 10:17:39 +0700
Subject: [PATCH 53/71] ui: remove generation interrupted message (#3949)

---
 web/containers/ErrorMessage/index.test.tsx | 118 +++++++++------------
 web/containers/ErrorMessage/index.tsx      |  32 +-----
 2 files changed, 50 insertions(+), 100 deletions(-)

diff --git a/web/containers/ErrorMessage/index.test.tsx b/web/containers/ErrorMessage/index.test.tsx
index 99dad5415..d2ae5aa81 100644
--- a/web/containers/ErrorMessage/index.test.tsx
+++ b/web/containers/ErrorMessage/index.test.tsx
@@ -1,54 +1,43 @@
 // ErrorMessage.test.tsx
-import React from 'react';
-import { render, screen, fireEvent } from '@testing-library/react';
-import '@testing-library/jest-dom';
-import ErrorMessage from './index';
-import { ThreadMessage, MessageStatus, ErrorCode } from '@janhq/core';
-import { useAtomValue, useSetAtom } from 'jotai';
-import useSendChatMessage from '@/hooks/useSendChatMessage';
+import React from 'react'
+import { render, screen, fireEvent } from '@testing-library/react'
+import '@testing-library/jest-dom'
+import ErrorMessage from './index'
+import { ThreadMessage, MessageStatus, ErrorCode } from '@janhq/core'
+import { useAtomValue, useSetAtom } from 'jotai'
+import useSendChatMessage from '@/hooks/useSendChatMessage'
 
 // Mock the dependencies
 jest.mock('jotai', () => {
-    const originalModule = jest.requireActual('jotai')
-    return {
-      ...originalModule,
-      useAtomValue: jest.fn(),
-      useSetAtom: jest.fn(),
-    }
-  })
+  const originalModule = jest.requireActual('jotai')
+  return {
+    ...originalModule,
+    useAtomValue: jest.fn(),
+    useSetAtom: jest.fn(),
+  }
+})
 
 jest.mock('@/hooks/useSendChatMessage', () => ({
   __esModule: true,
   default: jest.fn(),
-}));
+}))
 
 describe('ErrorMessage Component', () => {
-  const mockSetMainState = jest.fn();
-  const mockSetSelectedSettingScreen = jest.fn();
-  const mockSetModalTroubleShooting = jest.fn();
-  const mockResendChatMessage = jest.fn();
+  const mockSetMainState = jest.fn()
+  const mockSetSelectedSettingScreen = jest.fn()
+  const mockSetModalTroubleShooting = jest.fn()
+  const mockResendChatMessage = jest.fn()
 
   beforeEach(() => {
-    jest.clearAllMocks();
-    (useAtomValue as jest.Mock).mockReturnValue([]);
-    (useSetAtom as jest.Mock).mockReturnValue(mockSetMainState);
-    (useSetAtom as jest.Mock).mockReturnValue(mockSetSelectedSettingScreen);
-    (useSetAtom as jest.Mock).mockReturnValue(mockSetModalTroubleShooting);
-    (useSendChatMessage as jest.Mock).mockReturnValue({ resendChatMessage: mockResendChatMessage });
-  });
-
-  it('renders stopped message correctly', () => {
-    const message: ThreadMessage = {
-      id: '1',
-      status: MessageStatus.Stopped,
-      content: [{ text: { value: 'Test message' } }],
-    } as ThreadMessage;
-
-    render(<ErrorMessage message={message} />);
-    
-    expect(screen.getByText("Oops! The generation was interrupted. Let's give it another go!")).toBeInTheDocument();
-    expect(screen.getByText('Regenerate')).toBeInTheDocument();
-  });
+    jest.clearAllMocks()
+    ;(useAtomValue as jest.Mock).mockReturnValue([])
+    ;(useSetAtom as jest.Mock).mockReturnValue(mockSetMainState)
+    ;(useSetAtom as jest.Mock).mockReturnValue(mockSetSelectedSettingScreen)
+    ;(useSetAtom as jest.Mock).mockReturnValue(mockSetModalTroubleShooting)
+    ;(useSendChatMessage as jest.Mock).mockReturnValue({
+      resendChatMessage: mockResendChatMessage,
+    })
+  })
 
   it('renders error message with InvalidApiKey correctly', () => {
     const message: ThreadMessage = {
@@ -56,13 +45,13 @@ describe('ErrorMessage Component', () => {
       status: MessageStatus.Error,
       error_code: ErrorCode.InvalidApiKey,
       content: [{ text: { value: 'Invalid API Key' } }],
-    } as ThreadMessage;
+    } as ThreadMessage
 
-    render(<ErrorMessage message={message} />);
-    
-    expect(screen.getByTestId('invalid-API-key-error')).toBeInTheDocument();
-    expect(screen.getByText('Settings')).toBeInTheDocument();
-  });
+    render(<ErrorMessage message={message} />)
+
+    expect(screen.getByTestId('invalid-API-key-error')).toBeInTheDocument()
+    expect(screen.getByText('Settings')).toBeInTheDocument()
+  })
 
   it('renders general error message correctly', () => {
     const message: ThreadMessage = {
@@ -70,26 +59,15 @@ describe('ErrorMessage Component', () => {
       status: MessageStatus.Error,
       error_code: ErrorCode.Unknown,
       content: [{ text: { value: 'Unknown error occurred' } }],
-    } as ThreadMessage;
+    } as ThreadMessage
 
-    render(<ErrorMessage message={message} />);
-    
-    expect(screen.getByText("Apologies, something’s amiss!")).toBeInTheDocument();
-    expect(screen.getByText('troubleshooting assistance')).toBeInTheDocument();
-  });
+    render(<ErrorMessage message={message} />)
 
-  it('calls regenerateMessage when Regenerate button is clicked', () => {
-    const message: ThreadMessage = {
-      id: '1',
-      status: MessageStatus.Stopped,
-      content: [{ text: { value: 'Test message' } }],
-    } as ThreadMessage;
-
-    render(<ErrorMessage message={message} />);
-    
-    fireEvent.click(screen.getByText('Regenerate'));
-    expect(mockResendChatMessage).toHaveBeenCalled();
-  });
+    expect(
+      screen.getByText('Apologies, something’s amiss!')
+    ).toBeInTheDocument()
+    expect(screen.getByText('troubleshooting assistance')).toBeInTheDocument()
+  })
 
   it('opens troubleshooting modal when link is clicked', () => {
     const message: ThreadMessage = {
@@ -97,11 +75,11 @@ describe('ErrorMessage Component', () => {
       status: MessageStatus.Error,
       error_code: ErrorCode.Unknown,
       content: [{ text: { value: 'Unknown error occurred' } }],
-    } as ThreadMessage;
+    } as ThreadMessage
 
-    render(<ErrorMessage message={message} />);
-    
-    fireEvent.click(screen.getByText('troubleshooting assistance'));
-    expect(mockSetModalTroubleShooting).toHaveBeenCalledWith(true);
-  });
-});
+    render(<ErrorMessage message={message} />)
+
+    fireEvent.click(screen.getByText('troubleshooting assistance'))
+    expect(mockSetModalTroubleShooting).toHaveBeenCalledWith(true)
+  })
+})
diff --git a/web/containers/ErrorMessage/index.tsx b/web/containers/ErrorMessage/index.tsx
index bcd056b93..add2bd89b 100644
--- a/web/containers/ErrorMessage/index.tsx
+++ b/web/containers/ErrorMessage/index.tsx
@@ -4,9 +4,8 @@ import {
   MessageStatus,
   ThreadMessage,
 } from '@janhq/core'
-import { Button } from '@janhq/joi'
+
 import { useAtomValue, useSetAtom } from 'jotai'
-import { RefreshCcw } from 'lucide-react'
 
 import AutoLink from '@/containers/AutoLink'
 import ModalTroubleShooting, {
@@ -15,27 +14,17 @@ import ModalTroubleShooting, {
 
 import { MainViewState } from '@/constants/screens'
 
-import useSendChatMessage from '@/hooks/useSendChatMessage'
-
 import { mainViewStateAtom } from '@/helpers/atoms/App.atom'
-import { getCurrentChatMessagesAtom } from '@/helpers/atoms/ChatMessage.atom'
+
 import { selectedSettingAtom } from '@/helpers/atoms/Setting.atom'
 import { activeThreadAtom } from '@/helpers/atoms/Thread.atom'
 
 const ErrorMessage = ({ message }: { message: ThreadMessage }) => {
-  const messages = useAtomValue(getCurrentChatMessagesAtom)
-  const { resendChatMessage } = useSendChatMessage()
   const setModalTroubleShooting = useSetAtom(modalTroubleShootingAtom)
   const setMainState = useSetAtom(mainViewStateAtom)
   const setSelectedSettingScreen = useSetAtom(selectedSettingAtom)
   const activeThread = useAtomValue(activeThreadAtom)
 
-  const regenerateMessage = async () => {
-    const lastMessageIndex = messages.length - 1
-    const message = messages[lastMessageIndex]
-    resendChatMessage(message)
-  }
-
   const getErrorTitle = () => {
     switch (message.error_code) {
       case ErrorCode.Unknown:
@@ -77,23 +66,6 @@ const ErrorMessage = ({ message }: { message: ThreadMessage }) => {
 
   return (
     <div className="mt-10">
-      {message.status === MessageStatus.Stopped && (
-        <div key={message.id} className="flex flex-col items-center">
-          <span className="mb-3 text-center font-medium text-[hsla(var(--text-secondary))]">
-            Oops! The generation was interrupted. Let&apos;s give it another go!
-          </span>
-          <Button
-            className="w-min"
-            theme="ghost"
-            variant="outline"
-            onClick={regenerateMessage}
-          >
-            <RefreshCcw size={14} className="" />
-            <span className="w-2" />
-            Regenerate
-          </Button>
-        </div>
-      )}
       {message.status === MessageStatus.Error && (
         <div
           key={message.id}

From 4445abfa05860ba08aff4039e69b13f59a43229b Mon Sep 17 00:00:00 2001
From: Faisal Amir <urmauur@gmail.com>
Date: Wed, 6 Nov 2024 13:34:01 +0700
Subject: [PATCH 54/71] fix: update themes migrations (#3957)

---
 electron/utils/migration.ts | 23 +++++------------------
 1 file changed, 5 insertions(+), 18 deletions(-)

diff --git a/electron/utils/migration.ts b/electron/utils/migration.ts
index 52ee45ed0..7295fa15d 100644
--- a/electron/utils/migration.ts
+++ b/electron/utils/migration.ts
@@ -47,9 +47,6 @@ async function migrateThemes() {
   const themes = readdirSync(join(appResourcePath(), 'themes'))
   for (const theme of themes) {
     const themePath = join(appResourcePath(), 'themes', theme)
-    if (existsSync(themePath) && !lstatSync(themePath).isDirectory()) {
-      continue
-    }
     await checkAndMigrateTheme(theme, themePath)
   }
 }
@@ -64,21 +61,11 @@ async function checkAndMigrateTheme(
   )
   if (existingTheme) {
     const desTheme = join(janDataThemesFolder, existingTheme)
-    if (!existsSync(desTheme) || !lstatSync(desTheme).isDirectory()) return
-
-    const desThemeData = JSON.parse(
-      readFileSync(join(desTheme, 'theme.json'), 'utf-8')
-    )
-    const sourceThemeData = JSON.parse(
-      readFileSync(join(sourceThemePath, 'theme.json'), 'utf-8')
-    )
-    if (desThemeData.version !== sourceThemeData.version) {
-      console.debug('Updating theme', existingTheme)
-      rmdirSync(desTheme, { recursive: true })
-      cpSync(sourceThemePath, join(janDataThemesFolder, sourceThemeName), {
-        recursive: true,
-      })
-    }
+    console.debug('Updating theme', existingTheme)
+    rmdirSync(desTheme, { recursive: true })
+    cpSync(sourceThemePath, join(janDataThemesFolder, sourceThemeName), {
+      recursive: true,
+    })
   } else {
     console.debug('Adding new theme', sourceThemeName)
     cpSync(sourceThemePath, join(janDataThemesFolder, sourceThemeName), {

From 56e35df84d404bd510d44fb9b9767354e2e5b452 Mon Sep 17 00:00:00 2001
From: Louis <louis@jan.ai>
Date: Wed, 6 Nov 2024 13:34:11 +0700
Subject: [PATCH 55/71] chore: clean dangling process on exit and relaunch

---
 .../inference-cortex-extension/src/index.ts   | 20 +++++++++++++++++++
 1 file changed, 20 insertions(+)

diff --git a/extensions/inference-cortex-extension/src/index.ts b/extensions/inference-cortex-extension/src/index.ts
index fb2ee9a46..e099aae44 100644
--- a/extensions/inference-cortex-extension/src/index.ts
+++ b/extensions/inference-cortex-extension/src/index.ts
@@ -50,12 +50,18 @@ export default class JanInferenceCortexExtension extends LocalOAIEngine {
 
     // Run the process watchdog
     const systemInfo = await systemInformation()
+    await this.clean()
     await executeOnMain(NODE, 'run', systemInfo)
 
     this.queue.add(() => this.healthz())
+
+    window.addEventListener('beforeunload', () => {
+      this.clean()
+    })
   }
 
   onUnload(): void {
+    this.clean()
     executeOnMain(NODE, 'dispose')
     super.onUnload()
   }
@@ -132,6 +138,20 @@ export default class JanInferenceCortexExtension extends LocalOAIEngine {
       })
       .then(() => {})
   }
+
+  /**
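+   * Ask the cortex server to destroy its processes (2-second timeout).
+   * @returns a promise for the DELETE request; request errors are swallowed.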
+   */
+  clean(): Promise<any> {
+    return ky
+      .delete(`${CORTEX_API_URL}/processmanager/destroy`, {
+        timeout: 2000, // maximum 2 seconds
+      })
+      .catch(() => {
+        // Do nothing
+      })
+  }
 }
 
 /// Legacy

From 1f46c82ff7caa63ec88df9b4d1b89305b8204bc9 Mon Sep 17 00:00:00 2001
From: Faisal Amir <urmauur@gmail.com>
Date: Wed, 6 Nov 2024 13:41:16 +0700
Subject: [PATCH 56/71] fix: remove tooltip and update text color modelid API
 server page (#3959)

---
 .../LocalServerRightPanel/index.tsx           | 21 +++++++------------
 1 file changed, 8 insertions(+), 13 deletions(-)

diff --git a/web/screens/LocalServer/LocalServerRightPanel/index.tsx b/web/screens/LocalServer/LocalServerRightPanel/index.tsx
index 628a61512..60c64eafb 100644
--- a/web/screens/LocalServer/LocalServerRightPanel/index.tsx
+++ b/web/screens/LocalServer/LocalServerRightPanel/index.tsx
@@ -1,6 +1,6 @@
 import { useCallback, useEffect, useMemo, useState } from 'react'
 
-import { Accordion, AccordionItem, Input, Tooltip } from '@janhq/joi'
+import { Accordion, AccordionItem, Input } from '@janhq/joi'
 import { useAtomValue, useSetAtom } from 'jotai'
 import { AlertTriangleIcon, CheckIcon, CopyIcon, InfoIcon } from 'lucide-react'
 
@@ -99,7 +99,7 @@ const LocalServerRightPanel = () => {
         <div className="mt-2">
           <Input
             value={selectedModel?.id || ''}
-            className="cursor-pointer"
+            className="cursor-pointer text-[hsla(var(--text-secondary))]"
             readOnly
             suffixIcon={
               clipboard.copied ? (
@@ -108,17 +108,12 @@ const LocalServerRightPanel = () => {
                   className="text-[hsla(var(--success-bg))]"
                 />
               ) : (
-                <Tooltip
-                  trigger={
-                    <CopyIcon
-                      size={14}
-                      className="text-[hsla(var(--text-secondary))]"
-                      onClick={() => {
-                        clipboard.copy(selectedModel?.id)
-                      }}
-                    />
-                  }
-                  content="Copy Model ID"
+                <CopyIcon
+                  size={14}
+                  className="cursor-pointer text-[hsla(var(--text-secondary))]"
+                  onClick={() => {
+                    clipboard.copy(selectedModel?.id)
+                  }}
                 />
               )
             }

From 0154199161e5fa56e4d421af76e8842904146903 Mon Sep 17 00:00:00 2001
From: Faisal Amir <urmauur@gmail.com>
Date: Wed, 6 Nov 2024 13:41:49 +0700
Subject: [PATCH 57/71] fix: text alignment on import model dialog (#3958)

---
 .../ModelDownloadRow/index.tsx                         | 10 ++++++----
 1 file changed, 6 insertions(+), 4 deletions(-)

diff --git a/web/screens/Settings/HuggingFaceRepoDetailModal/ModelDownloadRow/index.tsx b/web/screens/Settings/HuggingFaceRepoDetailModal/ModelDownloadRow/index.tsx
index 9c2ff14a5..5d679913e 100644
--- a/web/screens/Settings/HuggingFaceRepoDetailModal/ModelDownloadRow/index.tsx
+++ b/web/screens/Settings/HuggingFaceRepoDetailModal/ModelDownloadRow/index.tsx
@@ -118,7 +118,7 @@ const ModelDownloadRow: React.FC<Props> = ({
   return (
     <div className="flex flex-col gap-4 rounded border border-[hsla(var(--app-border))] p-3 md:flex-row md:items-center md:justify-between xl:w-full">
       <div className="flex max-w-[50%] justify-between">
-        <div className="flex">
+        <div className="flex min-w-[280px] max-w-[280px]">
           {quantization && (
             <Badge variant="soft" className="mr-1">
               {quantization}
@@ -133,9 +133,11 @@ const ModelDownloadRow: React.FC<Props> = ({
             {fileName}
           </h1>
         </div>
-        <Badge theme="secondary" className="hidden md:flex">
-          {toGibibytes(fileSize)}
-        </Badge>
+        <div className="md:min-w-[90px] md:max-w-[90px]">
+          <Badge theme="secondary" className="ml-4 hidden md:flex">
+            {toGibibytes(fileSize)}
+          </Badge>
+        </div>
       </div>
 
       {downloadedModel ? (

From 264720c71af2e61a051b0f535cb2f8dbaea26c72 Mon Sep 17 00:00:00 2001
From: Louis <louis@jan.ai>
Date: Wed, 6 Nov 2024 16:28:26 +0700
Subject: [PATCH 58/71] chore: support customized OpenAI model.json

---
 .../model-extension/src/legacy/model-json.ts       | 14 +++++++++++++-
 web/hooks/useModels.ts                             |  8 +++++++-
 2 files changed, 20 insertions(+), 2 deletions(-)

diff --git a/extensions/model-extension/src/legacy/model-json.ts b/extensions/model-extension/src/legacy/model-json.ts
index c47b7c661..3cad6014b 100644
--- a/extensions/model-extension/src/legacy/model-json.ts
+++ b/extensions/model-extension/src/legacy/model-json.ts
@@ -1,5 +1,13 @@
 import { InferenceEngine, Model, fs, joinPath } from '@janhq/core'
 //// LEGACY MODEL FOLDER ////
+const LocalEngines = [
+  InferenceEngine.cortex,
+  InferenceEngine.cortex_llamacpp,
+  InferenceEngine.cortex_tensorrtllm,
+  InferenceEngine.cortex_onnx,
+  InferenceEngine.nitro_tensorrt_llm,
+  InferenceEngine.nitro,
+]
 /**
  * Scan through models folder and return downloaded models
  * @returns
@@ -57,7 +65,11 @@ export const scanModelsFolder = async (): Promise<Model[]> => {
               !source.url.startsWith(`https://`)
           )
         )
-        if (existFiles.every((exist) => exist)) return model
+        if (
+          !LocalEngines.includes(model.engine) ||
+          existFiles.every((exist) => exist)
+        )
+          return model
 
         const result = await fs
           .readdirSync(await joinPath([_homeDir, dirName]))
diff --git a/web/hooks/useModels.ts b/web/hooks/useModels.ts
index c856f6e3c..400e02793 100644
--- a/web/hooks/useModels.ts
+++ b/web/hooks/useModels.ts
@@ -43,7 +43,13 @@ const useModels = () => {
         .models.values()
         .toArray()
         .filter((e) => !isLocalEngine(e.engine))
-      const toUpdate = [...localModels, ...remoteModels]
+      const toUpdate = [
+        ...localModels,
+        ...remoteModels.filter(
+          (e: Model) => !localModels.some((g: Model) => g.id === e.id)
+        ),
+      ]
+
       setDownloadedModels(toUpdate)
 
       let isUpdated = false

From d9504b6152c6b3f68521a8c5eff9ccb102a4359b Mon Sep 17 00:00:00 2001
From: hiento09 <136591877+hiento09@users.noreply.github.com>
Date: Wed, 6 Nov 2024 17:24:20 +0700
Subject: [PATCH 59/71] chore: revert change app name (#3962)

Co-authored-by: Hien To <tominhhien97@gmail.com>
---
 .../workflows/template-build-linux-x64.yml    | 14 +++++------
 .../workflows/template-build-macos-arm64.yml  | 16 ++++++-------
 .../workflows/template-build-macos-x64.yml    | 16 ++++++-------
 .../workflows/template-build-windows-x64.yml  | 23 ++++++++++---------
 4 files changed, 35 insertions(+), 34 deletions(-)

diff --git a/.github/workflows/template-build-linux-x64.yml b/.github/workflows/template-build-linux-x64.yml
index afd5f6647..0280b1014 100644
--- a/.github/workflows/template-build-linux-x64.yml
+++ b/.github/workflows/template-build-linux-x64.yml
@@ -63,13 +63,13 @@ jobs:
           jq '.build.publish = [{"provider": "generic", "url": "https://delta.jan.ai/nightly", "channel": "latest"}, {"provider": "s3", "acl": null, "bucket": "${{ secrets.DELTA_AWS_S3_BUCKET_NAME }}", "region": "${{ secrets.DELTA_AWS_REGION}}", "path": "temp-nightly", "channel": "latest"}]' electron/package.json > /tmp/package.json
           mv /tmp/package.json electron/package.json
           cat electron/package.json
-          chmod +x .github/scripts/rename-app.sh
-          .github/scripts/rename-app.sh ./electron/package.json nightly
-          chmod +x .github/scripts/rename-workspace.sh
-          .github/scripts/rename-workspace.sh ./package.json nightly
-          echo "------------------------"
-          cat ./electron/package.json
-          echo "------------------------"
+          # chmod +x .github/scripts/rename-app.sh
+          # .github/scripts/rename-app.sh ./electron/package.json nightly
+          # chmod +x .github/scripts/rename-workspace.sh
+          # .github/scripts/rename-workspace.sh ./package.json nightly
+          # echo "------------------------"
+          # cat ./electron/package.json
+          # echo "------------------------"
 
       - name: Change App Name for beta version
         if: inputs.beta == true
diff --git a/.github/workflows/template-build-macos-arm64.yml b/.github/workflows/template-build-macos-arm64.yml
index 46f884473..e23ee5ed5 100644
--- a/.github/workflows/template-build-macos-arm64.yml
+++ b/.github/workflows/template-build-macos-arm64.yml
@@ -78,14 +78,14 @@ jobs:
           jq --arg teamid "${{ secrets.APPLE_TEAM_ID }}" '.build.mac.notarize.teamId = $teamid' electron/package.json > /tmp/package.json
           mv /tmp/package.json electron/package.json
 
-          cat electron/package.json
-          chmod +x .github/scripts/rename-app.sh
-          .github/scripts/rename-app.sh ./electron/package.json nightly
-          chmod +x .github/scripts/rename-workspace.sh
-          .github/scripts/rename-workspace.sh ./package.json nightly
-          echo "------------------------"
-          cat ./electron/package.json
-          echo "------------------------"
+          # cat electron/package.json
+          # chmod +x .github/scripts/rename-app.sh
+          # .github/scripts/rename-app.sh ./electron/package.json nightly
+          # chmod +x .github/scripts/rename-workspace.sh
+          # .github/scripts/rename-workspace.sh ./package.json nightly
+          # echo "------------------------"
+          # cat ./electron/package.json
+          # echo "------------------------"
 
       - name: Change App Name for beta version
         if: inputs.beta == true
diff --git a/.github/workflows/template-build-macos-x64.yml b/.github/workflows/template-build-macos-x64.yml
index 7781eb630..06a9baaa1 100644
--- a/.github/workflows/template-build-macos-x64.yml
+++ b/.github/workflows/template-build-macos-x64.yml
@@ -78,14 +78,14 @@ jobs:
           jq --arg teamid "${{ secrets.APPLE_TEAM_ID }}" '.build.mac.notarize.teamId = $teamid' electron/package.json > /tmp/package.json
           mv /tmp/package.json electron/package.json
 
-          cat electron/package.json
-          chmod +x .github/scripts/rename-app.sh
-          .github/scripts/rename-app.sh ./electron/package.json nightly
-          chmod +x .github/scripts/rename-workspace.sh
-          .github/scripts/rename-workspace.sh ./package.json nightly
-          echo "------------------------"
-          cat ./electron/package.json
-          echo "------------------------"
+          # cat electron/package.json
+          # chmod +x .github/scripts/rename-app.sh
+          # .github/scripts/rename-app.sh ./electron/package.json nightly
+          # chmod +x .github/scripts/rename-workspace.sh
+          # .github/scripts/rename-workspace.sh ./package.json nightly
+          # echo "------------------------"
+          # cat ./electron/package.json
+          # echo "------------------------"
 
       - name: Change App Name for beta version
         if: inputs.beta == true
diff --git a/.github/workflows/template-build-windows-x64.yml b/.github/workflows/template-build-windows-x64.yml
index 76db4aadc..c683392f5 100644
--- a/.github/workflows/template-build-windows-x64.yml
+++ b/.github/workflows/template-build-windows-x64.yml
@@ -79,17 +79,18 @@ jobs:
           jq '.build.win.sign = "./sign.js"' electron/package.json > /tmp/package.json
           mv /tmp/package.json electron/package.json
           cat electron/package.json
-          chmod +x .github/scripts/rename-app.sh
-          .github/scripts/rename-app.sh ./electron/package.json nightly
-          chmod +x .github/scripts/rename-workspace.sh
-          .github/scripts/rename-workspace.sh ./package.json nightly
-          chmod +x .github/scripts/rename-uninstaller.sh
-          .github/scripts/rename-uninstaller.sh nightly
-          echo "------------------------"
-          cat ./electron/package.json
-          echo "------------------------"
-          cat ./package.json
-          echo "------------------------"
+
+          # chmod +x .github/scripts/rename-app.sh
+          # .github/scripts/rename-app.sh ./electron/package.json nightly
+          # chmod +x .github/scripts/rename-workspace.sh
+          # .github/scripts/rename-workspace.sh ./package.json nightly
+          # chmod +x .github/scripts/rename-uninstaller.sh
+          # .github/scripts/rename-uninstaller.sh nightly
+          # echo "------------------------"
+          # cat ./electron/package.json
+          # echo "------------------------"
+          # cat ./package.json
+          # echo "------------------------"
 
       - name: Change App Name for beta version
         if: inputs.beta == true

From e41bcffcefdde2a0df8ff055b681f2777b691988 Mon Sep 17 00:00:00 2001
From: Louis <louis@jan.ai>
Date: Thu, 7 Nov 2024 10:10:05 +0700
Subject: [PATCH 60/71] fix: export PATH env to engine destination folder to
 have additional dlls scoped

---
 extensions/inference-cortex-extension/src/node/index.ts | 5 +++++
 1 file changed, 5 insertions(+)

diff --git a/extensions/inference-cortex-extension/src/node/index.ts b/extensions/inference-cortex-extension/src/node/index.ts
index a52de20bb..3816605d2 100644
--- a/extensions/inference-cortex-extension/src/node/index.ts
+++ b/extensions/inference-cortex-extension/src/node/index.ts
@@ -33,6 +33,11 @@ function run(systemInfo?: SystemInformation): Promise<any> {
     addEnvPaths(path.join(appResourcePath(), 'shared'))
     addEnvPaths(executableOptions.binPath)
     addEnvPaths(executableOptions.enginePath)
+    // Add the cortex.llamacpp path to the PATH and LD_LIBRARY_PATH
+    // This is required for the cortex engine to run for now since dlls are not moved to the root
+    addEnvPaths(
+      path.join(executableOptions.enginePath, 'engines', 'cortex.llamacpp')
+    )
 
     const dataFolderPath = getJanDataFolderPath()
     watchdog = new ProcessWatchdog(

From ced44973b821af829ab1cbfba136036ee72f15f8 Mon Sep 17 00:00:00 2001
From: Louis <louis@jan.ai>
Date: Thu, 7 Nov 2024 12:06:46 +0700
Subject: [PATCH 61/71] chore: queue server start and model load

---
 extensions/inference-cortex-extension/src/index.ts | 5 ++---
 1 file changed, 2 insertions(+), 3 deletions(-)

diff --git a/extensions/inference-cortex-extension/src/index.ts b/extensions/inference-cortex-extension/src/index.ts
index e099aae44..2f65c3c09 100644
--- a/extensions/inference-cortex-extension/src/index.ts
+++ b/extensions/inference-cortex-extension/src/index.ts
@@ -11,7 +11,6 @@ import {
   executeOnMain,
   systemInformation,
   joinPath,
-  dirName,
   LocalOAIEngine,
   InferenceEngine,
   getJanDataFolderPath,
@@ -97,7 +96,7 @@ export default class JanInferenceCortexExtension extends LocalOAIEngine {
       model.settings = settings
     }
 
-    return await ky
+    return await this.queue.add(() => ky
       .post(`${CORTEX_API_URL}/v1/models/start`, {
         json: {
           ...extractModelLoadParams(model.settings),
@@ -112,7 +111,7 @@ export default class JanInferenceCortexExtension extends LocalOAIEngine {
       .catch(async (e) => {
         throw (await e.response?.json()) ?? e
       })
-      .then()
+      .then())
   }
 
   override async unloadModel(model: Model): Promise<void> {

From 40019892b875a8ca003afcf37aa474e22472b45f Mon Sep 17 00:00:00 2001
From: Louis <louis@jan.ai>
Date: Thu, 7 Nov 2024 13:01:34 +0700
Subject: [PATCH 62/71] chore: correct name of bin subfolders to move dll
 properly

---
 extensions/inference-cortex-extension/download.bat | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/extensions/inference-cortex-extension/download.bat b/extensions/inference-cortex-extension/download.bat
index e4d777ea2..ecff683c3 100644
--- a/extensions/inference-cortex-extension/download.bat
+++ b/extensions/inference-cortex-extension/download.bat
@@ -7,7 +7,7 @@ set /p CORTEX_VERSION=<./bin/version.txt
 set VERSION=v0.1.35
 set DOWNLOAD_URL=https://github.com/janhq/cortex.llamacpp/releases/download/%VERSION%/cortex.llamacpp-0.1.35-windows-amd64
 set CUDA_DOWNLOAD_URL=https://github.com/janhq/cortex.llamacpp/releases/download/%VERSION%
-set SUBFOLDERS=win-cuda-12-0 win-cuda-11-7 win-noavx win-avx win-avx2 win-avx512 win-vulkan
+set SUBFOLDERS=noavx-cuda-12-0 noavx-cuda-11-7 avx2-cuda-12-0 avx2-cuda-11-7 noavx avx avx2 avx512 vulkan
 
 call .\node_modules\.bin\download -e --strip 1 -o %BIN_PATH% https://github.com/janhq/cortex/releases/download/v%CORTEX_VERSION%/cortex-%CORTEX_VERSION%-windows-amd64.tar.gz
 call .\node_modules\.bin\download %DOWNLOAD_URL%-avx2-cuda-12-0.tar.gz -e --strip 1 -o %BIN_PATH%/avx2-cuda-12-0/engines/cortex.llamacpp

From 8090a7be24358239919c81dbf0eaa0713d73f513 Mon Sep 17 00:00:00 2001
From: Faisal Amir <urmauur@gmail.com>
Date: Thu, 7 Nov 2024 13:04:39 +0700
Subject: [PATCH 63/71] fix: make input clickable for copying instead of just
 the copy icon (#3964)

---
 web/screens/LocalServer/LocalServerRightPanel/index.tsx | 6 +++---
 1 file changed, 3 insertions(+), 3 deletions(-)

diff --git a/web/screens/LocalServer/LocalServerRightPanel/index.tsx b/web/screens/LocalServer/LocalServerRightPanel/index.tsx
index fd7f19f5f..8cb2af350 100644
--- a/web/screens/LocalServer/LocalServerRightPanel/index.tsx
+++ b/web/screens/LocalServer/LocalServerRightPanel/index.tsx
@@ -97,6 +97,9 @@ const LocalServerRightPanel = () => {
             value={selectedModel?.id || ''}
             className="cursor-pointer text-[hsla(var(--text-secondary))]"
             readOnly
+            onClick={() => {
+              clipboard.copy(selectedModel?.id)
+            }}
             suffixIcon={
               clipboard.copied ? (
                 <CheckIcon
@@ -107,9 +110,6 @@ const LocalServerRightPanel = () => {
                 <CopyIcon
                   size={14}
                   className="cursor-pointer text-[hsla(var(--text-secondary))]"
-                  onClick={() => {
-                    clipboard.copy(selectedModel?.id)
-                  }}
                 />
               )
             }

From 14d486f2990ebff737b20dfded5c7138791367fd Mon Sep 17 00:00:00 2001
From: Faisal Amir <urmauur@gmail.com>
Date: Thu, 7 Nov 2024 13:05:06 +0700
Subject: [PATCH 64/71] fix: remove click outside system monitor panel (#3965)

---
 .../Layout/BottomPanel/SystemMonitor/index.tsx  | 17 +----------------
 1 file changed, 1 insertion(+), 16 deletions(-)

diff --git a/web/containers/Layout/BottomPanel/SystemMonitor/index.tsx b/web/containers/Layout/BottomPanel/SystemMonitor/index.tsx
index ae91cfa9d..14055b535 100644
--- a/web/containers/Layout/BottomPanel/SystemMonitor/index.tsx
+++ b/web/containers/Layout/BottomPanel/SystemMonitor/index.tsx
@@ -1,7 +1,6 @@
 import { Fragment, useCallback, useState } from 'react'
 
 import { Progress } from '@janhq/joi'
-import { useClickOutside } from '@janhq/joi'
 import { useAtom, useAtomValue } from 'jotai'
 import {
   MonitorIcon,
@@ -44,23 +43,11 @@ const SystemMonitor = () => {
   const [showSystemMonitorPanel, setShowSystemMonitorPanel] = useAtom(
     showSystemMonitorPanelAtom
   )
-  const [control, setControl] = useState<HTMLDivElement | null>(null)
-  const [elementExpand, setElementExpand] = useState<HTMLDivElement | null>(
-    null
-  )
+
   const reduceTransparent = useAtomValue(reduceTransparentAtom)
 
   const { watch, stopWatching } = useGetSystemResources()
 
-  useClickOutside(
-    () => {
-      toggleShowSystemMonitorPanel(false)
-      setShowFullScreen(false)
-    },
-    null,
-    [control, elementExpand]
-  )
-
   const toggleShowSystemMonitorPanel = useCallback(
     (isShow: boolean) => {
       setShowSystemMonitorPanel(isShow)
@@ -76,7 +63,6 @@ const SystemMonitor = () => {
   return (
     <Fragment>
       <div
-        ref={setControl}
         data-testid="system-monitoring"
         className={twMerge(
           'flex cursor-pointer items-center gap-x-1 rounded px-1 py-0.5 hover:bg-[hsla(var(--secondary-bg))]',
@@ -92,7 +78,6 @@ const SystemMonitor = () => {
       </div>
       {showSystemMonitorPanel && (
         <div
-          ref={setElementExpand}
           className={twMerge(
             'fixed bottom-9 left-[49px] z-50 flex h-[200px] w-[calc(100%-48px)] flex-shrink-0 flex-col border-t border-[hsla(var(--app-border))] bg-[hsla(var(--app-bg))]',
             showFullScreen && 'h-[calc(100%-63px)]',

From b519c0814fb892e3d0168f7bf3a52d5ad500f8ae Mon Sep 17 00:00:00 2001
From: Faisal Amir <urmauur@gmail.com>
Date: Thu, 7 Nov 2024 13:11:55 +0700
Subject: [PATCH 65/71] fix: codeblock responsive push right panel (#3967)

---
 web/styles/components/code-block.scss | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/web/styles/components/code-block.scss b/web/styles/components/code-block.scss
index c90684d28..e905e4b6c 100644
--- a/web/styles/components/code-block.scss
+++ b/web/styles/components/code-block.scss
@@ -70,7 +70,7 @@ pre > code {
   display: block;
   text-indent: 0;
   white-space: pre;
-  max-width: 40vw;
+  max-width: 10vw;
 }
 
 .hljs-emphasis {

From 2e9b7fdad28cce46e871aa80c33d830cf5bc0ccc Mon Sep 17 00:00:00 2001
From: Louis <louis@jan.ai>
Date: Thu, 7 Nov 2024 13:34:09 +0700
Subject: [PATCH 66/71] chore: add import name for legacy models

---
 extensions/model-extension/src/index.ts | 5 +++--
 1 file changed, 3 insertions(+), 2 deletions(-)

diff --git a/extensions/model-extension/src/index.ts b/extensions/model-extension/src/index.ts
index e62e5b2ee..78c85cf0b 100644
--- a/extensions/model-extension/src/index.ts
+++ b/extensions/model-extension/src/index.ts
@@ -9,7 +9,7 @@ import {
   DownloadState,
   events,
   DownloadEvent,
-  OptionType
+  OptionType,
 } from '@janhq/core'
 import { CortexAPI } from './cortex'
 import { scanModelsFolder } from './legacy/model-json'
@@ -189,7 +189,8 @@ export default class JanModelExtension extends ModelExtension {
                     model.sources[0]?.url.split('/').pop() ??
                     model.id,
                 ]) // Copied models
-              : model.sources[0].url // Symlink models
+              : model.sources[0].url, // Symlink models,
+            model.name
           )
         )
       )

From 77034c4749d12730aebdc41e769f449db588b7ae Mon Sep 17 00:00:00 2001
From: Faisal Amir <urmauur@gmail.com>
Date: Thu, 7 Nov 2024 14:18:18 +0700
Subject: [PATCH 67/71] fix: starter screen text alignment and missing model
 size (#3968)

---
 .../ChatBody/OnDeviceStarterScreen/index.tsx          | 11 +++++++----
 1 file changed, 7 insertions(+), 4 deletions(-)

diff --git a/web/screens/Thread/ThreadCenterPanel/ChatBody/OnDeviceStarterScreen/index.tsx b/web/screens/Thread/ThreadCenterPanel/ChatBody/OnDeviceStarterScreen/index.tsx
index 0b999c19d..0433a8688 100644
--- a/web/screens/Thread/ThreadCenterPanel/ChatBody/OnDeviceStarterScreen/index.tsx
+++ b/web/screens/Thread/ThreadCenterPanel/ChatBody/OnDeviceStarterScreen/index.tsx
@@ -221,19 +221,19 @@ const OnDeviceStarterScreen = ({ extensionHasSettings }: Props) => {
                   return (
                     <div
                       key={featModel.id}
-                      className="my-2 flex items-center justify-between gap-2 border-b border-[hsla(var(--app-border))] pb-4 pt-1 last:border-none"
+                      className="my-2 flex items-start justify-between gap-2 border-b border-[hsla(var(--app-border))] pb-4 pt-1 last:border-none"
                     >
                       <div className="w-full text-left">
-                        <h6 className="font-medium">{featModel.name}</h6>
+                        <h6 className="mt-1.5 font-medium">{featModel.name}</h6>
                       </div>
 
                       {isDownloading ? (
-                        <div className="flex w-full items-center gap-2">
+                        <div className="flex w-full flex-col items-end gap-2">
                           {Object.values(downloadStates)
                             .filter((x) => x.modelId === featModel.id)
                             .map((item, i) => (
                               <div
-                                className="flex w-full items-center gap-2"
+                                className="mt-1.5 flex w-full items-center gap-2"
                                 key={i}
                               >
                                 <Progress
@@ -253,6 +253,9 @@ const OnDeviceStarterScreen = ({ extensionHasSettings }: Props) => {
                                 </div>
                               </div>
                             ))}
+                          <span className="text-[hsla(var(--text-secondary))]">
+                            {toGibibytes(featModel.metadata?.size)}
+                          </span>
                         </div>
                       ) : (
                         <div className="flex flex-col items-end justify-end gap-2">

From a773e169fccb9d3a9c9d23b44ca3d565cf2cb02a Mon Sep 17 00:00:00 2001
From: Louis <louis@jan.ai>
Date: Thu, 7 Nov 2024 14:26:41 +0700
Subject: [PATCH 68/71] fix: an edge case where auto import does not work with
 relative model file path

---
 extensions/model-extension/src/index.ts | 4 +++-
 1 file changed, 3 insertions(+), 1 deletion(-)

diff --git a/extensions/model-extension/src/index.ts b/extensions/model-extension/src/index.ts
index 78c85cf0b..7d7514f3b 100644
--- a/extensions/model-extension/src/index.ts
+++ b/extensions/model-extension/src/index.ts
@@ -4,6 +4,7 @@ import {
   InferenceEngine,
   joinPath,
   dirName,
+  fs,
   ModelManager,
   abortDownload,
   DownloadState,
@@ -181,7 +182,8 @@ export default class JanModelExtension extends ModelExtension {
         toImportModels.map(async (model: Model & { file_path: string }) =>
           this.importModel(
             model.id,
-            model.sources[0].url.startsWith('http')
+            model.sources[0].url.startsWith('http') ||
+              !(await fs.existsSync(model.sources[0].url))
               ? await joinPath([
                   await dirName(model.file_path),
                   model.sources[0]?.filename ??

From 0847b32e87acbf419810cedba714354910635062 Mon Sep 17 00:00:00 2001
From: Louis <louis@jan.ai>
Date: Thu, 7 Nov 2024 14:29:27 +0700
Subject: [PATCH 69/71] fix: an edge case when starting a model with a
 relative model path

---
 .../inference-cortex-extension/src/index.ts   | 40 +++++++++++--------
 1 file changed, 23 insertions(+), 17 deletions(-)

diff --git a/extensions/inference-cortex-extension/src/index.ts b/extensions/inference-cortex-extension/src/index.ts
index 2f65c3c09..d070ff9a3 100644
--- a/extensions/inference-cortex-extension/src/index.ts
+++ b/extensions/inference-cortex-extension/src/index.ts
@@ -15,6 +15,7 @@ import {
   InferenceEngine,
   getJanDataFolderPath,
   extractModelLoadParams,
+  fs,
 } from '@janhq/core'
 import PQueue from 'p-queue'
 import ky from 'ky'
@@ -96,22 +97,24 @@ export default class JanInferenceCortexExtension extends LocalOAIEngine {
       model.settings = settings
     }
 
-    return await this.queue.add(() => ky
-      .post(`${CORTEX_API_URL}/v1/models/start`, {
-        json: {
-          ...extractModelLoadParams(model.settings),
-          model: model.id,
-          engine:
-            model.engine === InferenceEngine.nitro // Legacy model cache
-              ? InferenceEngine.cortex_llamacpp
-              : model.engine,
-        },
-      })
-      .json()
-      .catch(async (e) => {
-        throw (await e.response?.json()) ?? e
-      })
-      .then())
+    return await this.queue.add(() =>
+      ky
+        .post(`${CORTEX_API_URL}/v1/models/start`, {
+          json: {
+            ...extractModelLoadParams(model.settings),
+            model: model.id,
+            engine:
+              model.engine === InferenceEngine.nitro // Legacy model cache
+                ? InferenceEngine.cortex_llamacpp
+                : model.engine,
+          },
+        })
+        .json()
+        .catch(async (e) => {
+          throw (await e.response?.json()) ?? e
+        })
+        .then()
+    )
   }
 
   override async unloadModel(model: Model): Promise<void> {
@@ -159,7 +162,10 @@ export const getModelFilePath = async (
   file: string
 ): Promise<string> => {
   // Symlink to the model file
-  if (!model.sources[0]?.url.startsWith('http')) {
+  if (
+    !model.sources[0]?.url.startsWith('http') &&
+    (await fs.existsSync(model.sources[0].url))
+  ) {
     return model.sources[0]?.url
   }
   return joinPath([await getJanDataFolderPath(), 'models', model.id, file])

From 8d4734cb8a3f22f9530d195b6dbd4abc21a7abca Mon Sep 17 00:00:00 2001
From: Faisal Amir <urmauur@gmail.com>
Date: Thu, 7 Nov 2024 16:08:56 +0700
Subject: [PATCH 70/71] fix: handle edge cases syntax highlight (#3969)

---
 .../LocalServerRightPanel/index.tsx           |  2 +-
 .../ChatInput/RichTextEditor.tsx              | 22 +++++++------------
 web/styles/components/code-block.scss         | 14 ++++++------
 web/styles/main.scss                          | 16 ++++++++++++++
 4 files changed, 32 insertions(+), 22 deletions(-)

diff --git a/web/screens/LocalServer/LocalServerRightPanel/index.tsx b/web/screens/LocalServer/LocalServerRightPanel/index.tsx
index 8cb2af350..a59e83e7e 100644
--- a/web/screens/LocalServer/LocalServerRightPanel/index.tsx
+++ b/web/screens/LocalServer/LocalServerRightPanel/index.tsx
@@ -95,7 +95,7 @@ const LocalServerRightPanel = () => {
         <div className="mt-2">
           <Input
             value={selectedModel?.id || ''}
-            className="cursor-pointer text-[hsla(var(--text-secondary))]"
+            className="cursor-pointer text-[hsla(var(--text-secondary))] hover:border-[hsla(var(--app-border))] focus-visible:outline-0 focus-visible:ring-0"
             readOnly
             onClick={() => {
               clipboard.copy(selectedModel?.id)
diff --git a/web/screens/Thread/ThreadCenterPanel/ChatInput/RichTextEditor.tsx b/web/screens/Thread/ThreadCenterPanel/ChatInput/RichTextEditor.tsx
index 87ea4e08f..096ef51e0 100644
--- a/web/screens/Thread/ThreadCenterPanel/ChatInput/RichTextEditor.tsx
+++ b/web/screens/Thread/ThreadCenterPanel/ChatInput/RichTextEditor.tsx
@@ -132,20 +132,6 @@ const RichTextEditor = ({
       if (Editor.isBlock(editor, node) && node.type === 'paragraph') {
         node.children.forEach((child: { text: any }, childIndex: number) => {
           const text = child.text
-          const { selection } = editor
-
-          if (selection) {
-            const selectedNode = Editor.node(editor, selection)
-
-            if (Editor.isBlock(editor, selectedNode[0] as CustomElement)) {
-              const isNodeEmpty = Editor.string(editor, selectedNode[1]) === ''
-
-              if (isNodeEmpty) {
-                // Reset language when a node is cleared
-                currentLanguage.current = 'plaintext'
-              }
-            }
-          }
 
           // Match code block start and end
           const startMatch = text.match(/^```(\w*)$/)
@@ -346,6 +332,14 @@ const RichTextEditor = ({
           .join('\n')
 
         setCurrentPrompt(combinedText)
+        if (combinedText.trim() === '') {
+          currentLanguage.current = 'plaintext'
+        }
+        const hasCodeBlockStart = combinedText.match(/^```(\w*)/m)
+        // Set language to plaintext if no code block with language identifier is found
+        if (!hasCodeBlockStart) {
+          currentLanguage.current = 'plaintext'
+        }
       }}
     >
       <Editable
diff --git a/web/styles/components/code-block.scss b/web/styles/components/code-block.scss
index e905e4b6c..fed56884b 100644
--- a/web/styles/components/code-block.scss
+++ b/web/styles/components/code-block.scss
@@ -1,6 +1,6 @@
 .hljs-comment,
 .hljs-quote {
-  color: #d4d0ab;
+  color: var(--hljs-comment);
 }
 
 /* Red */
@@ -12,7 +12,7 @@
 .hljs-selector-class,
 .hljs-regexp,
 .hljs-deletion {
-  color: #ffa07a;
+  color: var(--hljs-variable);
 }
 
 /* Orange */
@@ -24,12 +24,12 @@
 .hljs-params,
 .hljs-meta,
 .hljs-link {
-  color: #f5ab35;
+  color: var(--hljs-number);
 }
 
 /* Yellow */
 .hljs-attribute {
-  color: #ffd700;
+  color: var(--hljs-attribute);
 }
 
 /* Green */
@@ -37,19 +37,19 @@
 .hljs-symbol,
 .hljs-bullet,
 .hljs-addition {
-  color: #abe338;
+  color: var(--hljs-string);
 }
 
 /* Blue */
 .hljs-title,
 .hljs-section {
-  color: #00e0e0;
+  color: var(--hljs-title);
 }
 
 /* Purple */
 .hljs-keyword,
 .hljs-selector-tag {
-  color: #dcc6e0;
+  color: var(--hljs-keyword);
 }
 
 .hljs {
diff --git a/web/styles/main.scss b/web/styles/main.scss
index 8e952af5c..a7e85fcb0 100644
--- a/web/styles/main.scss
+++ b/web/styles/main.scss
@@ -40,6 +40,14 @@
   --text-secondary: 0, 0%, 0%, 0.6;
   --text-tertiary: 0, 0%, 0%, 0.4;
   --text-quaternary: 0, 0%, 0%, 0.2;
+
+  --hljs-comment: #6e7781;
+  --hljs-variable: #cf222e;
+  --hljs-number: #bc4c00;
+  --hljs-attribute: #b58407;
+  --hljs-string: #116329;
+  --hljs-title: #0550ae;
+  --hljs-keyword: #8250df;
 }
 
 html.dark {
@@ -68,4 +76,12 @@ html.dark {
   --text-secondary: 0, 0%, 68%, 1;
   --text-tertiary: 0, 0%, 68%, 0.4;
   --text-quaternary: 0, 0%, 68%, 0.2;
+
+  --hljs-comment: #8b949e;
+  --hljs-variable: #ff7b72;
+  --hljs-number: #f0883e;
+  --hljs-attribute: #ffa657;
+  --hljs-string: #7ee787;
+  --hljs-title: #79c0ff;
+  --hljs-keyword: #d2a8ff;
 }

From 766a4e8e8a64033bdf633dfbd5da120ffc290387 Mon Sep 17 00:00:00 2001
From: Faisal Amir <urmauur@gmail.com>
Date: Thu, 7 Nov 2024 16:47:50 +0700
Subject: [PATCH 71/71] fix: right panel bg joi-dark and remove n_parallel
 params (#3970)

---
 themes/joi-dark/theme.json              |  2 +-
 web/screens/Settings/Advanced/index.tsx |  1 +
 web/utils/predefinedComponent.ts        | 15 ---------------
 3 files changed, 2 insertions(+), 16 deletions(-)

diff --git a/themes/joi-dark/theme.json b/themes/joi-dark/theme.json
index 22ed778ba..c299e3b1e 100644
--- a/themes/joi-dark/theme.json
+++ b/themes/joi-dark/theme.json
@@ -64,7 +64,7 @@
     },
 
     "right-panel": {
-      "bg": "0, 0%, 13%, 0"
+      "bg": "0, 0%, 13%, 1"
     },
 
     "tooltip": {
diff --git a/web/screens/Settings/Advanced/index.tsx b/web/screens/Settings/Advanced/index.tsx
index 475cc4d58..50e2a72a6 100644
--- a/web/screens/Settings/Advanced/index.tsx
+++ b/web/screens/Settings/Advanced/index.tsx
@@ -209,6 +209,7 @@ const Advanced = () => {
     }
     setGpusInUse(updatedGpusInUse)
     saveSettings({ gpusInUse: updatedGpusInUse })
+    window.core?.api?.relaunch()
   }
 
   const gpuSelectionPlaceHolder =
diff --git a/web/utils/predefinedComponent.ts b/web/utils/predefinedComponent.ts
index 82087f43b..3a9f45e92 100644
--- a/web/utils/predefinedComponent.ts
+++ b/web/utils/predefinedComponent.ts
@@ -145,21 +145,6 @@ export const presetConfiguration: Record<string, SettingComponentProps> = {
     requireModelReload: false,
     configType: 'runtime',
   },
-  n_parallel: {
-    key: 'n_parallel',
-    title: 'N Parallel',
-    description:
-      'The number of parallel operations. Only set when enable continuous batching.	',
-    controllerType: 'slider',
-    controllerProps: {
-      min: 0,
-      max: 4,
-      step: 1,
-      value: 1,
-    },
-    requireModelReload: true,
-    configType: 'setting',
-  },
   cpu_threads: {
     key: 'cpu_threads',
     title: 'CPU Threads',