From 11322e189fbb62977f63baa123956dc984f36aaa Mon Sep 17 00:00:00 2001
From: hiento09 <136591877+hiento09@users.noreply.github.com>
Date: Wed, 13 Nov 2024 21:28:01 +0700
Subject: [PATCH 01/46] Revert "chore: enable notification nightly" (#4011)

* Revert "chore: enable notification nightly (#3963)"

This reverts commit 102d8b08c15b4ef2349ee38018bcb7c863be82e3.

* chore: revert #3962

* chore: new nightly url for discord notification

---------

Co-authored-by: Hien To <tominhhien97@gmail.com>
---
 .../workflows/jan-electron-build-nightly.yml  |  5 ++--
 .../workflows/template-build-linux-x64.yml    | 16 ++++++-------
 .../workflows/template-build-macos-arm64.yml  | 16 ++++++-------
 .../workflows/template-build-macos-x64.yml    | 18 +++++++-------
 .../workflows/template-build-windows-x64.yml  | 24 +++++++++----------
 ...ate-noti-discord-and-update-url-readme.yml | 10 ++++----
 6 files changed, 44 insertions(+), 45 deletions(-)

diff --git a/.github/workflows/jan-electron-build-nightly.yml b/.github/workflows/jan-electron-build-nightly.yml
index d79080990..73dc6524b 100644
--- a/.github/workflows/jan-electron-build-nightly.yml
+++ b/.github/workflows/jan-electron-build-nightly.yml
@@ -114,15 +114,14 @@ jobs:
       - name: Upload latest-mac.yml
         if: ${{ needs.set-public-provider.outputs.public_provider == 'aws-s3' }}
         run: |
-          aws s3 cp ./latest-mac.yml "s3://${{ secrets.DELTA_AWS_S3_BUCKET_NAME }}/temp-latest/latest-mac.yml"
-          aws s3 sync s3://${{ secrets.DELTA_AWS_S3_BUCKET_NAME }}/temp-latest/ s3://${{ secrets.DELTA_AWS_S3_BUCKET_NAME }}/latest/
+          aws s3 cp ./latest-mac.yml "s3://${{ secrets.DELTA_AWS_S3_BUCKET_NAME }}/temp-nightly/latest-mac.yml"
+          aws s3 sync s3://${{ secrets.DELTA_AWS_S3_BUCKET_NAME }}/temp-nightly/ s3://${{ secrets.DELTA_AWS_S3_BUCKET_NAME }}/nightly/
         env:
           AWS_ACCESS_KEY_ID: ${{ secrets.DELTA_AWS_ACCESS_KEY_ID }}
           AWS_SECRET_ACCESS_KEY: ${{ secrets.DELTA_AWS_SECRET_ACCESS_KEY }}
           AWS_DEFAULT_REGION: ${{ secrets.DELTA_AWS_REGION }}
           AWS_EC2_METADATA_DISABLED: "true"
 
-    
   noti-discord-nightly-and-update-url-readme:
     needs: [build-macos-x64, build-macos-arm64, build-windows-x64, build-linux-x64, get-update-version, set-public-provider, combine-latest-mac-yml]
     secrets: inherit
diff --git a/.github/workflows/template-build-linux-x64.yml b/.github/workflows/template-build-linux-x64.yml
index 92188c364..afd5f6647 100644
--- a/.github/workflows/template-build-linux-x64.yml
+++ b/.github/workflows/template-build-linux-x64.yml
@@ -60,16 +60,16 @@ jobs:
           mv /tmp/package.json electron/package.json
           jq --arg version "${{ inputs.new_version }}" '.version = $version' web/package.json > /tmp/package.json
           mv /tmp/package.json web/package.json
-          jq '.build.publish = [{"provider": "generic", "url": "https://delta.jan.ai/latest", "channel": "latest"}, {"provider": "s3", "acl": null, "bucket": "${{ secrets.DELTA_AWS_S3_BUCKET_NAME }}", "region": "${{ secrets.DELTA_AWS_REGION}}", "path": "temp-latest", "channel": "latest"}]' electron/package.json > /tmp/package.json
+          jq '.build.publish = [{"provider": "generic", "url": "https://delta.jan.ai/nightly", "channel": "latest"}, {"provider": "s3", "acl": null, "bucket": "${{ secrets.DELTA_AWS_S3_BUCKET_NAME }}", "region": "${{ secrets.DELTA_AWS_REGION}}", "path": "temp-nightly", "channel": "latest"}]' electron/package.json > /tmp/package.json
           mv /tmp/package.json electron/package.json
           cat electron/package.json
-          # chmod +x .github/scripts/rename-app.sh
-          # .github/scripts/rename-app.sh ./electron/package.json nightly
-          # chmod +x .github/scripts/rename-workspace.sh
-          # .github/scripts/rename-workspace.sh ./package.json nightly
-          # echo "------------------------"
-          # cat ./electron/package.json
-          # echo "------------------------"
+          chmod +x .github/scripts/rename-app.sh
+          .github/scripts/rename-app.sh ./electron/package.json nightly
+          chmod +x .github/scripts/rename-workspace.sh
+          .github/scripts/rename-workspace.sh ./package.json nightly
+          echo "------------------------"
+          cat ./electron/package.json
+          echo "------------------------"
 
       - name: Change App Name for beta version
         if: inputs.beta == true
diff --git a/.github/workflows/template-build-macos-arm64.yml b/.github/workflows/template-build-macos-arm64.yml
index a23e34cf9..e618afb53 100644
--- a/.github/workflows/template-build-macos-arm64.yml
+++ b/.github/workflows/template-build-macos-arm64.yml
@@ -72,20 +72,20 @@ jobs:
           jq --arg version "${{ inputs.new_version }}" '.version = $version' web/package.json > /tmp/package.json
           mv /tmp/package.json web/package.json
 
-          jq '.build.publish = [{"provider": "generic", "url": "https://delta.jan.ai/latest", "channel": "latest"}, {"provider": "s3", "acl": null, "bucket": "${{ secrets.DELTA_AWS_S3_BUCKET_NAME }}", "region": "${{ secrets.DELTA_AWS_REGION}}", "path": "temp-latest", "channel": "latest"}]' electron/package.json > /tmp/package.json
+          jq '.build.publish = [{"provider": "generic", "url": "https://delta.jan.ai/nightly", "channel": "latest"}, {"provider": "s3", "acl": null, "bucket": "${{ secrets.DELTA_AWS_S3_BUCKET_NAME }}", "region": "${{ secrets.DELTA_AWS_REGION}}", "path": "temp-nightly", "channel": "latest"}]' electron/package.json > /tmp/package.json
           mv /tmp/package.json electron/package.json
 
           jq --arg teamid "${{ secrets.APPLE_TEAM_ID }}" '.build.mac.notarize.teamId = $teamid' electron/package.json > /tmp/package.json
           mv /tmp/package.json electron/package.json
 
           # cat electron/package.json
-          # chmod +x .github/scripts/rename-app.sh
-          # .github/scripts/rename-app.sh ./electron/package.json nightly
-          # chmod +x .github/scripts/rename-workspace.sh
-          # .github/scripts/rename-workspace.sh ./package.json nightly
-          # echo "------------------------"
-          # cat ./electron/package.json
-          # echo "------------------------"
+          chmod +x .github/scripts/rename-app.sh
+          .github/scripts/rename-app.sh ./electron/package.json nightly
+          chmod +x .github/scripts/rename-workspace.sh
+          .github/scripts/rename-workspace.sh ./package.json nightly
+          echo "------------------------"
+          cat ./electron/package.json
+          echo "------------------------"
 
       - name: Change App Name for beta version
         if: inputs.beta == true
diff --git a/.github/workflows/template-build-macos-x64.yml b/.github/workflows/template-build-macos-x64.yml
index 18309fca0..7781eb630 100644
--- a/.github/workflows/template-build-macos-x64.yml
+++ b/.github/workflows/template-build-macos-x64.yml
@@ -72,20 +72,20 @@ jobs:
           jq --arg version "${{ inputs.new_version }}" '.version = $version' web/package.json > /tmp/package.json
           mv /tmp/package.json web/package.json
 
-          jq '.build.publish = [{"provider": "generic", "url": "https://delta.jan.ai/latest", "channel": "latest"}, {"provider": "s3", "acl": null, "bucket": "${{ secrets.DELTA_AWS_S3_BUCKET_NAME }}", "region": "${{ secrets.DELTA_AWS_REGION}}", "path": "temp-latest", "channel": "latest"}]' electron/package.json > /tmp/package.json
+          jq '.build.publish = [{"provider": "generic", "url": "https://delta.jan.ai/nightly", "channel": "latest"}, {"provider": "s3", "acl": null, "bucket": "${{ secrets.DELTA_AWS_S3_BUCKET_NAME }}", "region": "${{ secrets.DELTA_AWS_REGION}}", "path": "temp-nightly", "channel": "latest"}]' electron/package.json > /tmp/package.json
           mv /tmp/package.json electron/package.json
 
           jq --arg teamid "${{ secrets.APPLE_TEAM_ID }}" '.build.mac.notarize.teamId = $teamid' electron/package.json > /tmp/package.json
           mv /tmp/package.json electron/package.json
 
-          # cat electron/package.json
-          # chmod +x .github/scripts/rename-app.sh
-          # .github/scripts/rename-app.sh ./electron/package.json nightly
-          # chmod +x .github/scripts/rename-workspace.sh
-          # .github/scripts/rename-workspace.sh ./package.json nightly
-          # echo "------------------------"
-          # cat ./electron/package.json
-          # echo "------------------------"
+          cat electron/package.json
+          chmod +x .github/scripts/rename-app.sh
+          .github/scripts/rename-app.sh ./electron/package.json nightly
+          chmod +x .github/scripts/rename-workspace.sh
+          .github/scripts/rename-workspace.sh ./package.json nightly
+          echo "------------------------"
+          cat ./electron/package.json
+          echo "------------------------"
 
       - name: Change App Name for beta version
         if: inputs.beta == true
diff --git a/.github/workflows/template-build-windows-x64.yml b/.github/workflows/template-build-windows-x64.yml
index 2a1d3f15b..488366a6d 100644
--- a/.github/workflows/template-build-windows-x64.yml
+++ b/.github/workflows/template-build-windows-x64.yml
@@ -73,24 +73,24 @@ jobs:
           jq --arg version "${{ inputs.new_version }}" '.version = $version' web/package.json > /tmp/package.json
           mv /tmp/package.json web/package.json
 
-          jq '.build.publish = [{"provider": "generic", "url": "https://delta.jan.ai/latest", "channel": "latest"}, {"provider": "s3", "acl": null, "bucket": "${{ secrets.DELTA_AWS_S3_BUCKET_NAME }}", "region": "${{ secrets.DELTA_AWS_REGION}}", "path": "temp-latest", "channel": "latest"}]' electron/package.json > /tmp/package.json
+          jq '.build.publish = [{"provider": "generic", "url": "https://delta.jan.ai/nightly", "channel": "latest"}, {"provider": "s3", "acl": null, "bucket": "${{ secrets.DELTA_AWS_S3_BUCKET_NAME }}", "region": "${{ secrets.DELTA_AWS_REGION}}", "path": "temp-nightly", "channel": "latest"}]' electron/package.json > /tmp/package.json
           mv /tmp/package.json electron/package.json
 
           jq '.build.win.sign = "./sign.js"' electron/package.json > /tmp/package.json
           mv /tmp/package.json electron/package.json
           cat electron/package.json
 
-          # chmod +x .github/scripts/rename-app.sh
-          # .github/scripts/rename-app.sh ./electron/package.json nightly
-          # chmod +x .github/scripts/rename-workspace.sh
-          # .github/scripts/rename-workspace.sh ./package.json nightly
-          # chmod +x .github/scripts/rename-uninstaller.sh
-          # .github/scripts/rename-uninstaller.sh nightly
-          # echo "------------------------"
-          # cat ./electron/package.json
-          # echo "------------------------"
-          # cat ./package.json
-          # echo "------------------------"
+          chmod +x .github/scripts/rename-app.sh
+          .github/scripts/rename-app.sh ./electron/package.json nightly
+          chmod +x .github/scripts/rename-workspace.sh
+          .github/scripts/rename-workspace.sh ./package.json nightly
+          chmod +x .github/scripts/rename-uninstaller.sh
+          .github/scripts/rename-uninstaller.sh nightly
+          echo "------------------------"
+          cat ./electron/package.json
+          echo "------------------------"
+          cat ./package.json
+          echo "------------------------"
 
       - name: Change App Name for beta version
         if: inputs.beta == true
diff --git a/.github/workflows/template-noti-discord-and-update-url-readme.yml b/.github/workflows/template-noti-discord-and-update-url-readme.yml
index c419a3a9a..a53c20be5 100644
--- a/.github/workflows/template-noti-discord-and-update-url-readme.yml
+++ b/.github/workflows/template-noti-discord-and-update-url-readme.yml
@@ -47,11 +47,11 @@ jobs:
         with:
           args: |
             Jan App ${{ inputs.build_reason }} build artifact version {{ VERSION }}:
-            - Windows: https://delta.jan.ai/latest/jan-win-x64-{{ VERSION }}.exe
-            - macOS Intel: https://delta.jan.ai/latest/jan-mac-x64-{{ VERSION }}.dmg
-            - macOS Apple Silicon: https://delta.jan.ai/latest/jan-mac-arm64-{{ VERSION }}.dmg
-            - Linux Deb: https://delta.jan.ai/latest/jan-linux-amd64-{{ VERSION }}.deb
-            - Linux AppImage: https://delta.jan.ai/latest/jan-linux-x86_64-{{ VERSION }}.AppImage
+            - Windows: https://delta.jan.ai/nightly/jan-win-x64-{{ VERSION }}.exe
+            - macOS Intel: https://delta.jan.ai/nightly/jan-mac-x64-{{ VERSION }}.dmg
+            - macOS Apple Silicon: https://delta.jan.ai/nightly/jan-mac-arm64-{{ VERSION }}.dmg
+            - Linux Deb: https://delta.jan.ai/nightly/jan-linux-amd64-{{ VERSION }}.deb
+            - Linux AppImage: https://delta.jan.ai/nightly/jan-linux-x86_64-{{ VERSION }}.AppImage
             - Github action run: https://github.com/janhq/jan/actions/runs/{{ GITHUB_RUN_ID }}
         env:
           DISCORD_WEBHOOK: ${{ secrets.DISCORD_WEBHOOK }}
\ No newline at end of file

From 8a81678bb46becc45d817a1d9fdb48ce9d84fb27 Mon Sep 17 00:00:00 2001
From: hiento09 <136591877+hiento09@users.noreply.github.com>
Date: Thu, 14 Nov 2024 09:46:00 +0700
Subject: [PATCH 02/46] chore: correct discord message notification (#4018)

Co-authored-by: Hien To <tominhhien97@gmail.com>
---
 .../template-noti-discord-and-update-url-readme.yml    | 10 +++++-----
 1 file changed, 5 insertions(+), 5 deletions(-)

diff --git a/.github/workflows/template-noti-discord-and-update-url-readme.yml b/.github/workflows/template-noti-discord-and-update-url-readme.yml
index a53c20be5..d799dacfa 100644
--- a/.github/workflows/template-noti-discord-and-update-url-readme.yml
+++ b/.github/workflows/template-noti-discord-and-update-url-readme.yml
@@ -47,11 +47,11 @@ jobs:
         with:
           args: |
             Jan App ${{ inputs.build_reason }} build artifact version {{ VERSION }}:
-            - Windows: https://delta.jan.ai/nightly/jan-win-x64-{{ VERSION }}.exe
-            - macOS Intel: https://delta.jan.ai/nightly/jan-mac-x64-{{ VERSION }}.dmg
-            - macOS Apple Silicon: https://delta.jan.ai/nightly/jan-mac-arm64-{{ VERSION }}.dmg
-            - Linux Deb: https://delta.jan.ai/nightly/jan-linux-amd64-{{ VERSION }}.deb
-            - Linux AppImage: https://delta.jan.ai/nightly/jan-linux-x86_64-{{ VERSION }}.AppImage
+            - Windows: https://delta.jan.ai/nightly/jan-nightly-win-x64-{{ VERSION }}.exe
+            - macOS Intel: https://delta.jan.ai/nightly/jan-nightly-mac-x64-{{ VERSION }}.dmg
+            - macOS Apple Silicon: https://delta.jan.ai/nightly/jan-nightly-mac-arm64-{{ VERSION }}.dmg
+            - Linux Deb: https://delta.jan.ai/nightly/jan-nightly-linux-amd64-{{ VERSION }}.deb
+            - Linux AppImage: https://delta.jan.ai/nightly/jan-nightly-linux-x86_64-{{ VERSION }}.AppImage
             - Github action run: https://github.com/janhq/jan/actions/runs/{{ GITHUB_RUN_ID }}
         env:
           DISCORD_WEBHOOK: ${{ secrets.DISCORD_WEBHOOK }}
\ No newline at end of file

From e196aefcd3381801d291aa576fb9c6f5d6434f06 Mon Sep 17 00:00:00 2001
From: Faisal Amir <urmauur@gmail.com>
Date: Thu, 14 Nov 2024 14:46:35 +0700
Subject: [PATCH 03/46] feat: new UI code block and enable copying of code
 blocks or plain text mid-stream (#4010)

* feat: improve code block UI

* chore: update code block UI

* chore: finalize code block UI and LaTeX rendering

* chore: fix Jest tests and clean up unused deps
---
 web/containers/ListContainer/index.tsx        |   5 +
 web/jest.config.js                            |   5 +
 web/mock/empty-mock.tsx                       |   2 +
 web/package.json                              |  26 +-
 .../ChatInput/RichTextEditor.tsx              |   5 +-
 .../SimpleTextMessage/index.tsx               | 278 ++++++++++++------
 web/styles/components/code-block.scss         |  78 ++++-
 web/styles/components/message.scss            |   8 -
 web/utils/codeLanguageExtension.ts            |  34 +++
 9 files changed, 326 insertions(+), 115 deletions(-)
 create mode 100644 web/mock/empty-mock.tsx
 create mode 100644 web/utils/codeLanguageExtension.ts

diff --git a/web/containers/ListContainer/index.tsx b/web/containers/ListContainer/index.tsx
index bd650e315..2b720fb43 100644
--- a/web/containers/ListContainer/index.tsx
+++ b/web/containers/ListContainer/index.tsx
@@ -25,6 +25,11 @@ const ListContainer = ({ children }: Props) => {
         isUserManuallyScrollingUp.current = false
       }
     }
+
+    if (isUserManuallyScrollingUp.current === true) {
+      event.preventDefault()
+      event.stopPropagation()
+    }
     prevScrollTop.current = currentScrollTop
   }, [])
 
diff --git a/web/jest.config.js b/web/jest.config.js
index 12ed39b20..f78007532 100644
--- a/web/jest.config.js
+++ b/web/jest.config.js
@@ -13,6 +13,11 @@ const config = {
   moduleNameMapper: {
     // ...
     '^@/(.*)$': '<rootDir>/$1',
+    'react-markdown': '<rootDir>/mock/empty-mock.tsx',
+    'rehype-highlight': '<rootDir>/mock/empty-mock.tsx',
+    'rehype-katex': '<rootDir>/mock/empty-mock.tsx',
+    'rehype-raw': '<rootDir>/mock/empty-mock.tsx',
+    'remark-math': '<rootDir>/mock/empty-mock.tsx',
   },
   // Add more setup options before each test is run
   // setupFilesAfterEnv: ['<rootDir>/jest.setup.ts'],
diff --git a/web/mock/empty-mock.tsx b/web/mock/empty-mock.tsx
new file mode 100644
index 000000000..dd7f322f2
--- /dev/null
+++ b/web/mock/empty-mock.tsx
@@ -0,0 +1,2 @@
+const EmptyMock = {}
+export default EmptyMock
diff --git a/web/package.json b/web/package.json
index d3ee82a33..7665d354c 100644
--- a/web/package.json
+++ b/web/package.json
@@ -14,13 +14,10 @@
     "test": "jest"
   },
   "dependencies": {
-    "@heroicons/react": "^2.0.18",
-    "@hookform/resolvers": "^3.3.2",
     "@janhq/core": "link:./core",
     "@janhq/joi": "link:./joi",
     "autoprefixer": "10.4.16",
     "class-variance-authority": "^0.7.0",
-    "csstype": "^3.0.10",
     "framer-motion": "^10.16.4",
     "highlight.js": "^11.9.0",
     "jotai": "^2.6.0",
@@ -28,8 +25,6 @@
     "lodash": "^4.17.21",
     "lucide-react": "^0.291.0",
     "marked": "^9.1.2",
-    "marked-highlight": "^2.0.6",
-    "marked-katex-extension": "^5.0.2",
     "next": "14.2.3",
     "next-themes": "^0.2.1",
     "postcss": "8.4.31",
@@ -39,22 +34,25 @@
     "react-circular-progressbar": "^2.1.0",
     "react-dom": "18.2.0",
     "react-dropzone": "14.2.3",
-    "react-hook-form": "^7.47.0",
     "react-hot-toast": "^2.4.1",
     "react-icons": "^4.12.0",
-    "react-scroll-to-bottom": "^4.2.0",
+    "react-markdown": "^9.0.1",
     "react-toastify": "^9.1.3",
+    "rehype-highlight": "^7.0.1",
+    "rehype-highlight-code-lines": "^1.0.4",
+    "rehype-katex": "^7.0.1",
+    "rehype-raw": "^7.0.0",
+    "remark-math": "^6.0.0",
     "sass": "^1.69.4",
+    "slate": "latest",
+    "slate-dom": "0.111.0",
+    "slate-history": "0.110.3",
+    "slate-react": "0.110.3",
     "tailwind-merge": "^2.0.0",
     "tailwindcss": "3.3.5",
     "ulidx": "^2.3.0",
     "use-debounce": "^10.0.0",
-    "uuid": "^9.0.1",
-    "zod": "^3.22.4",
-    "slate": "latest",
-    "slate-dom": "0.111.0",
-    "slate-react": "0.110.3",
-    "slate-history": "0.110.3"
+    "uuid": "^9.0.1"
   },
   "devDependencies": {
     "@next/eslint-plugin-next": "^14.0.1",
@@ -65,7 +63,7 @@
     "@types/react": "18.2.34",
     "@types/react-dom": "18.2.14",
     "@types/react-icons": "^3.0.0",
-    "@types/react-scroll-to-bottom": "^4.2.4",
+    "@types/react-syntax-highlighter": "^15.5.13",
     "@types/uuid": "^9.0.6",
     "@typescript-eslint/eslint-plugin": "^6.8.0",
     "@typescript-eslint/parser": "^6.8.0",
diff --git a/web/screens/Thread/ThreadCenterPanel/ChatInput/RichTextEditor.tsx b/web/screens/Thread/ThreadCenterPanel/ChatInput/RichTextEditor.tsx
index 70fecb8a9..0d477d78d 100644
--- a/web/screens/Thread/ThreadCenterPanel/ChatInput/RichTextEditor.tsx
+++ b/web/screens/Thread/ThreadCenterPanel/ChatInput/RichTextEditor.tsx
@@ -3,8 +3,8 @@ import { useCallback, useEffect, useRef, useState } from 'react'
 
 import { MessageStatus } from '@janhq/core'
 import hljs from 'highlight.js'
-
 import { useAtom, useAtomValue } from 'jotai'
+
 import { BaseEditor, createEditor, Editor, Transforms } from 'slate'
 import { withHistory } from 'slate-history' // Import withHistory
 import {
@@ -270,7 +270,8 @@ const RichTextEditor = ({
       textareaRef.current.style.height = activeSettingInputBox
         ? '100px'
         : '40px'
-      textareaRef.current.style.height = textareaRef.current.scrollHeight + 'px'
+      textareaRef.current.style.height =
+        textareaRef.current.scrollHeight + 2 + 'px'
       textareaRef.current?.scrollTo({
         top: textareaRef.current.scrollHeight,
         behavior: 'instant',
diff --git a/web/screens/Thread/ThreadCenterPanel/SimpleTextMessage/index.tsx b/web/screens/Thread/ThreadCenterPanel/SimpleTextMessage/index.tsx
index 12bcf7a4d..126512115 100644
--- a/web/screens/Thread/ThreadCenterPanel/SimpleTextMessage/index.tsx
+++ b/web/screens/Thread/ThreadCenterPanel/SimpleTextMessage/index.tsx
@@ -1,4 +1,9 @@
-import React, { useEffect, useRef, useState } from 'react'
+/* eslint-disable @typescript-eslint/no-explicit-any */
+/* eslint-disable react-hooks/exhaustive-deps */
+/* eslint-disable @typescript-eslint/naming-convention */
+import React, { useEffect, useState } from 'react'
+
+import Markdown from 'react-markdown'
 
 import {
   ChatCompletionRole,
@@ -8,14 +13,15 @@ import {
 } from '@janhq/core'
 
 import { Tooltip } from '@janhq/joi'
-import hljs from 'highlight.js'
 
 import { useAtomValue } from 'jotai'
 import { FolderOpenIcon } from 'lucide-react'
-import { Marked, Renderer } from 'marked'
-import { markedHighlight } from 'marked-highlight'
-import markedKatex from 'marked-katex-extension'
-
+import rehypeHighlight from 'rehype-highlight'
+import rehypeHighlightCodeLines from 'rehype-highlight-code-lines'
+import rehypeKatex from 'rehype-katex'
+import rehypeRaw from 'rehype-raw'
+import remarkMath from 'remark-math'
+import 'katex/dist/katex.min.css'
 import { twMerge } from 'tailwind-merge'
 
 import LogoMark from '@/containers/Brand/Logo/Mark'
@@ -23,6 +29,7 @@ import LogoMark from '@/containers/Brand/Logo/Mark'
 import { useClipboard } from '@/hooks/useClipboard'
 import { usePath } from '@/hooks/usePath'
 
+import { getLanguageFromExtension } from '@/utils/codeLanguageExtension'
 import { toGibibytes } from '@/utils/converter'
 import { displayDate } from '@/utils/datetime'
 
@@ -53,88 +60,181 @@ const SimpleTextMessage: React.FC<ThreadMessage> = (props) => {
 
   const clipboard = useClipboard({ timeout: 1000 })
 
-  function escapeHtml(html: string): string {
-    return html
-      .replace(/&/g, '&amp;')
-      .replace(/</g, '&lt;')
-      .replace(/>/g, '&gt;')
-      .replace(/"/g, '&quot;')
-      .replace(/'/g, '&#039;')
+  function extractCodeLines(node: { children: { children: any[] }[] }) {
+    const codeLines: any[] = []
+
+    // Helper function to extract text recursively from children
+    function getTextFromNode(node: {
+      type: string
+      value: any
+      children: any[]
+    }): string {
+      if (node.type === 'text') {
+        return node.value
+      } else if (node.children) {
+        return node.children.map(getTextFromNode).join('')
+      }
+      return ''
+    }
+
+    // Traverse each line in the <code> block
+    node.children[0].children.forEach(
+      (lineNode: {
+        type: string
+        tagName: string
+        value: any
+        children: any[]
+      }) => {
+        if (lineNode.type === 'element' && lineNode.tagName === 'span') {
+          const lineContent = getTextFromNode(lineNode)
+          codeLines.push(lineContent)
+        }
+      }
+    )
+
+    // Join the lines with newline characters for proper formatting
+    return codeLines.join('\n')
+  }
+  function wrapCodeBlocksWithoutVisit() {
+    return (tree: { children: any[] }) => {
+      tree.children = tree.children.map((node) => {
+        if (node.tagName === 'pre' && node.children[0]?.tagName === 'code') {
+          const language = node.children[0].properties.className?.[1]?.replace(
+            'language-',
+            ''
+          )
+
+          if (!language) return node
+
+          return {
+            type: 'element',
+            tagName: 'div',
+            properties: {
+              className: ['code-block-wrapper'],
+            },
+            children: [
+              {
+                type: 'element',
+                tagName: 'div',
+                properties: {
+                  className: [
+                    'code-block',
+                    'group/item',
+                    'relative',
+                    'my-4',
+                    'overflow-auto',
+                  ],
+                },
+                children: [
+                  {
+                    type: 'element',
+                    tagName: 'div',
+                    properties: {
+                      className:
+                        'code-header bg-[hsla(var(--app-code-block))] flex justify-between items-center py-2 px-3 border-b border-[hsla(var(--app-border))] rounded-t-lg',
+                    },
+                    children: [
+                      {
+                        type: 'element',
+                        tagName: 'span',
+                        properties: {
+                          className: 'text-xs font-medium text-gray-300',
+                        },
+                        children: [
+                          {
+                            type: 'text',
+                            value: language
+                              ? `${getLanguageFromExtension(language)}`
+                              : 'No file name',
+                          },
+                        ],
+                      },
+                      {
+                        type: 'element',
+                        tagName: 'button',
+                        properties: {
+                          className:
+                            'copy-button ml-auto flex items-center gap-1 text-xs font-medium text-gray-400 hover:text-gray-600 focus:outline-none',
+                          onClick: (event: Event) => {
+                            clipboard.copy(extractCodeLines(node))
+
+                            const button = event.currentTarget as HTMLElement
+                            button.innerHTML = `
+                              <svg xmlns="http://www.w3.org/2000/svg" width="16" height="16" viewBox="0 0 24 24" fill="none" stroke="currentColor" stroke-width="2" stroke-linecap="round" stroke-linejoin="round" class="lucide lucide-check pointer-events-none text-green-600"><path d="M20 6 9 17l-5-5"/></svg>
+                              <span>Copied</span>
+                            `
+
+                            setTimeout(() => {
+                              button.innerHTML = `
+                                <svg xmlns="http://www.w3.org/2000/svg" width="16" height="16" viewBox="0 0 24 24" fill="none" stroke="currentColor" stroke-width="2" stroke-linecap="round" stroke-linejoin="round" class="lucide lucide-copy pointer-events-none text-gray-400"><rect width="14" height="14" x="8" y="8" rx="2" ry="2"/><path d="M4 16c-1.1 0-2-.9-2-2V4c0-1.1.9-2 2-2h10c1.1 0 2 .9 2 2"/></svg>
+                                <span>Copy</span>
+                              `
+                            }, 2000)
+                          },
+                        },
+                        children: [
+                          {
+                            type: 'element',
+                            tagName: 'svg',
+                            properties: {
+                              xmlns: 'http://www.w3.org/2000/svg',
+                              width: '16',
+                              height: '16',
+                              viewBox: '0 0 24 24',
+                              fill: 'none',
+                              stroke: 'currentColor',
+                              strokeWidth: '2',
+                              strokeLinecap: 'round',
+                              strokeLinejoin: 'round',
+                              className:
+                                'lucide lucide-copy pointer-events-none text-gray-400',
+                            },
+                            children: [
+                              {
+                                type: 'element',
+                                tagName: 'rect',
+                                properties: {
+                                  width: '14',
+                                  height: '14',
+                                  x: '8',
+                                  y: '8',
+                                  rx: '2',
+                                  ry: '2',
+                                },
+                                children: [],
+                              },
+                              {
+                                type: 'element',
+                                tagName: 'path',
+                                properties: {
+                                  d: 'M4 16c-1.1 0-2-.9-2-2V4c0-1.1.9-2 2-2h10c1.1 0 2 .9 2 2',
+                                },
+                                children: [],
+                              },
+                            ],
+                          },
+                          { type: 'text', value: 'Copy' },
+                        ],
+                      },
+                    ],
+                  },
+                  node,
+                ],
+              },
+            ],
+          }
+        }
+        return node
+      })
+    }
   }
 
-  const marked: Marked = new Marked(
-    markedHighlight({
-      langPrefix: 'hljs',
-      highlight(code, lang) {
-        if (lang === undefined || lang === '') {
-          return hljs.highlight(code, { language: 'plaintext' }).value
-        }
-        try {
-          return hljs.highlight(code, { language: lang }).value
-        } catch (err) {
-          return hljs.highlight(code, { language: 'javascript' }).value
-        }
-      },
-    }),
-    {
-      renderer: {
-        html: (html: string) => {
-          return escapeHtml(html) // Escape any HTML
-        },
-        link: (href, title, text) => {
-          return Renderer.prototype.link
-            ?.apply(this, [href, title, text])
-            .replace('<a', "<a target='_blank'")
-        },
-        code(code, lang) {
-          return `
-          <div class="relative code-block group/item overflow-auto">
-            <button class='text-xs copy-action hidden group-hover/item:block p-2 rounded-lg absolute top-6 right-2'>
-              ${
-                clipboard.copied
-                  ? `<svg xmlns="http://www.w3.org/2000/svg" width="16" height="16" viewBox="0 0 24 24" fill="none" stroke="currentColor" stroke-width="2" stroke-linecap="round" stroke-linejoin="round" class="lucide lucide-check pointer-events-none text-green-600"><path d="M20 6 9 17l-5-5"/></svg>`
-                  : `<svg xmlns="http://www.w3.org/2000/svg" width="16" height="16" viewBox="0 0 24 24" fill="none" stroke="currentColor" stroke-width="2" stroke-linecap="round" stroke-linejoin="round" class="lucide lucide-copy pointer-events-none text-gray-400"><rect width="14" height="14" x="8" y="8" rx="2" ry="2"/><path d="M4 16c-1.1 0-2-.9-2-2V4c0-1.1.9-2 2-2h10c1.1 0 2 .9 2 2"/></svg>`
-              }
-            </button>
-            <pre class="hljs">
-              <code class="language-${lang ?? ''}">${code}</code>
-            </pre>
-          </div>
-          `
-        },
-      },
-    }
-  )
-
-  marked.use(markedKatex({ throwOnError: false }))
-
   const { onViewFile, onViewFileContainer } = usePath()
-  const parsedText = marked.parse(text)
   const [tokenCount, setTokenCount] = useState(0)
   const [lastTimestamp, setLastTimestamp] = useState<number | undefined>()
   const [tokenSpeed, setTokenSpeed] = useState(0)
   const messages = useAtomValue(getCurrentChatMessagesAtom)
 
-  const codeBlockCopyEvent = useRef((e: Event) => {
-    const target: HTMLElement = e.target as HTMLElement
-    if (typeof target.className !== 'string') return null
-
-    const isCopyActionClassName = target?.className.includes('copy-action')
-
-    if (isCopyActionClassName) {
-      const content = target?.parentNode?.querySelector('code')?.innerText ?? ''
-      clipboard.copy(content)
-    }
-  })
-
-  useEffect(() => {
-    document.addEventListener('click', codeBlockCopyEvent.current)
-    return () => {
-      // eslint-disable-next-line react-hooks/exhaustive-deps
-      document.removeEventListener('click', codeBlockCopyEvent.current)
-    }
-  }, [])
-
   useEffect(() => {
     if (props.status !== MessageStatus.Pending) {
       return
@@ -285,8 +385,22 @@ const SimpleTextMessage: React.FC<ThreadMessage> = (props) => {
               className={twMerge(
                 'message max-width-[100%] flex flex-col gap-y-2 overflow-auto break-all leading-relaxed	'
               )}
-              dangerouslySetInnerHTML={{ __html: parsedText }}
-            />
+              dir="ltr"
+            >
+              <Markdown
+                remarkPlugins={[remarkMath]}
+                rehypePlugins={[
+                  [rehypeKatex, { throwOnError: false }],
+                  rehypeRaw,
+                  rehypeHighlight,
+                  [rehypeHighlightCodeLines, { showLineNumbers: true }],
+                  wrapCodeBlocksWithoutVisit,
+                ]}
+                skipHtml={true}
+              >
+                {text}
+              </Markdown>
+            </div>
           )}
         </>
       </div>
diff --git a/web/styles/components/code-block.scss b/web/styles/components/code-block.scss
index fed56884b..e739e4e24 100644
--- a/web/styles/components/code-block.scss
+++ b/web/styles/components/code-block.scss
@@ -55,22 +55,25 @@
 .hljs {
   overflow: auto;
   display: block;
-  width: auto;
-  background: hsla(var(--app-code-block));
-  color: #f8f8f2;
   padding: 16px;
   font-size: 14px;
-  word-wrap: normal;
+  border-bottom-left-radius: 0.4rem;
+  border-bottom-right-radius: 0.4rem;
+  color: #f8f8f2;
+}
+
+pre {
+  background: hsla(var(--app-code-block));
+  overflow: auto;
+  padding: 8px 16px;
   border-radius: 0.4rem;
-  margin-top: 1rem;
-  margin-bottom: 1rem;
-  white-space: normal;
 }
 pre > code {
-  display: block;
   text-indent: 0;
   white-space: pre;
-  max-width: 10vw;
+  font-size: 14px;
+  overflow: auto;
+  color: #f8f8f2;
 }
 
 .hljs-emphasis {
@@ -81,6 +84,14 @@ pre > code {
   font-weight: bold;
 }
 
+.code-block {
+  pre {
+    padding: 0;
+    border-top-left-radius: 0;
+    border-top-right-radius: 0;
+  }
+}
+
 @media screen and (-ms-high-contrast: active) {
   .hljs-addition,
   .hljs-attribute,
@@ -105,3 +116,52 @@ pre > code {
     font-weight: bold;
   }
 }
+
+.code-block-wrapper {
+  white-space: nowrap;
+}
+
+.code-line {
+  // padding-left: 12px;
+  padding-right: 12px;
+  margin-left: -12px;
+  margin-right: -12px;
+  border-left: 4px solid transparent;
+}
+
+div.code-line:empty {
+  height: 21.5938px;
+}
+
+span.code-line {
+  // min-width: 100%;
+  white-space: pre;
+  display: inline-block;
+  max-width: 10vw;
+}
+
+.code-line.inserted {
+  background-color: var(--color-inserted-line);
+}
+
+.code-line.deleted {
+  background-color: var(--color-deleted-line);
+}
+
+.highlighted-code-line {
+  background-color: var(--color-highlighted-line);
+  border-left: 4px solid var(--color-highlighted-line-indicator);
+}
+
+.numbered-code-line::before {
+  content: attr(data-line-number);
+
+  margin-left: -8px;
+  margin-right: 16px;
+  width: 1rem;
+  font-size: 12px;
+  color: var(--color-text-weak);
+  text-align: right;
+
+  display: inline-block;
+}
diff --git a/web/styles/components/message.scss b/web/styles/components/message.scss
index 0bc0ab6eb..9736e96f8 100644
--- a/web/styles/components/message.scss
+++ b/web/styles/components/message.scss
@@ -27,11 +27,3 @@
     @apply inline-flex flex-col border-s-4 border-[hsla(var(--primary-bg))] bg-[hsla(var(--primary-bg-soft))] px-4 py-2;
   }
 }
-
-.code-block {
-  white-space: normal;
-}
-
-pre {
-  max-width: 95vw;
-}
diff --git a/web/utils/codeLanguageExtension.ts b/web/utils/codeLanguageExtension.ts
new file mode 100644
index 000000000..cdabac015
--- /dev/null
+++ b/web/utils/codeLanguageExtension.ts
@@ -0,0 +1,34 @@
+// Utility function using switch-case for extension to language mapping
+export function getLanguageFromExtension(extension: string): string {
+  switch (extension.toLowerCase()) {
+    case 'ts':
+    case 'tsx':
+      return 'typescript'
+    case 'js':
+    case 'jsx':
+      return 'javascript'
+    case 'py':
+      return 'python'
+    case 'java':
+      return 'java'
+    case 'rb':
+      return 'ruby'
+    case 'cs':
+      return 'csharp'
+    case 'md':
+      return 'markdown'
+    case 'yaml':
+    case 'yml':
+      return 'yaml'
+    case 'sh':
+      return 'bash'
+    case 'rs':
+      return 'rust'
+    case 'kt':
+      return 'kotlin'
+    case 'swift':
+      return 'swift'
+    default:
+      return extension
+  }
+}

From a15d92dbdc0ab5748de8130060b6c156d2579554 Mon Sep 17 00:00:00 2001
From: Louis <louis@jan.ai>
Date: Thu, 14 Nov 2024 21:09:44 +0700
Subject: [PATCH 04/46] feat: integrates cortex.cpp engine variants

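---
 Review note (text in this area is not applied to the tree):
 setDefaultEngine() in src/index.ts calls
 executeOnMain(NODE, 'engineVariant', systemInfo), but engineVariant() in
 src/node/execute.ts is declared as (gpuSetting?: GpuSetting), and run() in
 src/node/index.ts reads the GPU settings from systemInfo.gpuSetting.
 If executeOnMain forwards its arguments unchanged (to be confirmed),
 engineVariant() reads vulkan and run_mode from the wrong object; passing
 systemInfo.gpuSetting looks like the intended call.
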
 .../inference-cortex-extension/download.sh    |  4 +-
 .../rollup.config.ts                          |  1 +
 .../src/@types/global.d.ts                    |  1 +
 .../inference-cortex-extension/src/index.ts   | 23 +++++--
 .../src/node/execute.test.ts                  | 63 ++++++++++---------
 .../src/node/execute.ts                       | 60 +++++++++---------
 .../src/node/index.ts                         | 17 +++--
 7 files changed, 96 insertions(+), 73 deletions(-)

diff --git a/extensions/inference-cortex-extension/download.sh b/extensions/inference-cortex-extension/download.sh
index 902a31e51..8c13a13ef 100755
--- a/extensions/inference-cortex-extension/download.sh
+++ b/extensions/inference-cortex-extension/download.sh
@@ -38,8 +38,8 @@ elif [ "$OS_TYPE" == "Darwin" ]; then
     chmod +x "./bin/cortex-server"
 
     # Download engines for macOS
-    download "${ENGINE_DOWNLOAD_URL}-mac-arm64.tar.gz" -e --strip 1 -o ./bin/arm64/engines/cortex.llamacpp
-    download "${ENGINE_DOWNLOAD_URL}-mac-amd64.tar.gz" -e --strip 1 -o ./bin/x64/engines/cortex.llamacpp
+    download "${ENGINE_DOWNLOAD_URL}-mac-arm64.tar.gz" -e --strip 1 -o "./bin/engines/cortex.llamacpp/mac-arm64/v0.1.35"
+    download "${ENGINE_DOWNLOAD_URL}-mac-amd64.tar.gz" -e --strip 1 -o "./bin/engines/cortex.llamacpp/mac-amd64/v0.1.35"
 
 else
     echo "Unsupported operating system: $OS_TYPE"
diff --git a/extensions/inference-cortex-extension/rollup.config.ts b/extensions/inference-cortex-extension/rollup.config.ts
index 34ad9295d..882ed1921 100644
--- a/extensions/inference-cortex-extension/rollup.config.ts
+++ b/extensions/inference-cortex-extension/rollup.config.ts
@@ -120,6 +120,7 @@ export default [
         DEFAULT_SETTINGS: JSON.stringify(defaultSettingJson),
         CORTEX_API_URL: JSON.stringify('http://127.0.0.1:39291'),
         CORTEX_SOCKET_URL: JSON.stringify('ws://127.0.0.1:39291'),
+        CORTEX_ENGINE_VERSION: JSON.stringify('v0.1.35'),
       }),
       // Allow json resolution
       json(),
diff --git a/extensions/inference-cortex-extension/src/@types/global.d.ts b/extensions/inference-cortex-extension/src/@types/global.d.ts
index 48dbcd780..381a80f5e 100644
--- a/extensions/inference-cortex-extension/src/@types/global.d.ts
+++ b/extensions/inference-cortex-extension/src/@types/global.d.ts
@@ -1,6 +1,7 @@
 declare const NODE: string
 declare const CORTEX_API_URL: string
 declare const CORTEX_SOCKET_URL: string
+declare const CORTEX_ENGINE_VERSION: string
 declare const DEFAULT_SETTINGS: Array<any>
 declare const MODELS: Array<any>
 
diff --git a/extensions/inference-cortex-extension/src/index.ts b/extensions/inference-cortex-extension/src/index.ts
index e83a17561..0331a4d17 100644
--- a/extensions/inference-cortex-extension/src/index.ts
+++ b/extensions/inference-cortex-extension/src/index.ts
@@ -18,6 +18,7 @@ import {
   fs,
   events,
   ModelEvent,
+  SystemInformation,
 } from '@janhq/core'
 import PQueue from 'p-queue'
 import ky from 'ky'
@@ -74,6 +75,7 @@ export default class JanInferenceCortexExtension extends LocalOAIEngine {
 
     this.queue.add(() => this.healthz())
 
+    this.queue.add(() => this.setDefaultEngine(systemInfo))
     this.subscribeToEvents()
 
     window.addEventListener('beforeunload', () => {
@@ -153,7 +155,7 @@ export default class JanInferenceCortexExtension extends LocalOAIEngine {
    * Do health check on cortex.cpp
    * @returns
    */
-  healthz(): Promise<void> {
+  private healthz(): Promise<void> {
     return ky
       .get(`${CORTEX_API_URL}/healthz`, {
         retry: {
@@ -164,11 +166,24 @@ export default class JanInferenceCortexExtension extends LocalOAIEngine {
       .then(() => {})
   }
 
+  /**
+   * Set default engine variant on launch
+   */
+  private async setDefaultEngine(systemInfo: SystemInformation) {
+    const variant = await executeOnMain(NODE, 'engineVariant', systemInfo)
+    return ky
+      .post(
+        `${CORTEX_API_URL}/v1/engines/${InferenceEngine.cortex_llamacpp}/default?version=${CORTEX_ENGINE_VERSION}&variant=${variant}`,
+        { json: {} }
+      )
+      .then(() => {})
+  }
+
   /**
    * Clean cortex processes
    * @returns
    */
-  clean(): Promise<any> {
+  private clean(): Promise<any> {
     return ky
       .delete(`${CORTEX_API_URL}/processmanager/destroy`, {
         timeout: 2000, // maximum 2 seconds
@@ -181,7 +196,7 @@ export default class JanInferenceCortexExtension extends LocalOAIEngine {
   /**
    * Subscribe to cortex.cpp websocket events
    */
-  subscribeToEvents() {
+  private subscribeToEvents() {
     this.queue.add(
       () =>
         new Promise<void>((resolve) => {
@@ -235,7 +250,7 @@ export default class JanInferenceCortexExtension extends LocalOAIEngine {
 }
 
 /// Legacy
-export const getModelFilePath = async (
+const getModelFilePath = async (
   model: Model,
   file: string
 ): Promise<string> => {
diff --git a/extensions/inference-cortex-extension/src/node/execute.test.ts b/extensions/inference-cortex-extension/src/node/execute.test.ts
index 622eb38af..b0a7ece9e 100644
--- a/extensions/inference-cortex-extension/src/node/execute.test.ts
+++ b/extensions/inference-cortex-extension/src/node/execute.test.ts
@@ -1,5 +1,5 @@
 import { describe, expect, it } from '@jest/globals'
-import { executableCortexFile } from './execute'
+import { engineVariant, executableCortexFile } from './execute'
 import { GpuSetting } from '@janhq/core'
 import { cpuInfo } from 'cpu-instructions'
 
@@ -46,8 +46,7 @@ describe('test executable cortex file', () => {
     })
     expect(executableCortexFile(testSettings)).toEqual(
       expect.objectContaining({
-        enginePath: expect.stringContaining(`arm64`),
-        binPath: expect.stringContaining(`bin`),
+        enginePath: expect.stringContaining(`bin`),
         executablePath:
           originalPlatform === 'darwin'
             ? expect.stringContaining(`cortex-server`)
@@ -56,13 +55,13 @@ describe('test executable cortex file', () => {
         vkVisibleDevices: '',
       })
     )
+    expect(engineVariant(testSettings)).toEqual('mac-arm64')
     Object.defineProperty(process, 'arch', {
       value: 'x64',
     })
     expect(executableCortexFile(testSettings)).toEqual(
       expect.objectContaining({
-        enginePath: expect.stringContaining(`x64`),
-        binPath: expect.stringContaining(`bin`),
+        enginePath: expect.stringContaining(`bin`),
         executablePath:
           originalPlatform === 'darwin'
             ? expect.stringContaining(`cortex-server`)
@@ -71,6 +70,7 @@ describe('test executable cortex file', () => {
         vkVisibleDevices: '',
       })
     )
+    expect(engineVariant(testSettings)).toEqual('mac-amd64')
   })
 
   it('executes on Windows CPU', () => {
@@ -84,13 +84,13 @@ describe('test executable cortex file', () => {
     mockCpuInfo.mockReturnValue(['avx'])
     expect(executableCortexFile(settings)).toEqual(
       expect.objectContaining({
-        enginePath: expect.stringContaining(`avx`),
-        binPath: expect.stringContaining(`bin`),
+        enginePath: expect.stringContaining(`bin`),
         executablePath: expect.stringContaining(`cortex-server.exe`),
         cudaVisibleDevices: '',
         vkVisibleDevices: '',
       })
     )
+    expect(engineVariant()).toEqual('windows-amd64-avx')
   })
 
   it('executes on Windows Cuda 11', () => {
@@ -120,13 +120,13 @@ describe('test executable cortex file', () => {
     mockCpuInfo.mockReturnValue(['avx2'])
     expect(executableCortexFile(settings)).toEqual(
       expect.objectContaining({
-        enginePath: expect.stringContaining(`avx2-cuda-11-7`),
-        binPath: expect.stringContaining(`bin`),
+        enginePath: expect.stringContaining(`bin`),
         executablePath: expect.stringContaining(`cortex-server.exe`),
         cudaVisibleDevices: '0',
         vkVisibleDevices: '0',
       })
     )
+    expect(engineVariant(settings)).toEqual('windows-amd64-avx2-cuda-11-7')
   })
 
   it('executes on Windows Cuda 12', () => {
@@ -156,13 +156,15 @@ describe('test executable cortex file', () => {
     mockCpuInfo.mockReturnValue(['noavx'])
     expect(executableCortexFile(settings)).toEqual(
       expect.objectContaining({
-        enginePath: expect.stringContaining(`noavx-cuda-12-0`),
-        binPath: expect.stringContaining(`bin`),
+        enginePath: expect.stringContaining(`bin`),
         executablePath: expect.stringContaining(`cortex-server.exe`),
         cudaVisibleDevices: '0',
         vkVisibleDevices: '0',
       })
     )
+    expect(engineVariant(settings)).toEqual('windows-amd64-noavx-cuda-12-0')
+    mockCpuInfo.mockReturnValue(['avx512'])
+    expect(engineVariant(settings)).toEqual('windows-amd64-avx2-cuda-12-0')
   })
 
   it('executes on Linux CPU', () => {
@@ -176,12 +178,13 @@ describe('test executable cortex file', () => {
     mockCpuInfo.mockReturnValue(['noavx'])
     expect(executableCortexFile(settings)).toEqual(
       expect.objectContaining({
-        enginePath: expect.stringContaining(`noavx`),
+        enginePath: expect.stringContaining(`bin`),
         executablePath: expect.stringContaining(`cortex-server`),
         cudaVisibleDevices: '',
         vkVisibleDevices: '',
       })
     )
+    expect(engineVariant()).toEqual('linux-amd64-noavx')
   })
 
   it('executes on Linux Cuda 11', () => {
@@ -208,15 +211,16 @@ describe('test executable cortex file', () => {
         },
       ],
     }
+    mockCpuInfo.mockReturnValue(['avx512'])
     expect(executableCortexFile(settings)).toEqual(
       expect.objectContaining({
-        enginePath: expect.stringContaining(`cuda-11-7`),
-        binPath: expect.stringContaining(`bin`),
+        enginePath: expect.stringContaining(`bin`),
         executablePath: expect.stringContaining(`cortex-server`),
         cudaVisibleDevices: '0',
         vkVisibleDevices: '0',
       })
     )
+    expect(engineVariant(settings)).toEqual('linux-amd64-avx2-cuda-11-7')
   })
 
   it('executes on Linux Cuda 12', () => {
@@ -245,13 +249,13 @@ describe('test executable cortex file', () => {
     }
     expect(executableCortexFile(settings)).toEqual(
       expect.objectContaining({
-        enginePath: expect.stringContaining(`cuda-12-0`),
-        binPath: expect.stringContaining(`bin`),
+        enginePath: expect.stringContaining(`bin`),
         executablePath: expect.stringContaining(`cortex-server`),
         cudaVisibleDevices: '0',
         vkVisibleDevices: '0',
       })
     )
+    expect(engineVariant(settings)).toEqual('linux-amd64-avx2-cuda-12-0')
   })
 
   // Generate test for different cpu instructions on Linux
@@ -270,14 +274,14 @@ describe('test executable cortex file', () => {
 
       expect(executableCortexFile(settings)).toEqual(
         expect.objectContaining({
-          enginePath: expect.stringContaining(instruction),
-          binPath: expect.stringContaining(`bin`),
+          enginePath: expect.stringContaining('bin'),
           executablePath: expect.stringContaining(`cortex-server`),
 
           cudaVisibleDevices: '',
           vkVisibleDevices: '',
         })
       )
+      expect(engineVariant(settings)).toEqual(`linux-amd64-${instruction}`)
     })
   })
   // Generate test for different cpu instructions on Windows
@@ -294,13 +298,13 @@ describe('test executable cortex file', () => {
       mockCpuInfo.mockReturnValue([instruction])
       expect(executableCortexFile(settings)).toEqual(
         expect.objectContaining({
-          enginePath: expect.stringContaining(instruction),
-          binPath: expect.stringContaining(`bin`),
+          enginePath: expect.stringContaining('bin'),
           executablePath: expect.stringContaining(`cortex-server.exe`),
           cudaVisibleDevices: '',
           vkVisibleDevices: '',
         })
       )
+      expect(engineVariant(settings)).toEqual(`windows-amd64-${instruction}`)
     })
   })
 
@@ -334,13 +338,15 @@ describe('test executable cortex file', () => {
       mockCpuInfo.mockReturnValue([instruction])
       expect(executableCortexFile(settings)).toEqual(
         expect.objectContaining({
-          enginePath: expect.stringContaining(`cuda-12-0`),
-          binPath: expect.stringContaining(`bin`),
+          enginePath: expect.stringContaining(`bin`),
           executablePath: expect.stringContaining(`cortex-server.exe`),
           cudaVisibleDevices: '0',
           vkVisibleDevices: '0',
         })
       )
+      expect(engineVariant(settings)).toEqual(
+        `windows-amd64-${instruction === 'avx512' || instruction === 'avx2' ? 'avx2' : 'noavx'}-cuda-12-0`
+      )
     })
   })
 
@@ -374,13 +380,15 @@ describe('test executable cortex file', () => {
       mockCpuInfo.mockReturnValue([instruction])
       expect(executableCortexFile(settings)).toEqual(
         expect.objectContaining({
-          enginePath: expect.stringContaining(`cuda-12-0`),
-          binPath: expect.stringContaining(`bin`),
+          enginePath: expect.stringContaining(`bin`),
           executablePath: expect.stringContaining(`cortex-server`),
           cudaVisibleDevices: '0',
           vkVisibleDevices: '0',
         })
       )
+      expect(engineVariant(settings)).toEqual(
+        `linux-amd64-${instruction === 'avx512' || instruction === 'avx2' ? 'avx2' : 'noavx'}-cuda-12-0`
+      )
     })
   })
 
@@ -415,13 +423,13 @@ describe('test executable cortex file', () => {
       mockCpuInfo.mockReturnValue([instruction])
       expect(executableCortexFile(settings)).toEqual(
         expect.objectContaining({
-          enginePath: expect.stringContaining(`vulkan`),
-          binPath: expect.stringContaining(`bin`),
+          enginePath: expect.stringContaining(`bin`),
           executablePath: expect.stringContaining(`cortex-server`),
           cudaVisibleDevices: '0',
           vkVisibleDevices: '0',
         })
       )
+      expect(engineVariant(settings)).toEqual(`linux-amd64-vulkan`)
     })
   })
 
@@ -442,8 +450,7 @@ describe('test executable cortex file', () => {
       mockCpuInfo.mockReturnValue([])
       expect(executableCortexFile(settings)).toEqual(
         expect.objectContaining({
-          enginePath: expect.stringContaining(`x64`),
-          binPath: expect.stringContaining(`bin`),
+          enginePath: expect.stringContaining(`bin`),
           executablePath:
             originalPlatform === 'darwin'
               ? expect.stringContaining(`cortex-server`)
diff --git a/extensions/inference-cortex-extension/src/node/execute.ts b/extensions/inference-cortex-extension/src/node/execute.ts
index 74ffb48c6..48a407e31 100644
--- a/extensions/inference-cortex-extension/src/node/execute.ts
+++ b/extensions/inference-cortex-extension/src/node/execute.ts
@@ -4,7 +4,6 @@ import { cpuInfo } from 'cpu-instructions'
 
 export interface CortexExecutableOptions {
   enginePath: string
-  binPath: string
   executablePath: string
   cudaVisibleDevices: string
   vkVisibleDevices: string
@@ -21,11 +20,7 @@ const gpuRunMode = (settings?: GpuSetting): string => {
 
   if (!settings) return ''
 
-  return settings.vulkan === true
-    ? 'vulkan'
-    : settings.run_mode === 'cpu'
-      ? ''
-      : 'cuda'
+  return settings.vulkan === true || settings.run_mode === 'cpu' ? '' : 'cuda'
 }
 
 /**
@@ -34,12 +29,12 @@ const gpuRunMode = (settings?: GpuSetting): string => {
  */
 const os = (): string => {
   return process.platform === 'win32'
-    ? 'win'
+    ? 'windows-amd64'
     : process.platform === 'darwin'
       ? process.arch === 'arm64'
-        ? 'arm64'
-        : 'x64'
-      : 'linux'
+        ? 'mac-arm64'
+        : 'mac-amd64'
+      : 'linux-amd64'
 }
 
 /**
@@ -79,36 +74,43 @@ const cpuInstructions = (): string => {
 }
 
 /**
- * Find which executable file to run based on the current platform.
- * @returns The name of the executable file to run.
+ * The executable options for the cortex.cpp extension.
  */
 export const executableCortexFile = (
   gpuSetting?: GpuSetting
 ): CortexExecutableOptions => {
-  const cpuInstruction = cpuInstructions()
-  let engineFolder = gpuSetting?.vulkan
-    ? 'vulkan'
-    : process.platform === 'darwin'
-      ? os()
-      : [
-        gpuRunMode(gpuSetting) !== 'cuda' ||
-          cpuInstruction === 'avx2' || cpuInstruction === 'avx512'
-          ? cpuInstruction
-          : 'noavx',
-        gpuRunMode(gpuSetting),
-        cudaVersion(gpuSetting),
-      ]
-        .filter((e) => !!e)
-        .join('-')
   let cudaVisibleDevices = gpuSetting?.gpus_in_use.join(',') ?? ''
   let vkVisibleDevices = gpuSetting?.gpus_in_use.join(',') ?? ''
   let binaryName = `cortex-server${extension()}`
   const binPath = path.join(__dirname, '..', 'bin')
   return {
-    enginePath: path.join(binPath, engineFolder),
+    enginePath: binPath,
     executablePath: path.join(binPath, binaryName),
-    binPath: binPath,
     cudaVisibleDevices,
     vkVisibleDevices,
   }
 }
+
+/**
+ * Find which variant to run based on the current platform.
+ */
+export const engineVariant = (gpuSetting?: GpuSetting): string => {
+  const cpuInstruction = cpuInstructions()
+  let engineVariant = [
+    os(),
+    gpuSetting?.vulkan
+      ? 'vulkan'
+      : gpuRunMode(gpuSetting) !== 'cuda'
+        ? // CPU mode - support all variants
+          cpuInstruction
+        : // GPU mode - packaged CUDA variants of avx2 and noavx
+          cpuInstruction === 'avx2' || cpuInstruction === 'avx512'
+          ? 'avx2'
+          : 'noavx',
+    gpuRunMode(gpuSetting),
+    cudaVersion(gpuSetting),
+  ]
+    .filter((e) => !!e)
+    .join('-')
+  return engineVariant
+}
diff --git a/extensions/inference-cortex-extension/src/node/index.ts b/extensions/inference-cortex-extension/src/node/index.ts
index 3816605d2..cf2af045b 100644
--- a/extensions/inference-cortex-extension/src/node/index.ts
+++ b/extensions/inference-cortex-extension/src/node/index.ts
@@ -1,6 +1,6 @@
 import path from 'path'
 import { getJanDataFolderPath, log, SystemInformation } from '@janhq/core/node'
-import { executableCortexFile } from './execute'
+import { engineVariant, executableCortexFile } from './execute'
 import { ProcessWatchdog } from './watchdog'
 import { appResourcePath } from '@janhq/core/node'
 
@@ -20,9 +20,9 @@ function run(systemInfo?: SystemInformation): Promise<any> {
       // If ngl is not set or equal to 0, run on CPU with correct instructions
       systemInfo?.gpuSetting
         ? {
-          ...systemInfo.gpuSetting,
-          run_mode: systemInfo.gpuSetting.run_mode,
-        }
+            ...systemInfo.gpuSetting,
+            run_mode: systemInfo.gpuSetting.run_mode,
+          }
         : undefined
     )
 
@@ -31,7 +31,6 @@ function run(systemInfo?: SystemInformation): Promise<any> {
     log(`[CORTEX]:: Cortex engine path: ${executableOptions.enginePath}`)
 
     addEnvPaths(path.join(appResourcePath(), 'shared'))
-    addEnvPaths(executableOptions.binPath)
     addEnvPaths(executableOptions.enginePath)
     // Add the cortex.llamacpp path to the PATH and LD_LIBRARY_PATH
     // This is required for the cortex engine to run for now since dlls are not moved to the root
@@ -81,15 +80,12 @@ function dispose() {
 function addEnvPaths(dest: string) {
   // Add engine path to the PATH and LD_LIBRARY_PATH
   if (process.platform === 'win32') {
-    process.env.PATH = (process.env.PATH || '').concat(
-      path.delimiter,
-      dest,
-    )
+    process.env.PATH = (process.env.PATH || '').concat(path.delimiter, dest)
     log(`[CORTEX] PATH: ${process.env.PATH}`)
   } else {
     process.env.LD_LIBRARY_PATH = (process.env.LD_LIBRARY_PATH || '').concat(
       path.delimiter,
-      dest,
+      dest
     )
     log(`[CORTEX] LD_LIBRARY_PATH: ${process.env.LD_LIBRARY_PATH}`)
   }
@@ -105,4 +101,5 @@ export interface CortexProcessInfo {
 export default {
   run,
   dispose,
+  engineVariant,
 }

From a38715f18abe788d862e144f04ca6027cb1b0c2a Mon Sep 17 00:00:00 2001
From: Louis <louis@jan.ai>
Date: Thu, 14 Nov 2024 23:51:51 +0700
Subject: [PATCH 05/46] fix: should queue health check and default engine set
 before starting model

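---
 Review note (text in this area is not applied to the tree):
 the queued callback () => this.setDefaultEngine(systemInfo) is now
 registered above the `const systemInfo = await systemInformation()`
 declaration. The callback only reads systemInfo when the queue runs it;
 if that can happen before the declaration has executed (this depends on
 the queue's concurrency and on how quickly healthz() resolves, to be
 confirmed), the read hits an uninitialized binding. Fetching systemInfo
 before queueing these two tasks would remove the ordering dependency.
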
 extensions/inference-cortex-extension/src/index.ts | 6 ++----
 1 file changed, 2 insertions(+), 4 deletions(-)

diff --git a/extensions/inference-cortex-extension/src/index.ts b/extensions/inference-cortex-extension/src/index.ts
index 0331a4d17..e88608d57 100644
--- a/extensions/inference-cortex-extension/src/index.ts
+++ b/extensions/inference-cortex-extension/src/index.ts
@@ -68,14 +68,12 @@ export default class JanInferenceCortexExtension extends LocalOAIEngine {
 
     super.onLoad()
 
+    this.queue.add(() => this.healthz())
+    this.queue.add(() => this.setDefaultEngine(systemInfo))
     // Run the process watchdog
     const systemInfo = await systemInformation()
     await this.clean()
     await executeOnMain(NODE, 'run', systemInfo)
-
-    this.queue.add(() => this.healthz())
-
-    this.queue.add(() => this.setDefaultEngine(systemInfo))
     this.subscribeToEvents()
 
     window.addEventListener('beforeunload', () => {

From 6f066357ed2fb137c8ff7d577c8e166ab1baf74a Mon Sep 17 00:00:00 2001
From: Louis <louis@jan.ai>
Date: Sat, 16 Nov 2024 16:05:49 +0700
Subject: [PATCH 06/46] chore: bump new engine version 0.1.39 and get rid of
 dangling process

---
 extensions/inference-cortex-extension/download.bat        | 4 ++--
 extensions/inference-cortex-extension/download.sh         | 8 ++++----
 extensions/inference-cortex-extension/rollup.config.ts    | 2 +-
 extensions/inference-cortex-extension/src/node/index.ts   | 4 ++++
 web/hooks/useImportModel.ts                               | 1 -
 .../Thread/ThreadCenterPanel/LoadModelError/index.tsx     | 3 ---
 6 files changed, 11 insertions(+), 11 deletions(-)

diff --git a/extensions/inference-cortex-extension/download.bat b/extensions/inference-cortex-extension/download.bat
index ecff683c3..e89d42f23 100644
--- a/extensions/inference-cortex-extension/download.bat
+++ b/extensions/inference-cortex-extension/download.bat
@@ -4,8 +4,8 @@ set SHARED_PATH=./../../electron/shared
 set /p CORTEX_VERSION=<./bin/version.txt
 
 @REM Download cortex.llamacpp binaries
-set VERSION=v0.1.35
-set DOWNLOAD_URL=https://github.com/janhq/cortex.llamacpp/releases/download/%VERSION%/cortex.llamacpp-0.1.35-windows-amd64
+set VERSION=v0.1.39
+set DOWNLOAD_URL=https://github.com/janhq/cortex.llamacpp/releases/download/%VERSION%/cortex.llamacpp-0.1.39-windows-amd64
 set CUDA_DOWNLOAD_URL=https://github.com/janhq/cortex.llamacpp/releases/download/%VERSION%
 set SUBFOLDERS=noavx-cuda-12-0 noavx-cuda-11-7 avx2-cuda-12-0 avx2-cuda-11-7 noavx avx avx2 avx512 vulkan
 
diff --git a/extensions/inference-cortex-extension/download.sh b/extensions/inference-cortex-extension/download.sh
index 8c13a13ef..b6b181987 100755
--- a/extensions/inference-cortex-extension/download.sh
+++ b/extensions/inference-cortex-extension/download.sh
@@ -3,8 +3,8 @@
 # Read CORTEX_VERSION
 CORTEX_VERSION=$(cat ./bin/version.txt)
 CORTEX_RELEASE_URL="https://github.com/janhq/cortex/releases/download"
-ENGINE_DOWNLOAD_URL="https://github.com/janhq/cortex.llamacpp/releases/download/v0.1.35/cortex.llamacpp-0.1.35"
-CUDA_DOWNLOAD_URL="https://github.com/janhq/cortex.llamacpp/releases/download/v0.1.35"
+ENGINE_DOWNLOAD_URL="https://github.com/janhq/cortex.llamacpp/releases/download/v0.1.39/cortex.llamacpp-0.1.39"
+CUDA_DOWNLOAD_URL="https://github.com/janhq/cortex.llamacpp/releases/download/v0.1.39"
 # Detect platform
 OS_TYPE=$(uname)
 
@@ -38,8 +38,8 @@ elif [ "$OS_TYPE" == "Darwin" ]; then
     chmod +x "./bin/cortex-server"
 
     # Download engines for macOS
-    download "${ENGINE_DOWNLOAD_URL}-mac-arm64.tar.gz" -e --strip 1 -o "./bin/engines/cortex.llamacpp/mac-arm64/v0.1.35"
-    download "${ENGINE_DOWNLOAD_URL}-mac-amd64.tar.gz" -e --strip 1 -o "./bin/engines/cortex.llamacpp/mac-amd64/v0.1.35"
+    download "${ENGINE_DOWNLOAD_URL}-mac-arm64.tar.gz" -e --strip 1 -o "./bin/engines/cortex.llamacpp/mac-arm64/v0.1.39"
+    download "${ENGINE_DOWNLOAD_URL}-mac-amd64.tar.gz" -e --strip 1 -o "./bin/engines/cortex.llamacpp/mac-amd64/v0.1.39"
 
 else
     echo "Unsupported operating system: $OS_TYPE"
diff --git a/extensions/inference-cortex-extension/rollup.config.ts b/extensions/inference-cortex-extension/rollup.config.ts
index 882ed1921..00fae78ba 100644
--- a/extensions/inference-cortex-extension/rollup.config.ts
+++ b/extensions/inference-cortex-extension/rollup.config.ts
@@ -120,7 +120,7 @@ export default [
         DEFAULT_SETTINGS: JSON.stringify(defaultSettingJson),
         CORTEX_API_URL: JSON.stringify('http://127.0.0.1:39291'),
         CORTEX_SOCKET_URL: JSON.stringify('ws://127.0.0.1:39291'),
-        CORTEX_ENGINE_VERSION: JSON.stringify('v0.1.35'),
+        CORTEX_ENGINE_VERSION: JSON.stringify('v0.1.39'),
       }),
       // Allow json resolution
       json(),
diff --git a/extensions/inference-cortex-extension/src/node/index.ts b/extensions/inference-cortex-extension/src/node/index.ts
index cf2af045b..4c6d96292 100644
--- a/extensions/inference-cortex-extension/src/node/index.ts
+++ b/extensions/inference-cortex-extension/src/node/index.ts
@@ -39,6 +39,10 @@ function run(systemInfo?: SystemInformation): Promise<any> {
     )
 
     const dataFolderPath = getJanDataFolderPath()
+    if (watchdog) {
+      watchdog.terminate()
+    }
+
     watchdog = new ProcessWatchdog(
       executableOptions.executablePath,
       [
diff --git a/web/hooks/useImportModel.ts b/web/hooks/useImportModel.ts
index c49ddb964..84c6a5126 100644
--- a/web/hooks/useImportModel.ts
+++ b/web/hooks/useImportModel.ts
@@ -9,7 +9,6 @@ import {
   OptionType,
   events,
   fs,
-  baseName,
 } from '@janhq/core'
 
 import { atom, useAtomValue, useSetAtom } from 'jotai'
diff --git a/web/screens/Thread/ThreadCenterPanel/LoadModelError/index.tsx b/web/screens/Thread/ThreadCenterPanel/LoadModelError/index.tsx
index 0420b7d51..16a0024e8 100644
--- a/web/screens/Thread/ThreadCenterPanel/LoadModelError/index.tsx
+++ b/web/screens/Thread/ThreadCenterPanel/LoadModelError/index.tsx
@@ -9,8 +9,6 @@ import { MainViewState } from '@/constants/screens'
 
 import { loadModelErrorAtom } from '@/hooks/useActiveModel'
 
-import { useSettings } from '@/hooks/useSettings'
-
 import { mainViewStateAtom } from '@/helpers/atoms/App.atom'
 import { selectedSettingAtom } from '@/helpers/atoms/Setting.atom'
 import { activeThreadAtom } from '@/helpers/atoms/Thread.atom'
@@ -21,7 +19,6 @@ const LoadModelError = () => {
   const setMainState = useSetAtom(mainViewStateAtom)
   const setSelectedSettingScreen = useSetAtom(selectedSettingAtom)
   const activeThread = useAtomValue(activeThreadAtom)
-  const { settings } = useSettings()
 
   const PORT_NOT_AVAILABLE = 'PORT_NOT_AVAILABLE'
 

From 6e9c34baf7b7352f5052c969975a69dfe8f0a43d Mon Sep 17 00:00:00 2001
From: Louis <louis@jan.ai>
Date: Sat, 16 Nov 2024 16:23:06 +0700
Subject: [PATCH 07/46] chore: cortex.cpp version bump

---
 extensions/inference-cortex-extension/bin/version.txt | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/extensions/inference-cortex-extension/bin/version.txt b/extensions/inference-cortex-extension/bin/version.txt
index e6d5cb833..89f843d1d 100644
--- a/extensions/inference-cortex-extension/bin/version.txt
+++ b/extensions/inference-cortex-extension/bin/version.txt
@@ -1 +1 @@
-1.0.2
\ No newline at end of file
+1.0.3-rc1
\ No newline at end of file

From 693796a68dae560c225836430cf43500d7b978a4 Mon Sep 17 00:00:00 2001
From: Louis <louis@jan.ai>
Date: Sun, 17 Nov 2024 14:36:52 +0700
Subject: [PATCH 08/46] fix: remove hacky retrieval settings and engine routing

---
 .../assistant-extension/src/node/engine.ts    | 38 -------------------
 .../assistant-extension/src/node/retrieval.ts | 25 ++++--------
 .../ThreadCenterPanel/ChatInput/index.tsx     | 35 ++++-------------
 3 files changed, 15 insertions(+), 83 deletions(-)
 delete mode 100644 extensions/assistant-extension/src/node/engine.ts

diff --git a/extensions/assistant-extension/src/node/engine.ts b/extensions/assistant-extension/src/node/engine.ts
deleted file mode 100644
index 05a380340..000000000
--- a/extensions/assistant-extension/src/node/engine.ts
+++ /dev/null
@@ -1,38 +0,0 @@
-import fs from 'fs'
-import path from 'path'
-import { SettingComponentProps, getJanDataFolderPath } from '@janhq/core/node'
-
-// Sec: Do not send engine settings over requests
-// Read it manually instead
-export const readEmbeddingEngine = (engineName: string) => {
-  if (engineName !== 'openai' && engineName !== 'groq') {
-    const engineSettings = fs.readFileSync(
-      path.join(getJanDataFolderPath(), 'engines', `${engineName}.json`),
-      'utf-8'
-    )
-    return JSON.parse(engineSettings)
-  } else {
-    const settingDirectoryPath = path.join(
-      getJanDataFolderPath(),
-      'settings',
-      '@janhq',
-      // TODO: James - To be removed
-      engineName === 'openai'
-        ? 'inference-openai-extension'
-        : 'inference-groq-extension',
-      'settings.json'
-    )
-
-    const content = fs.readFileSync(settingDirectoryPath, 'utf-8')
-    const settings: SettingComponentProps[] = JSON.parse(content)
-    const apiKeyId = engineName === 'openai' ? 'openai-api-key' : 'groq-api-key'
-    const keySetting = settings.find((setting) => setting.key === apiKeyId)
-
-    let apiKey = keySetting?.controllerProps.value
-    if (typeof apiKey !== 'string') apiKey = ''
-
-    return {
-      api_key: apiKey,
-    }
-  }
-}
diff --git a/extensions/assistant-extension/src/node/retrieval.ts b/extensions/assistant-extension/src/node/retrieval.ts
index 3386029fa..5804ff763 100644
--- a/extensions/assistant-extension/src/node/retrieval.ts
+++ b/extensions/assistant-extension/src/node/retrieval.ts
@@ -8,7 +8,6 @@ import { MemoryVectorStore } from 'langchain/vectorstores/memory'
 import { HNSWLib } from 'langchain/vectorstores/hnswlib'
 
 import { OpenAIEmbeddings } from 'langchain/embeddings/openai'
-import { readEmbeddingEngine } from './engine'
 
 export class Retrieval {
   public chunkSize: number = 100
@@ -28,8 +27,8 @@ export class Retrieval {
     // declare time-weighted retriever and storage
     this.timeWeightedVectorStore = new MemoryVectorStore(
       new OpenAIEmbeddings(
-        { openAIApiKey: 'nitro-embedding' },
-        { basePath: 'http://127.0.0.1:3928/v1' }
+        { openAIApiKey: 'cortex-embedding' },
+        { basePath: 'http://127.0.0.1:39291/v1' }
       )
     )
     this.timeWeightedretriever = new TimeWeightedVectorStoreRetriever({
@@ -49,21 +48,11 @@ export class Retrieval {
   }
 
   public updateEmbeddingEngine(model: string, engine: string): void {
-    // Engine settings are not compatible with the current embedding model params
-    // Switch case manually for now
-    if (engine === 'nitro') {
-      this.embeddingModel = new OpenAIEmbeddings(
-        { openAIApiKey: 'nitro-embedding', model },
-        // TODO: Raw settings
-        { basePath: 'http://127.0.0.1:3928/v1' },
-      )
-    } else {
-      // Fallback to OpenAI Settings
-      const settings = readEmbeddingEngine(engine)
-      this.embeddingModel = new OpenAIEmbeddings({
-        openAIApiKey: settings.api_key,
-      })
-    }
+    this.embeddingModel = new OpenAIEmbeddings(
+      { openAIApiKey: 'cortex-embedding', model },
+      // TODO: Raw settings
+      { basePath: 'http://127.0.0.1:39291/v1' }
+    )
 
     // update time-weighted embedding model
     this.timeWeightedVectorStore.embeddings = this.embeddingModel
diff --git a/web/screens/Thread/ThreadCenterPanel/ChatInput/index.tsx b/web/screens/Thread/ThreadCenterPanel/ChatInput/index.tsx
index 066c93430..5662cd0c0 100644
--- a/web/screens/Thread/ThreadCenterPanel/ChatInput/index.tsx
+++ b/web/screens/Thread/ThreadCenterPanel/ChatInput/index.tsx
@@ -249,19 +249,11 @@ const ChatInput = () => {
                   <li
                     className={twMerge(
                       'text-[hsla(var(--text-secondary)] hover:bg-secondary flex w-full cursor-pointer items-center space-x-2 px-4 py-2 hover:bg-[hsla(var(--dropdown-menu-hover-bg))]',
-                      activeThread?.assistants[0].model.settings?.text_model ===
-                        false
-                        ? 'cursor-not-allowed opacity-50'
-                        : 'cursor-pointer'
+                      'cursor-pointer'
                     )}
                     onClick={() => {
-                      if (
-                        activeThread?.assistants[0].model.settings
-                          ?.text_model !== false
-                      ) {
-                        fileInputRef.current?.click()
-                        setShowAttacmentMenus(false)
-                      }
+                      fileInputRef.current?.click()
+                      setShowAttacmentMenus(false)
                     }}
                   >
                     <FileTextIcon size={16} />
@@ -270,22 +262,11 @@ const ChatInput = () => {
                 }
                 content={
                   (!activeThread?.assistants[0].tools ||
-                    !activeThread?.assistants[0].tools[0]?.enabled ||
-                    activeThread?.assistants[0].model.settings?.text_model ===
-                      false) && (
-                    <>
-                      {activeThread?.assistants[0].model.settings
-                        ?.text_model === false ? (
-                        <span>
-                          This model does not support text-based retrieval.
-                        </span>
-                      ) : (
-                        <span>
-                          Turn on Retrieval in Assistant Settings to use this
-                          feature.
-                        </span>
-                      )}
-                    </>
+                    !activeThread?.assistants[0].tools[0]?.enabled) && (
+                    <span>
+                      Turn on Retrieval in Assistant Settings to use this
+                      feature.
+                    </span>
                   )
                 }
               />

From f5a709b2681001ae19072601ba8ec732a91f8e10 Mon Sep 17 00:00:00 2001
From: Faisal Amir <urmauur@gmail.com>
Date: Mon, 18 Nov 2024 09:24:44 +0700
Subject: [PATCH 09/46] fix: max tokens params OpenRouter (#4026)

---
 .../inference-openrouter-extension/resources/models.json      | 4 ++--
 .../SettingDetailItem/SettingDetailTextInputItem/index.tsx    | 1 -
 2 files changed, 2 insertions(+), 3 deletions(-)

diff --git a/extensions/inference-openrouter-extension/resources/models.json b/extensions/inference-openrouter-extension/resources/models.json
index d89c07e5a..31dea8734 100644
--- a/extensions/inference-openrouter-extension/resources/models.json
+++ b/extensions/inference-openrouter-extension/resources/models.json
@@ -1,4 +1,4 @@
-  [
+[
   {
     "sources": [
       {
@@ -13,7 +13,7 @@
     "format": "api",
     "settings": {},
     "parameters": {
-      "max_tokens": 1024,
+      "max_tokens": 128000,
       "temperature": 0.7,
       "top_p": 0.95,
       "frequency_penalty": 0,
diff --git a/web/screens/Settings/SettingDetail/SettingDetailItem/SettingDetailTextInputItem/index.tsx b/web/screens/Settings/SettingDetail/SettingDetailItem/SettingDetailTextInputItem/index.tsx
index 647263ffe..73bd18f50 100644
--- a/web/screens/Settings/SettingDetail/SettingDetailItem/SettingDetailTextInputItem/index.tsx
+++ b/web/screens/Settings/SettingDetail/SettingDetailItem/SettingDetailTextInputItem/index.tsx
@@ -123,7 +123,6 @@ const InputExtraActions: React.FC<InputActionProps> = ({
   return (
     <div className="flex flex-row space-x-2">
       {actions.map((action) => {
-        console.log(action)
         switch (action) {
           case 'copy':
             return copied ? (

From c54838159075600ecb5791af924015e5dc242034 Mon Sep 17 00:00:00 2001
From: Faisal Amir <urmauur@gmail.com>
Date: Mon, 18 Nov 2024 09:24:54 +0700
Subject: [PATCH 10/46] fix: api local server max ctx len not update when
 switch model (#4027)

* fix: api local server max ctx len not update when switch model

* chore: remove log
---
 .../LocalServer/LocalServerRightPanel/index.tsx    | 14 +++++++++++++-
 1 file changed, 13 insertions(+), 1 deletion(-)

diff --git a/web/screens/LocalServer/LocalServerRightPanel/index.tsx b/web/screens/LocalServer/LocalServerRightPanel/index.tsx
index a59e83e7e..f0a11a865 100644
--- a/web/screens/LocalServer/LocalServerRightPanel/index.tsx
+++ b/web/screens/LocalServer/LocalServerRightPanel/index.tsx
@@ -19,8 +19,10 @@ import { getConfigurationsData } from '@/utils/componentSettings'
 
 import { serverEnabledAtom } from '@/helpers/atoms/LocalServer.atom'
 import { selectedModelAtom } from '@/helpers/atoms/Model.atom'
+import { getActiveThreadModelParamsAtom } from '@/helpers/atoms/Thread.atom'
 
 const LocalServerRightPanel = () => {
+  const activeModelParams = useAtomValue(getActiveThreadModelParamsAtom)
   const loadModelError = useAtomValue(loadModelErrorAtom)
   const serverEnabled = useAtomValue(serverEnabledAtom)
   const setModalTroubleShooting = useSetAtom(modalTroubleShootingAtom)
@@ -48,8 +50,17 @@ const LocalServerRightPanel = () => {
     selectedModel
   )
 
+  const modelEngineParams = extractModelLoadParams(
+    {
+      ...selectedModel?.settings,
+      ...activeModelParams,
+    },
+    selectedModel?.settings
+  )
+
   const componentDataEngineSetting = getConfigurationsData(
-    currentModelSettingParams
+    modelEngineParams,
+    selectedModel
   )
 
   const engineSettings = useMemo(
@@ -57,6 +68,7 @@ const LocalServerRightPanel = () => {
       componentDataEngineSetting.filter(
         (x) => x.key !== 'prompt_template' && x.key !== 'embedding'
       ),
+
     [componentDataEngineSetting]
   )
 

From 5243e4a095d6151de0404633c5a50d7bc137367f Mon Sep 17 00:00:00 2001
From: Louis <louis@jan.ai>
Date: Mon, 18 Nov 2024 14:22:10 +0700
Subject: [PATCH 11/46] fix: correct cortex repo url

---
 extensions/inference-cortex-extension/download.bat | 2 +-
 extensions/inference-cortex-extension/download.sh  | 2 +-
 2 files changed, 2 insertions(+), 2 deletions(-)

diff --git a/extensions/inference-cortex-extension/download.bat b/extensions/inference-cortex-extension/download.bat
index e89d42f23..1f4102b97 100644
--- a/extensions/inference-cortex-extension/download.bat
+++ b/extensions/inference-cortex-extension/download.bat
@@ -9,7 +9,7 @@ set DOWNLOAD_URL=https://github.com/janhq/cortex.llamacpp/releases/download/%VER
 set CUDA_DOWNLOAD_URL=https://github.com/janhq/cortex.llamacpp/releases/download/%VERSION%
 set SUBFOLDERS=noavx-cuda-12-0 noavx-cuda-11-7 avx2-cuda-12-0 avx2-cuda-11-7 noavx avx avx2 avx512 vulkan
 
-call .\node_modules\.bin\download -e --strip 1 -o %BIN_PATH% https://github.com/janhq/cortex/releases/download/v%CORTEX_VERSION%/cortex-%CORTEX_VERSION%-windows-amd64.tar.gz
+call .\node_modules\.bin\download -e --strip 1 -o %BIN_PATH% https://github.com/janhq/cortex.cpp/releases/download/v%CORTEX_VERSION%/cortex-%CORTEX_VERSION%-windows-amd64.tar.gz
 call .\node_modules\.bin\download %DOWNLOAD_URL%-avx2-cuda-12-0.tar.gz -e --strip 1 -o %BIN_PATH%/avx2-cuda-12-0/engines/cortex.llamacpp
 call .\node_modules\.bin\download %DOWNLOAD_URL%-avx2-cuda-11-7.tar.gz -e --strip 1 -o %BIN_PATH%/avx2-cuda-11-7/engines/cortex.llamacpp
 call .\node_modules\.bin\download %DOWNLOAD_URL%-noavx-cuda-12-0.tar.gz -e --strip 1 -o %BIN_PATH%/noavx-cuda-12-0/engines/cortex.llamacpp
diff --git a/extensions/inference-cortex-extension/download.sh b/extensions/inference-cortex-extension/download.sh
index b6b181987..6a2809f0c 100755
--- a/extensions/inference-cortex-extension/download.sh
+++ b/extensions/inference-cortex-extension/download.sh
@@ -2,7 +2,7 @@
 
 # Read CORTEX_VERSION
 CORTEX_VERSION=$(cat ./bin/version.txt)
-CORTEX_RELEASE_URL="https://github.com/janhq/cortex/releases/download"
+CORTEX_RELEASE_URL="https://github.com/janhq/cortex.cpp/releases/download"
 ENGINE_DOWNLOAD_URL="https://github.com/janhq/cortex.llamacpp/releases/download/v0.1.39/cortex.llamacpp-0.1.39"
 CUDA_DOWNLOAD_URL="https://github.com/janhq/cortex.llamacpp/releases/download/v0.1.39"
 # Detect platform

From f75dc662ee74542d53b6e42405f4881325c17764 Mon Sep 17 00:00:00 2001
From: Louis <louis@jan.ai>
Date: Mon, 18 Nov 2024 15:54:26 +0700
Subject: [PATCH 12/46] chore: reduce app launch time

---
 .gitignore                                    |  1 +
 .../inference-cortex-extension/download.bat   | 25 ++++++-------
 .../inference-cortex-extension/download.sh    | 32 +++++++++--------
 .../inference-cortex-extension/src/index.ts   |  2 +-
 .../src/node/execute.test.ts                  | 35 +++++++++++--------
 .../src/node/execute.ts                       |  8 +++--
 .../src/node/index.ts                         |  9 -----
 7 files changed, 57 insertions(+), 55 deletions(-)

diff --git a/.gitignore b/.gitignore
index f28d152d9..ab815678a 100644
--- a/.gitignore
+++ b/.gitignore
@@ -47,3 +47,4 @@ coverage
 .yarnrc
 test_results.html
 *.tsbuildinfo
+electron/shared/**
diff --git a/extensions/inference-cortex-extension/download.bat b/extensions/inference-cortex-extension/download.bat
index 1f4102b97..25527eb36 100644
--- a/extensions/inference-cortex-extension/download.bat
+++ b/extensions/inference-cortex-extension/download.bat
@@ -2,6 +2,7 @@
 set BIN_PATH=./bin
 set SHARED_PATH=./../../electron/shared
 set /p CORTEX_VERSION=<./bin/version.txt
+set ENGINE_VERSION=0.1.39
 
 @REM Download cortex.llamacpp binaries
 set VERSION=v0.1.39
@@ -10,15 +11,15 @@ set CUDA_DOWNLOAD_URL=https://github.com/janhq/cortex.llamacpp/releases/download
 set SUBFOLDERS=noavx-cuda-12-0 noavx-cuda-11-7 avx2-cuda-12-0 avx2-cuda-11-7 noavx avx avx2 avx512 vulkan
 
 call .\node_modules\.bin\download -e --strip 1 -o %BIN_PATH% https://github.com/janhq/cortex.cpp/releases/download/v%CORTEX_VERSION%/cortex-%CORTEX_VERSION%-windows-amd64.tar.gz
-call .\node_modules\.bin\download %DOWNLOAD_URL%-avx2-cuda-12-0.tar.gz -e --strip 1 -o %BIN_PATH%/avx2-cuda-12-0/engines/cortex.llamacpp
-call .\node_modules\.bin\download %DOWNLOAD_URL%-avx2-cuda-11-7.tar.gz -e --strip 1 -o %BIN_PATH%/avx2-cuda-11-7/engines/cortex.llamacpp
-call .\node_modules\.bin\download %DOWNLOAD_URL%-noavx-cuda-12-0.tar.gz -e --strip 1 -o %BIN_PATH%/noavx-cuda-12-0/engines/cortex.llamacpp
-call .\node_modules\.bin\download %DOWNLOAD_URL%-noavx-cuda-11-7.tar.gz -e --strip 1 -o %BIN_PATH%/noavx-cuda-11-7/engines/cortex.llamacpp
-call .\node_modules\.bin\download %DOWNLOAD_URL%-noavx.tar.gz -e --strip 1 -o %BIN_PATH%/noavx/engines/cortex.llamacpp
-call .\node_modules\.bin\download %DOWNLOAD_URL%-avx.tar.gz -e --strip 1 -o %BIN_PATH%/avx/engines/cortex.llamacpp
-call .\node_modules\.bin\download %DOWNLOAD_URL%-avx2.tar.gz -e --strip 1 -o %BIN_PATH%/avx2/engines/cortex.llamacpp
-call .\node_modules\.bin\download %DOWNLOAD_URL%-avx512.tar.gz -e --strip 1 -o %BIN_PATH%/avx512/engines/cortex.llamacpp
-call .\node_modules\.bin\download %DOWNLOAD_URL%-vulkan.tar.gz -e --strip 1 -o %BIN_PATH%/vulkan/engines/cortex.llamacpp
+call .\node_modules\.bin\download %DOWNLOAD_URL%-avx2-cuda-12-0.tar.gz -e --strip 1 -o %SHARED_PATH%/engines/cortex.llamacpp/windows-amd64-avx2-cuda-12-0/v%ENGINE_VERSION%
+call .\node_modules\.bin\download %DOWNLOAD_URL%-avx2-cuda-11-7.tar.gz -e --strip 1 -o %SHARED_PATH%/engines/cortex.llamacpp/windows-amd64-avx2-cuda-11-7/v%ENGINE_VERSION%
+call .\node_modules\.bin\download %DOWNLOAD_URL%-noavx-cuda-12-0.tar.gz -e --strip 1 -o %SHARED_PATH%/engines/cortex.llamacpp/windows-amd64-noavx-cuda-12-0/v%ENGINE_VERSION%
+call .\node_modules\.bin\download %DOWNLOAD_URL%-noavx-cuda-11-7.tar.gz -e --strip 1 -o %SHARED_PATH%/engines/cortex.llamacpp/windows-amd64-noavx-cuda-11-7/v%ENGINE_VERSION%
+call .\node_modules\.bin\download %DOWNLOAD_URL%-noavx.tar.gz -e --strip 1 -o %SHARED_PATH%/engines/cortex.llamacpp/windows-amd64-noavx/v%ENGINE_VERSION%
+call .\node_modules\.bin\download %DOWNLOAD_URL%-avx.tar.gz -e --strip 1 -o %SHARED_PATH%/engines/cortex.llamacpp/windows-amd64-avx/v%ENGINE_VERSION%
+call .\node_modules\.bin\download %DOWNLOAD_URL%-avx2.tar.gz -e --strip 1 -o %SHARED_PATH%/engines/cortex.llamacpp/windows-amd64-avx2/v%ENGINE_VERSION%
+call .\node_modules\.bin\download %DOWNLOAD_URL%-avx512.tar.gz -e --strip 1 -o %SHARED_PATH%/engines/cortex.llamacpp/windows-amd64-avx512/v%ENGINE_VERSION%
+call .\node_modules\.bin\download %DOWNLOAD_URL%-vulkan.tar.gz -e --strip 1 -o %SHARED_PATH%/engines/cortex.llamacpp/windows-amd64-vulkan/v%ENGINE_VERSION%
 call .\node_modules\.bin\download %CUDA_DOWNLOAD_URL%/cuda-12-0-windows-amd64.tar.gz -e --strip 1 -o %SHARED_PATH%
 call .\node_modules\.bin\download %CUDA_DOWNLOAD_URL%/cuda-11-7-windows-amd64.tar.gz -e --strip 1 -o %SHARED_PATH%
 
@@ -28,12 +29,12 @@ del %BIN_PATH%\cortex.exe
 
 @REM Loop through each folder and move DLLs (excluding engine.dll)
 for %%F in (%SUBFOLDERS%) do (
-    echo Processing folder: %BIN_PATH%\%%F
+    echo Processing folder: %SHARED_PATH%\engines\cortex.llamacpp\windows-amd64-%%F\v%ENGINE_VERSION%
 
     @REM Move all .dll files except engine.dll
-    for %%D in (%BIN_PATH%\%%F\engines\cortex.llamacpp\*.dll) do (
+    for %%D in (%SHARED_PATH%\engines\cortex.llamacpp\windows-amd64-%%F\v%ENGINE_VERSION%\*.dll) do (
         if /I not "%%~nxD"=="engine.dll" (
-            move "%%D" "%BIN_PATH%"
+            move "%%D" "%SHARED_PATH%"
         )
     )
 )
diff --git a/extensions/inference-cortex-extension/download.sh b/extensions/inference-cortex-extension/download.sh
index 6a2809f0c..9c0ebbe64 100755
--- a/extensions/inference-cortex-extension/download.sh
+++ b/extensions/inference-cortex-extension/download.sh
@@ -2,9 +2,11 @@
 
 # Read CORTEX_VERSION
 CORTEX_VERSION=$(cat ./bin/version.txt)
+ENGINE_VERSION=0.1.39
 CORTEX_RELEASE_URL="https://github.com/janhq/cortex.cpp/releases/download"
-ENGINE_DOWNLOAD_URL="https://github.com/janhq/cortex.llamacpp/releases/download/v0.1.39/cortex.llamacpp-0.1.39"
-CUDA_DOWNLOAD_URL="https://github.com/janhq/cortex.llamacpp/releases/download/v0.1.39"
+ENGINE_DOWNLOAD_URL="https://github.com/janhq/cortex.llamacpp/releases/download/v${ENGINE_VERSION}/cortex.llamacpp-${ENGINE_VERSION}"
+CUDA_DOWNLOAD_URL="https://github.com/janhq/cortex.llamacpp/releases/download/v${ENGINE_VERSION}"
+SHARED_PATH="../../electron/shared"
 # Detect platform
 OS_TYPE=$(uname)
 
@@ -17,17 +19,17 @@ if [ "$OS_TYPE" == "Linux" ]; then
     chmod +x "./bin/cortex-server"
 
     # Download engines for Linux
-    download "${ENGINE_DOWNLOAD_URL}-linux-amd64-noavx.tar.gz" -e --strip 1 -o "./bin/noavx/engines/cortex.llamacpp" 1
-    download "${ENGINE_DOWNLOAD_URL}-linux-amd64-avx.tar.gz" -e --strip 1 -o "./bin/avx/engines/cortex.llamacpp" 1
-    download "${ENGINE_DOWNLOAD_URL}-linux-amd64-avx2.tar.gz" -e --strip 1 -o "./bin/avx2/engines/cortex.llamacpp" 1
-    download "${ENGINE_DOWNLOAD_URL}-linux-amd64-avx512.tar.gz" -e --strip 1 -o "./bin/avx512/engines/cortex.llamacpp" 1
-    download "${ENGINE_DOWNLOAD_URL}-linux-amd64-avx2-cuda-12-0.tar.gz" -e --strip 1 -o "./bin/avx2-cuda-12-0/engines/cortex.llamacpp" 1
-    download "${ENGINE_DOWNLOAD_URL}-linux-amd64-avx2-cuda-11-7.tar.gz" -e --strip 1 -o "./bin/avx2-cuda-11-7/engines/cortex.llamacpp" 1
-    download "${ENGINE_DOWNLOAD_URL}-linux-amd64-noavx-cuda-12-0.tar.gz" -e --strip 1 -o "./bin/noavx-cuda-12-0/engines/cortex.llamacpp" 1
-    download "${ENGINE_DOWNLOAD_URL}-linux-amd64-noavx-cuda-11-7.tar.gz" -e --strip 1 -o "./bin/noavx-cuda-11-7/engines/cortex.llamacpp" 1
-    download "${ENGINE_DOWNLOAD_URL}-linux-amd64-vulkan.tar.gz" -e --strip 1 -o "./bin/vulkan/engines/cortex.llamacpp" 1
-    download "${CUDA_DOWNLOAD_URL}/cuda-12-0-linux-amd64.tar.gz" -e --strip 1 -o "../../electron/shared" 1
-    download "${CUDA_DOWNLOAD_URL}/cuda-11-7-linux-amd64.tar.gz" -e --strip 1 -o "../../electron/shared" 1
+    download "${ENGINE_DOWNLOAD_URL}-linux-amd64-noavx.tar.gz" -e --strip 1 -o "${SHARED_PATH}/engines/cortex.llamacpp/linux-amd64-noavx/v${ENGINE_VERSION}" 1
+    download "${ENGINE_DOWNLOAD_URL}-linux-amd64-avx.tar.gz" -e --strip 1 -o "${SHARED_PATH}/engines/cortex.llamacpp/linux-amd64-avx/v${ENGINE_VERSION}" 1
+    download "${ENGINE_DOWNLOAD_URL}-linux-amd64-avx2.tar.gz" -e --strip 1 -o "${SHARED_PATH}/engines/cortex.llamacpp/linux-amd64-avx2/v${ENGINE_VERSION}" 1
+    download "${ENGINE_DOWNLOAD_URL}-linux-amd64-avx512.tar.gz" -e --strip 1 -o "${SHARED_PATH}/engines/cortex.llamacpp/linux-amd64-avx512/v${ENGINE_VERSION}" 1
+    download "${ENGINE_DOWNLOAD_URL}-linux-amd64-avx2-cuda-12-0.tar.gz" -e --strip 1 -o "${SHARED_PATH}/engines/cortex.llamacpp/linux-amd64-avx2-cuda-12-0/v${ENGINE_VERSION}" 1
+    download "${ENGINE_DOWNLOAD_URL}-linux-amd64-avx2-cuda-11-7.tar.gz" -e --strip 1 -o "${SHARED_PATH}/engines/cortex.llamacpp/linux-amd64-avx2-cuda-11-7/v${ENGINE_VERSION}" 1
+    download "${ENGINE_DOWNLOAD_URL}-linux-amd64-noavx-cuda-12-0.tar.gz" -e --strip 1 -o "${SHARED_PATH}/engines/cortex.llamacpp/linux-amd64-noavx-cuda-12-0/v${ENGINE_VERSION}" 1
+    download "${ENGINE_DOWNLOAD_URL}-linux-amd64-noavx-cuda-11-7.tar.gz" -e --strip 1 -o "${SHARED_PATH}/engines/cortex.llamacpp/linux-amd64-noavx-cuda-11-7/v${ENGINE_VERSION}" 1
+    download "${ENGINE_DOWNLOAD_URL}-linux-amd64-vulkan.tar.gz" -e --strip 1 -o "${SHARED_PATH}/engines/cortex.llamacpp/linux-amd64-vulkan/v${ENGINE_VERSION}" 1
+    download "${CUDA_DOWNLOAD_URL}/cuda-12-0-linux-amd64.tar.gz" -e --strip 1 -o "${SHARED_PATH}" 1
+    download "${CUDA_DOWNLOAD_URL}/cuda-11-7-linux-amd64.tar.gz" -e --strip 1 -o "${SHARED_PATH}" 1
 
 elif [ "$OS_TYPE" == "Darwin" ]; then
     # macOS downloads
@@ -38,8 +40,8 @@ elif [ "$OS_TYPE" == "Darwin" ]; then
     chmod +x "./bin/cortex-server"
 
     # Download engines for macOS
-    download "${ENGINE_DOWNLOAD_URL}-mac-arm64.tar.gz" -e --strip 1 -o "./bin/engines/cortex.llamacpp/mac-arm64/v0.1.39"
-    download "${ENGINE_DOWNLOAD_URL}-mac-amd64.tar.gz" -e --strip 1 -o "./bin/engines/cortex.llamacpp/mac-amd64/v0.1.39"
+    download "${ENGINE_DOWNLOAD_URL}-mac-arm64.tar.gz" -e --strip 1 -o "${SHARED_PATH}/engines/cortex.llamacpp/mac-arm64/v${ENGINE_VERSION}"
+    download "${ENGINE_DOWNLOAD_URL}-mac-amd64.tar.gz" -e --strip 1 -o "${SHARED_PATH}/engines/cortex.llamacpp/mac-amd64/v${ENGINE_VERSION}"
 
 else
     echo "Unsupported operating system: $OS_TYPE"
diff --git a/extensions/inference-cortex-extension/src/index.ts b/extensions/inference-cortex-extension/src/index.ts
index e88608d57..6bd3c468e 100644
--- a/extensions/inference-cortex-extension/src/index.ts
+++ b/extensions/inference-cortex-extension/src/index.ts
@@ -168,7 +168,7 @@ export default class JanInferenceCortexExtension extends LocalOAIEngine {
    * Set default engine variant on launch
    */
   private async setDefaultEngine(systemInfo: SystemInformation) {
-    const variant = await executeOnMain(NODE, 'engineVariant', systemInfo)
+    const variant = await executeOnMain(NODE, 'engineVariant', systemInfo.gpuSetting)
     return ky
       .post(
         `${CORTEX_API_URL}/v1/engines/${InferenceEngine.cortex_llamacpp}/default?version=${CORTEX_ENGINE_VERSION}&variant=${variant}`,
diff --git a/extensions/inference-cortex-extension/src/node/execute.test.ts b/extensions/inference-cortex-extension/src/node/execute.test.ts
index b0a7ece9e..73f114ce1 100644
--- a/extensions/inference-cortex-extension/src/node/execute.test.ts
+++ b/extensions/inference-cortex-extension/src/node/execute.test.ts
@@ -1,6 +1,6 @@
 import { describe, expect, it } from '@jest/globals'
 import { engineVariant, executableCortexFile } from './execute'
-import { GpuSetting } from '@janhq/core'
+import { GpuSetting } from '@janhq/core/node'
 import { cpuInfo } from 'cpu-instructions'
 
 let testSettings: GpuSetting = {
@@ -30,6 +30,11 @@ jest.mock('cpu-instructions', () => ({
 let mockCpuInfo = cpuInfo.cpuInfo as jest.Mock
 mockCpuInfo.mockReturnValue([])
 
+jest.mock('@janhq/core/node', () => ({
+  appResourcePath: () => ".",
+  log: jest.fn()
+}))
+
 describe('test executable cortex file', () => {
   afterAll(function () {
     Object.defineProperty(process, 'platform', {
@@ -46,7 +51,7 @@ describe('test executable cortex file', () => {
     })
     expect(executableCortexFile(testSettings)).toEqual(
       expect.objectContaining({
-        enginePath: expect.stringContaining(`bin`),
+        enginePath: expect.stringContaining("shared"),
         executablePath:
           originalPlatform === 'darwin'
             ? expect.stringContaining(`cortex-server`)
@@ -61,7 +66,7 @@ describe('test executable cortex file', () => {
     })
     expect(executableCortexFile(testSettings)).toEqual(
       expect.objectContaining({
-        enginePath: expect.stringContaining(`bin`),
+        enginePath: expect.stringContaining("shared"),
         executablePath:
           originalPlatform === 'darwin'
             ? expect.stringContaining(`cortex-server`)
@@ -84,7 +89,7 @@ describe('test executable cortex file', () => {
     mockCpuInfo.mockReturnValue(['avx'])
     expect(executableCortexFile(settings)).toEqual(
       expect.objectContaining({
-        enginePath: expect.stringContaining(`bin`),
+        enginePath: expect.stringContaining("shared"),
         executablePath: expect.stringContaining(`cortex-server.exe`),
         cudaVisibleDevices: '',
         vkVisibleDevices: '',
@@ -120,7 +125,7 @@ describe('test executable cortex file', () => {
     mockCpuInfo.mockReturnValue(['avx2'])
     expect(executableCortexFile(settings)).toEqual(
       expect.objectContaining({
-        enginePath: expect.stringContaining(`bin`),
+        enginePath: expect.stringContaining("shared"),
         executablePath: expect.stringContaining(`cortex-server.exe`),
         cudaVisibleDevices: '0',
         vkVisibleDevices: '0',
@@ -156,7 +161,7 @@ describe('test executable cortex file', () => {
     mockCpuInfo.mockReturnValue(['noavx'])
     expect(executableCortexFile(settings)).toEqual(
       expect.objectContaining({
-        enginePath: expect.stringContaining(`bin`),
+        enginePath: expect.stringContaining("shared"),
         executablePath: expect.stringContaining(`cortex-server.exe`),
         cudaVisibleDevices: '0',
         vkVisibleDevices: '0',
@@ -178,7 +183,7 @@ describe('test executable cortex file', () => {
     mockCpuInfo.mockReturnValue(['noavx'])
     expect(executableCortexFile(settings)).toEqual(
       expect.objectContaining({
-        enginePath: expect.stringContaining(`bin`),
+        enginePath: expect.stringContaining("shared"),
         executablePath: expect.stringContaining(`cortex-server`),
         cudaVisibleDevices: '',
         vkVisibleDevices: '',
@@ -214,7 +219,7 @@ describe('test executable cortex file', () => {
     mockCpuInfo.mockReturnValue(['avx512'])
     expect(executableCortexFile(settings)).toEqual(
       expect.objectContaining({
-        enginePath: expect.stringContaining(`bin`),
+        enginePath: expect.stringContaining("shared"),
         executablePath: expect.stringContaining(`cortex-server`),
         cudaVisibleDevices: '0',
         vkVisibleDevices: '0',
@@ -249,7 +254,7 @@ describe('test executable cortex file', () => {
     }
     expect(executableCortexFile(settings)).toEqual(
       expect.objectContaining({
-        enginePath: expect.stringContaining(`bin`),
+        enginePath: expect.stringContaining("shared"),
         executablePath: expect.stringContaining(`cortex-server`),
         cudaVisibleDevices: '0',
         vkVisibleDevices: '0',
@@ -274,7 +279,7 @@ describe('test executable cortex file', () => {
 
       expect(executableCortexFile(settings)).toEqual(
         expect.objectContaining({
-          enginePath: expect.stringContaining('bin'),
+          enginePath: expect.stringContaining('shared'),
           executablePath: expect.stringContaining(`cortex-server`),
 
           cudaVisibleDevices: '',
@@ -298,7 +303,7 @@ describe('test executable cortex file', () => {
       mockCpuInfo.mockReturnValue([instruction])
       expect(executableCortexFile(settings)).toEqual(
         expect.objectContaining({
-          enginePath: expect.stringContaining('bin'),
+          enginePath: expect.stringContaining('shared'),
           executablePath: expect.stringContaining(`cortex-server.exe`),
           cudaVisibleDevices: '',
           vkVisibleDevices: '',
@@ -338,7 +343,7 @@ describe('test executable cortex file', () => {
       mockCpuInfo.mockReturnValue([instruction])
       expect(executableCortexFile(settings)).toEqual(
         expect.objectContaining({
-          enginePath: expect.stringContaining(`bin`),
+          enginePath: expect.stringContaining("shared"),
           executablePath: expect.stringContaining(`cortex-server.exe`),
           cudaVisibleDevices: '0',
           vkVisibleDevices: '0',
@@ -380,7 +385,7 @@ describe('test executable cortex file', () => {
       mockCpuInfo.mockReturnValue([instruction])
       expect(executableCortexFile(settings)).toEqual(
         expect.objectContaining({
-          enginePath: expect.stringContaining(`bin`),
+          enginePath: expect.stringContaining("shared"),
           executablePath: expect.stringContaining(`cortex-server`),
           cudaVisibleDevices: '0',
           vkVisibleDevices: '0',
@@ -423,7 +428,7 @@ describe('test executable cortex file', () => {
       mockCpuInfo.mockReturnValue([instruction])
       expect(executableCortexFile(settings)).toEqual(
         expect.objectContaining({
-          enginePath: expect.stringContaining(`bin`),
+          enginePath: expect.stringContaining("shared"),
           executablePath: expect.stringContaining(`cortex-server`),
           cudaVisibleDevices: '0',
           vkVisibleDevices: '0',
@@ -450,7 +455,7 @@ describe('test executable cortex file', () => {
       mockCpuInfo.mockReturnValue([])
       expect(executableCortexFile(settings)).toEqual(
         expect.objectContaining({
-          enginePath: expect.stringContaining(`bin`),
+          enginePath: expect.stringContaining("shared"),
           executablePath:
             originalPlatform === 'darwin'
               ? expect.stringContaining(`cortex-server`)
diff --git a/extensions/inference-cortex-extension/src/node/execute.ts b/extensions/inference-cortex-extension/src/node/execute.ts
index 48a407e31..44b85d515 100644
--- a/extensions/inference-cortex-extension/src/node/execute.ts
+++ b/extensions/inference-cortex-extension/src/node/execute.ts
@@ -1,6 +1,6 @@
-import { GpuSetting } from '@janhq/core'
 import * as path from 'path'
 import { cpuInfo } from 'cpu-instructions'
+import { GpuSetting, appResourcePath, log } from '@janhq/core/node'
 
 export interface CortexExecutableOptions {
   enginePath: string
@@ -52,7 +52,7 @@ const extension = (): '.exe' | '' => {
  */
 const cudaVersion = (settings?: GpuSetting): '11-7' | '12-0' | undefined => {
   const isUsingCuda =
-    settings?.vulkan !== true && settings?.run_mode === 'gpu' && os() !== 'mac'
+    settings?.vulkan !== true && settings?.run_mode === 'gpu' && !os().includes('mac')
 
   if (!isUsingCuda) return undefined
   return settings?.cuda?.version === '11' ? '11-7' : '12-0'
@@ -84,7 +84,7 @@ export const executableCortexFile = (
   let binaryName = `cortex-server${extension()}`
   const binPath = path.join(__dirname, '..', 'bin')
   return {
-    enginePath: binPath,
+    enginePath: path.join(appResourcePath(), 'shared'),
     executablePath: path.join(binPath, binaryName),
     cudaVisibleDevices,
     vkVisibleDevices,
@@ -112,5 +112,7 @@ export const engineVariant = (gpuSetting?: GpuSetting): string => {
   ]
     .filter((e) => !!e)
     .join('-')
+
+  log(`[CORTEX]: Engine variant: ${engineVariant}`)
   return engineVariant
 }
diff --git a/extensions/inference-cortex-extension/src/node/index.ts b/extensions/inference-cortex-extension/src/node/index.ts
index 4c6d96292..a13bf6028 100644
--- a/extensions/inference-cortex-extension/src/node/index.ts
+++ b/extensions/inference-cortex-extension/src/node/index.ts
@@ -2,7 +2,6 @@ import path from 'path'
 import { getJanDataFolderPath, log, SystemInformation } from '@janhq/core/node'
 import { engineVariant, executableCortexFile } from './execute'
 import { ProcessWatchdog } from './watchdog'
-import { appResourcePath } from '@janhq/core/node'
 
 // The HOST address to use for the Nitro subprocess
 const LOCAL_PORT = '39291'
@@ -30,13 +29,7 @@ function run(systemInfo?: SystemInformation): Promise<any> {
     log(`[CORTEX]:: Spawn cortex at path: ${executableOptions.executablePath}`)
     log(`[CORTEX]:: Cortex engine path: ${executableOptions.enginePath}`)
 
-    addEnvPaths(path.join(appResourcePath(), 'shared'))
     addEnvPaths(executableOptions.enginePath)
-    // Add the cortex.llamacpp path to the PATH and LD_LIBRARY_PATH
-    // This is required for the cortex engine to run for now since dlls are not moved to the root
-    addEnvPaths(
-      path.join(executableOptions.enginePath, 'engines', 'cortex.llamacpp')
-    )
 
     const dataFolderPath = getJanDataFolderPath()
     if (watchdog) {
@@ -85,13 +78,11 @@ function addEnvPaths(dest: string) {
   // Add engine path to the PATH and LD_LIBRARY_PATH
   if (process.platform === 'win32') {
     process.env.PATH = (process.env.PATH || '').concat(path.delimiter, dest)
-    log(`[CORTEX] PATH: ${process.env.PATH}`)
   } else {
     process.env.LD_LIBRARY_PATH = (process.env.LD_LIBRARY_PATH || '').concat(
       path.delimiter,
       dest
     )
-    log(`[CORTEX] LD_LIBRARY_PATH: ${process.env.LD_LIBRARY_PATH}`)
   }
 }
 

From a6f2de922c566f68f56cc60e790d3df1f2f88923 Mon Sep 17 00:00:00 2001
From: Faisal Amir <urmauur@gmail.com>
Date: Mon, 18 Nov 2024 16:10:57 +0700
Subject: [PATCH 13/46] chore: refactor rerender components

---
 electron/managers/window.ts             |  6 +++++
 web/containers/Providers/DataLoader.tsx | 16 ++++++++++--
 web/hooks/useModels.ts                  | 34 +++++++++++++++----------
 3 files changed, 41 insertions(+), 15 deletions(-)

diff --git a/electron/managers/window.ts b/electron/managers/window.ts
index c9c43ea77..c89e1d7c4 100644
--- a/electron/managers/window.ts
+++ b/electron/managers/window.ts
@@ -28,6 +28,7 @@ class WindowManager {
       ...mainWindowConfig,
       width: bounds.width,
       height: bounds.height,
+      show: false,
       x: bounds.x,
       y: bounds.y,
       webPreferences: {
@@ -78,6 +79,11 @@ class WindowManager {
         windowManager.hideMainWindow()
       }
     })
+
+    windowManager.mainWindow?.on('ready-to-show', function () {
+      // Feature Toggle for Quick Ask
+      windowManager.mainWindow?.show()
+    })
   }
 
   createQuickAskWindow(preloadPath: string, startUrl: string): void {
diff --git a/web/containers/Providers/DataLoader.tsx b/web/containers/Providers/DataLoader.tsx
index 4319c5eed..fdd7caae2 100644
--- a/web/containers/Providers/DataLoader.tsx
+++ b/web/containers/Providers/DataLoader.tsx
@@ -2,7 +2,12 @@
 
 import { Fragment, ReactNode, useEffect } from 'react'
 
-import { AppConfiguration, getUserHomePath } from '@janhq/core'
+import {
+  AppConfiguration,
+  events,
+  getUserHomePath,
+  ModelEvent,
+} from '@janhq/core'
 import { useSetAtom } from 'jotai'
 
 import useAssistants from '@/hooks/useAssistants'
@@ -30,12 +35,19 @@ const DataLoader: React.FC<Props> = ({ children }) => {
   const setJanDefaultDataFolder = useSetAtom(defaultJanDataFolderAtom)
   const setJanSettingScreen = useSetAtom(janSettingScreenAtom)
 
-  useModels()
   useThreads()
   useAssistants()
   useGetSystemResources()
   useLoadTheme()
 
+  const { loadDataModel, isUpdated } = useModels()
+  useEffect(() => {
+    // Listen for model updates
+    if (isUpdated) {
+      loadDataModel()
+    }
+  }, [isUpdated, loadDataModel])
+
   useEffect(() => {
     window.core?.api
       ?.getAppConfigurations()
diff --git a/web/hooks/useModels.ts b/web/hooks/useModels.ts
index 400e02793..3f67faaeb 100644
--- a/web/hooks/useModels.ts
+++ b/web/hooks/useModels.ts
@@ -29,6 +29,7 @@ import {
 const useModels = () => {
   const setDownloadedModels = useSetAtom(downloadedModelsAtom)
   const setExtensionModels = useSetAtom(configuredModelsAtom)
+  let isUpdated = false
 
   const getData = useCallback(() => {
     const getDownloadedModels = async () => {
@@ -52,10 +53,10 @@ const useModels = () => {
 
       setDownloadedModels(toUpdate)
 
-      let isUpdated = false
       toUpdate.forEach((model) => {
         if (!ModelManager.instance().models.has(model.id)) {
           ModelManager.instance().models.set(model.id, model)
+          // eslint-disable-next-line react-hooks/exhaustive-deps
           isUpdated = true
         }
       })
@@ -75,21 +76,28 @@ const useModels = () => {
 
   const reloadData = useDebouncedCallback(() => getData(), 300)
 
+  const getModels = async (): Promise<Model[]> =>
+    extensionManager
+      .get<ModelExtension>(ExtensionTypeEnum.Model)
+      ?.getModels() ?? []
+
   useEffect(() => {
     // Try get data on mount
-    reloadData()
-
-    // Listen for model updates
-    events.on(ModelEvent.OnModelsUpdate, async () => reloadData())
-    return () => {
-      // Remove listener on unmount
-      events.off(ModelEvent.OnModelsUpdate, async () => {})
+    if (isUpdated) {
+      reloadData()
+      // Listen for model updates
+      events.on(ModelEvent.OnModelsUpdate, async () => reloadData())
+      return () => {
+        // Remove listener on unmount
+        events.off(ModelEvent.OnModelsUpdate, async () => {})
+      }
     }
-  }, [getData, reloadData])
+  }, [getData, isUpdated, reloadData])
+
+  return {
+    loadDataModel: getData,
+    isUpdated: isUpdated,
+  }
 }
 
-const getModels = async (): Promise<Model[]> =>
-  extensionManager.get<ModelExtension>(ExtensionTypeEnum.Model)?.getModels() ??
-  []
-
 export default useModels

From 18e9a2e4d8dd5c6c300c2ee0fbb9e507270067b9 Mon Sep 17 00:00:00 2001
From: Faisal Amir <urmauur@gmail.com>
Date: Mon, 18 Nov 2024 16:17:31 +0700
Subject: [PATCH 14/46] chore: remove get data on mount

---
 web/containers/Providers/DataLoader.tsx | 7 +------
 web/hooks/useModels.ts                  | 1 -
 2 files changed, 1 insertion(+), 7 deletions(-)

diff --git a/web/containers/Providers/DataLoader.tsx b/web/containers/Providers/DataLoader.tsx
index fdd7caae2..8faa87865 100644
--- a/web/containers/Providers/DataLoader.tsx
+++ b/web/containers/Providers/DataLoader.tsx
@@ -2,12 +2,7 @@
 
 import { Fragment, ReactNode, useEffect } from 'react'
 
-import {
-  AppConfiguration,
-  events,
-  getUserHomePath,
-  ModelEvent,
-} from '@janhq/core'
+import { AppConfiguration, getUserHomePath } from '@janhq/core'
 import { useSetAtom } from 'jotai'
 
 import useAssistants from '@/hooks/useAssistants'
diff --git a/web/hooks/useModels.ts b/web/hooks/useModels.ts
index 3f67faaeb..684ac2729 100644
--- a/web/hooks/useModels.ts
+++ b/web/hooks/useModels.ts
@@ -84,7 +84,6 @@ const useModels = () => {
   useEffect(() => {
     // Try get data on mount
     if (isUpdated) {
-      reloadData()
       // Listen for model updates
       events.on(ModelEvent.OnModelsUpdate, async () => reloadData())
       return () => {

From 5c512ae7b4a022de15f0a9a05a3d9f5468080a64 Mon Sep 17 00:00:00 2001
From: Faisal Amir <urmauur@gmail.com>
Date: Mon, 18 Nov 2024 16:31:50 +0700
Subject: [PATCH 15/46] chore: remove commented code

---
 electron/managers/window.ts | 1 -
 1 file changed, 1 deletion(-)

diff --git a/electron/managers/window.ts b/electron/managers/window.ts
index c89e1d7c4..918036365 100644
--- a/electron/managers/window.ts
+++ b/electron/managers/window.ts
@@ -81,7 +81,6 @@ class WindowManager {
     })
 
     windowManager.mainWindow?.on('ready-to-show', function () {
-      // Feature Toggle for Quick Ask
       windowManager.mainWindow?.show()
     })
   }

From b8e521164b0e18dc01eda24dc34c418393bf7641 Mon Sep 17 00:00:00 2001
From: Faisal Amir <urmauur@gmail.com>
Date: Mon, 18 Nov 2024 16:48:27 +0700
Subject: [PATCH 16/46] chore: remove conditional on dataloader

---
 web/containers/Providers/DataLoader.tsx |  4 +---
 web/hooks/useModels.ts                  | 11 ++++++++---
 2 files changed, 9 insertions(+), 6 deletions(-)

diff --git a/web/containers/Providers/DataLoader.tsx b/web/containers/Providers/DataLoader.tsx
index 8faa87865..ed4c07ec3 100644
--- a/web/containers/Providers/DataLoader.tsx
+++ b/web/containers/Providers/DataLoader.tsx
@@ -38,9 +38,7 @@ const DataLoader: React.FC<Props> = ({ children }) => {
   const { loadDataModel, isUpdated } = useModels()
   useEffect(() => {
     // Listen for model updates
-    if (isUpdated) {
-      loadDataModel()
-    }
+    loadDataModel()
   }, [isUpdated, loadDataModel])
 
   useEffect(() => {
diff --git a/web/hooks/useModels.ts b/web/hooks/useModels.ts
index 684ac2729..0aed91ed2 100644
--- a/web/hooks/useModels.ts
+++ b/web/hooks/useModels.ts
@@ -1,4 +1,4 @@
-import { useCallback, useEffect } from 'react'
+import { useCallback, useEffect, useRef } from 'react'
 
 import {
   ExtensionTypeEnum,
@@ -29,10 +29,15 @@ import {
 const useModels = () => {
   const setDownloadedModels = useSetAtom(downloadedModelsAtom)
   const setExtensionModels = useSetAtom(configuredModelsAtom)
+  const hasFetchedDownloadedModels = useRef(false) // Track whether the function has been executed
+
   let isUpdated = false
 
   const getData = useCallback(() => {
+    if (hasFetchedDownloadedModels.current) return
+
     const getDownloadedModels = async () => {
+      hasFetchedDownloadedModels.current = true
       const localModels = (await getModels()).map((e) => ({
         ...e,
         name: ModelManager.instance().models.get(e.id)?.name ?? e.id,
@@ -72,7 +77,7 @@ const useModels = () => {
     // Fetch all data
     getExtensionModels()
     getDownloadedModels()
-  }, [setDownloadedModels, setExtensionModels])
+  }, [])
 
   const reloadData = useDebouncedCallback(() => getData(), 300)
 
@@ -91,7 +96,7 @@ const useModels = () => {
         events.off(ModelEvent.OnModelsUpdate, async () => {})
       }
     }
-  }, [getData, isUpdated, reloadData])
+  }, [isUpdated, reloadData])
 
   return {
     loadDataModel: getData,

From e9fd7f4554f88dadd6d662150138158184894ee7 Mon Sep 17 00:00:00 2001
From: Louis <louis@jan.ai>
Date: Mon, 18 Nov 2024 18:22:09 +0700
Subject: [PATCH 17/46] fix: models load

---
 core/src/browser/extension.ts                 |  1 -
 .../inference-cortex-extension/src/index.ts   |  4 +-
 extensions/model-extension/src/index.ts       | 33 ++++++-------
 web/containers/Providers/DataLoader.tsx       |  7 +--
 web/containers/Providers/EventListener.tsx    |  2 +-
 .../Providers/ModelImportListener.tsx         |  2 +-
 web/hooks/useModels.ts                        | 47 +++++++++++--------
 7 files changed, 53 insertions(+), 43 deletions(-)

diff --git a/core/src/browser/extension.ts b/core/src/browser/extension.ts
index d934e1c06..b7a9fca4e 100644
--- a/core/src/browser/extension.ts
+++ b/core/src/browser/extension.ts
@@ -113,7 +113,6 @@ export abstract class BaseExtension implements ExtensionType {
     for (const model of models) {
       ModelManager.instance().register(model)
     }
-    events.emit(ModelEvent.OnModelsUpdate, {})
   }
 
   /**
diff --git a/extensions/inference-cortex-extension/src/index.ts b/extensions/inference-cortex-extension/src/index.ts
index e83a17561..34a376ac8 100644
--- a/extensions/inference-cortex-extension/src/index.ts
+++ b/extensions/inference-cortex-extension/src/index.ts
@@ -215,7 +215,9 @@ export default class JanInferenceCortexExtension extends LocalOAIEngine {
               // Delay for the state update from cortex.cpp
               // Just to be sure
               setTimeout(() => {
-                events.emit(ModelEvent.OnModelsUpdate, {})
+                events.emit(ModelEvent.OnModelsUpdate, {
+                  fetch: true,
+                })
               }, 500)
             }
           })
diff --git a/extensions/model-extension/src/index.ts b/extensions/model-extension/src/index.ts
index b3ad2a012..63f505bd6 100644
--- a/extensions/model-extension/src/index.ts
+++ b/extensions/model-extension/src/index.ts
@@ -40,11 +40,6 @@ export default class JanModelExtension extends ModelExtension {
   async onLoad() {
     this.registerSettings(SETTINGS)
 
-    // Try get models from cortex.cpp
-    this.getModels().then((models) => {
-      this.registerModels(models)
-    })
-
     // Listen to app download events
     this.handleDesktopEvents()
   }
@@ -163,19 +158,27 @@ export default class JanModelExtension extends ModelExtension {
       (e) => e.engine === InferenceEngine.nitro
     )
 
-    await this.cortexAPI.getModels().then((models) => {
-      const existingIds = models.map((e) => e.id)
-      toImportModels = toImportModels.filter(
-        (e: Model) => !existingIds.includes(e.id) && !e.settings?.vision_model
-      )
-    })
+    /**
+     * Fetch models from cortex.cpp
+     */
+    var fetchedModels = await this.cortexAPI.getModels().catch(() => [])
+
+    // Checking if there are models to import
+    const existingIds = fetchedModels.map((e) => e.id)
+    toImportModels = toImportModels.filter(
+      (e: Model) => !existingIds.includes(e.id) && !e.settings?.vision_model
+    )
+
+    /**
+     * There is no model to import
+     * just return fetched models
+     */
+    if (!toImportModels.length) return fetchedModels
 
     console.log('To import models:', toImportModels.length)
     /**
      * There are models to import
-     * do not return models from cortex.cpp yet
-     * otherwise it will reset the app cache
-     * */
+     */
     if (toImportModels.length > 0) {
       // Import models
       await Promise.all(
@@ -202,8 +205,6 @@ export default class JanModelExtension extends ModelExtension {
           })
         })
       )
-
-      return currentModels
     }
 
     /**
diff --git a/web/containers/Providers/DataLoader.tsx b/web/containers/Providers/DataLoader.tsx
index ed4c07ec3..d3d747d02 100644
--- a/web/containers/Providers/DataLoader.tsx
+++ b/web/containers/Providers/DataLoader.tsx
@@ -29,17 +29,18 @@ const DataLoader: React.FC<Props> = ({ children }) => {
   const setQuickAskEnabled = useSetAtom(quickAskEnabledAtom)
   const setJanDefaultDataFolder = useSetAtom(defaultJanDataFolderAtom)
   const setJanSettingScreen = useSetAtom(janSettingScreenAtom)
+  const { loadDataModel } = useModels()
 
   useThreads()
   useAssistants()
   useGetSystemResources()
   useLoadTheme()
 
-  const { loadDataModel, isUpdated } = useModels()
   useEffect(() => {
-    // Listen for model updates
+    // Load data once
     loadDataModel()
-  }, [isUpdated, loadDataModel])
+    // eslint-disable-next-line react-hooks/exhaustive-deps
+  }, [])
 
   useEffect(() => {
     window.core?.api
diff --git a/web/containers/Providers/EventListener.tsx b/web/containers/Providers/EventListener.tsx
index 5cb0debab..c1dcf7c40 100644
--- a/web/containers/Providers/EventListener.tsx
+++ b/web/containers/Providers/EventListener.tsx
@@ -112,8 +112,8 @@ const EventListenerWrapper = ({ children }: PropsWithChildren) => {
         state.downloadState = 'end'
         setDownloadState(state)
         removeDownloadingModel(state.modelId)
+        events.emit(ModelEvent.OnModelsUpdate, { fetch: true })
       }
-      events.emit(ModelEvent.OnModelsUpdate, {})
     },
     [removeDownloadingModel, setDownloadState]
   )
diff --git a/web/containers/Providers/ModelImportListener.tsx b/web/containers/Providers/ModelImportListener.tsx
index f1ca2a768..a60b7be80 100644
--- a/web/containers/Providers/ModelImportListener.tsx
+++ b/web/containers/Providers/ModelImportListener.tsx
@@ -43,7 +43,7 @@ const ModelImportListener = ({ children }: PropsWithChildren) => {
   const onImportModelSuccess = useCallback(
     (state: ImportingModel) => {
       if (!state.modelId) return
-      events.emit(ModelEvent.OnModelsUpdate, {})
+      events.emit(ModelEvent.OnModelsUpdate, { fetch: true })
       setImportingModelSuccess(state.importId, state.modelId)
     },
     [setImportingModelSuccess]
diff --git a/web/hooks/useModels.ts b/web/hooks/useModels.ts
index 0aed91ed2..d2b05779f 100644
--- a/web/hooks/useModels.ts
+++ b/web/hooks/useModels.ts
@@ -1,4 +1,4 @@
-import { useCallback, useEffect, useRef } from 'react'
+import { useCallback, useEffect } from 'react'
 
 import {
   ExtensionTypeEnum,
@@ -9,7 +9,7 @@ import {
   ModelManager,
 } from '@janhq/core'
 
-import { useSetAtom } from 'jotai'
+import { useSetAtom, useAtom } from 'jotai'
 
 import { useDebouncedCallback } from 'use-debounce'
 
@@ -27,17 +27,11 @@ import {
  * and updates the atoms accordingly.
  */
 const useModels = () => {
-  const setDownloadedModels = useSetAtom(downloadedModelsAtom)
+  const [downloadedModels, setDownloadedModels] = useAtom(downloadedModelsAtom)
   const setExtensionModels = useSetAtom(configuredModelsAtom)
-  const hasFetchedDownloadedModels = useRef(false) // Track whether the function has been executed
-
-  let isUpdated = false
 
   const getData = useCallback(() => {
-    if (hasFetchedDownloadedModels.current) return
-
     const getDownloadedModels = async () => {
-      hasFetchedDownloadedModels.current = true
       const localModels = (await getModels()).map((e) => ({
         ...e,
         name: ModelManager.instance().models.get(e.id)?.name ?? e.id,
@@ -58,6 +52,8 @@ const useModels = () => {
 
       setDownloadedModels(toUpdate)
 
+      let isUpdated = false
+
       toUpdate.forEach((model) => {
         if (!ModelManager.instance().models.has(model.id)) {
           ModelManager.instance().models.set(model.id, model)
@@ -77,30 +73,41 @@ const useModels = () => {
     // Fetch all data
     getExtensionModels()
     getDownloadedModels()
-  }, [])
+  }, [setDownloadedModels, setExtensionModels])
 
   const reloadData = useDebouncedCallback(() => getData(), 300)
 
+  const updateStates = useCallback(() => {
+    const cachedModels = ModelManager.instance().models.values().toArray()
+    const toUpdate = [
+      ...downloadedModels,
+      ...cachedModels.filter(
+        (e: Model) => !downloadedModels.some((g: Model) => g.id === e.id)
+      ),
+    ]
+
+    setDownloadedModels(toUpdate)
+  }, [downloadedModels, setDownloadedModels])
+
   const getModels = async (): Promise<Model[]> =>
     extensionManager
       .get<ModelExtension>(ExtensionTypeEnum.Model)
       ?.getModels() ?? []
 
   useEffect(() => {
-    // Try get data on mount
-    if (isUpdated) {
-      // Listen for model updates
-      events.on(ModelEvent.OnModelsUpdate, async () => reloadData())
-      return () => {
-        // Remove listener on unmount
-        events.off(ModelEvent.OnModelsUpdate, async () => {})
-      }
+    // Listen for model updates
+    events.on(ModelEvent.OnModelsUpdate, async (data: { fetch?: boolean }) => {
+      if (data.fetch) reloadData()
+      else updateStates()
+    })
+    return () => {
+      // Remove listener on unmount
+      events.off(ModelEvent.OnModelsUpdate, async () => {})
     }
-  }, [isUpdated, reloadData])
+  }, [reloadData, updateStates])
 
   return {
     loadDataModel: getData,
-    isUpdated: isUpdated,
   }
 }
 

From 5c5239097de9ffa03cd785e72887bc2a1711781f Mon Sep 17 00:00:00 2001
From: Louis <louis@jan.ai>
Date: Mon, 18 Nov 2024 19:51:49 +0700
Subject: [PATCH 18/46] chore: update test

---
 web/hooks/useModels.test.ts |  5 +++--
 web/hooks/usePath.ts        | 34 ----------------------------------
 2 files changed, 3 insertions(+), 36 deletions(-)

diff --git a/web/hooks/useModels.test.ts b/web/hooks/useModels.test.ts
index 9b6b898ad..f9c3b04b4 100644
--- a/web/hooks/useModels.test.ts
+++ b/web/hooks/useModels.test.ts
@@ -42,8 +42,9 @@ describe('useModels', () => {
 
     jest.spyOn(extensionManager, 'get').mockReturnValue(mockModelExtension)
 
-    act(() => {
-      renderHook(() => useModels())
+    const { result } = renderHook(() => useModels())
+    await act(() => {
+      result.current?.loadDataModel()
     })
 
     expect(mockModelExtension.getModels).toHaveBeenCalled()
diff --git a/web/hooks/usePath.ts b/web/hooks/usePath.ts
index 98e3009b4..b732926a6 100644
--- a/web/hooks/usePath.ts
+++ b/web/hooks/usePath.ts
@@ -42,39 +42,6 @@ export const usePath = () => {
     openFileExplorer(fullPath)
   }
 
-  const onViewJson = async (type: string) => {
-    // TODO: this logic should be refactored.
-    if (type !== 'Model' && !activeThread) return
-
-    let filePath = undefined
-    const assistantId = activeThread?.assistants[0]?.assistant_id
-    switch (type) {
-      case 'Engine':
-      case 'Thread':
-        filePath = await joinPath([
-          'threads',
-          activeThread?.id ?? '',
-          'thread.json',
-        ])
-        break
-      case 'Model':
-        if (!selectedModel) return
-        filePath = await joinPath(['models', selectedModel.id, 'model.json'])
-        break
-      case 'Assistant':
-      case 'Tools':
-        if (!assistantId) return
-        filePath = await joinPath(['assistants', assistantId, 'assistant.json'])
-        break
-      default:
-        break
-    }
-
-    if (!filePath) return
-    const fullPath = await joinPath([janDataFolderPath, filePath])
-    openFileExplorer(fullPath)
-  }
-
   const onViewFile = async (id: string) => {
     if (!activeThread) return
 
@@ -99,7 +66,6 @@ export const usePath = () => {
 
   return {
     onRevealInFinder,
-    onViewJson,
     onViewFile,
     onViewFileContainer,
   }

From 04dd8367a1d52f1d8082d477835c1669c776a865 Mon Sep 17 00:00:00 2001
From: Faisal Amir <urmauur@gmail.com>
Date: Mon, 18 Nov 2024 22:05:50 +0700
Subject: [PATCH 19/46] fix: improper line break on response message (#4042)

* fix: improper line break on response message

* chore: remove blankspace classname
---
 .../Thread/ThreadCenterPanel/SimpleTextMessage/index.tsx        | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/web/screens/Thread/ThreadCenterPanel/SimpleTextMessage/index.tsx b/web/screens/Thread/ThreadCenterPanel/SimpleTextMessage/index.tsx
index 126512115..26e4659e8 100644
--- a/web/screens/Thread/ThreadCenterPanel/SimpleTextMessage/index.tsx
+++ b/web/screens/Thread/ThreadCenterPanel/SimpleTextMessage/index.tsx
@@ -383,7 +383,7 @@ const SimpleTextMessage: React.FC<ThreadMessage> = (props) => {
           ) : (
             <div
               className={twMerge(
-                'message max-width-[100%] flex flex-col gap-y-2 overflow-auto break-all leading-relaxed	'
+                'message max-width-[100%] flex flex-col gap-y-2 overflow-auto leading-relaxed'
               )}
               dir="ltr"
             >

From 363008d37f08a07e559ee75657670d59aa2515a7 Mon Sep 17 00:00:00 2001
From: Louis <louis@jan.ai>
Date: Tue, 19 Nov 2024 11:44:19 +0700
Subject: [PATCH 20/46] fix: model import edge cases

---
 .../inference-cortex-extension/src/index.ts   | 17 +++++--
 extensions/model-extension/src/index.ts       | 50 +++++--------------
 .../model-extension/src/legacy/delete.ts      |  8 +--
 .../model-extension/src/legacy/model-json.ts  | 11 ++--
 web/hooks/useModels.ts                        |  5 +-
 5 files changed, 40 insertions(+), 51 deletions(-)

diff --git a/extensions/inference-cortex-extension/src/index.ts b/extensions/inference-cortex-extension/src/index.ts
index e24b5413f..1fb78c13e 100644
--- a/extensions/inference-cortex-extension/src/index.ts
+++ b/extensions/inference-cortex-extension/src/index.ts
@@ -19,6 +19,7 @@ import {
   events,
   ModelEvent,
   SystemInformation,
+  dirName,
 } from '@janhq/core'
 import PQueue from 'p-queue'
 import ky from 'ky'
@@ -99,10 +100,12 @@ export default class JanInferenceCortexExtension extends LocalOAIEngine {
       // Legacy chat model support
       model.settings = {
         ...model.settings,
-        llama_model_path: await getModelFilePath(
-          model,
-          model.settings.llama_model_path
-        ),
+        llama_model_path: model.file_path
+          ? await joinPath([
+              await dirName(model.file_path),
+              model.settings.llama_model_path,
+            ])
+          : await getModelFilePath(model, model.settings.llama_model_path),
       }
     } else {
       const { llama_model_path, ...settings } = model.settings
@@ -168,7 +171,11 @@ export default class JanInferenceCortexExtension extends LocalOAIEngine {
    * Set default engine variant on launch
    */
   private async setDefaultEngine(systemInfo: SystemInformation) {
-    const variant = await executeOnMain(NODE, 'engineVariant', systemInfo.gpuSetting)
+    const variant = await executeOnMain(
+      NODE,
+      'engineVariant',
+      systemInfo.gpuSetting
+    )
     return ky
       .post(
         `${CORTEX_API_URL}/v1/engines/${InferenceEngine.cortex_llamacpp}/default?version=${CORTEX_ENGINE_VERSION}&variant=${variant}`,
diff --git a/extensions/model-extension/src/index.ts b/extensions/model-extension/src/index.ts
index 63f505bd6..38c57e916 100644
--- a/extensions/model-extension/src/index.ts
+++ b/extensions/model-extension/src/index.ts
@@ -20,13 +20,6 @@ import { deleteModelFiles } from './legacy/delete'
 
 declare const SETTINGS: Array<any>
 
-/**
- * Extension enum
- */
-enum ExtensionEnum {
-  downloadedModels = 'downloadedModels',
-}
-
 /**
  * A extension for models
  */
@@ -122,39 +115,16 @@ export default class JanModelExtension extends ModelExtension {
    * @returns A Promise that resolves with an array of all models.
    */
   async getModels(): Promise<Model[]> {
-    /**
-     * In this action, if return empty array right away
-     * it would reset app cache and app will not function properly
-     * should compare and try import
-     */
-    let currentModels: Model[] = []
-
     /**
      * Legacy models should be supported
      */
     let legacyModels = await scanModelsFolder()
 
-    try {
-      if (!localStorage.getItem(ExtensionEnum.downloadedModels)) {
-        // Updated from an older version than 0.5.5
-        // Scan through the models folder and import them (Legacy flow)
-        // Return models immediately
-        currentModels = legacyModels
-      } else {
-        currentModels = JSON.parse(
-          localStorage.getItem(ExtensionEnum.downloadedModels)
-        ) as Model[]
-      }
-    } catch (e) {
-      currentModels = []
-      console.error(e)
-    }
-
     /**
      * Here we are filtering out the models that are not imported
      * and are not using llama.cpp engine
      */
-    var toImportModels = currentModels.filter(
+    var toImportModels = legacyModels.filter(
       (e) => e.engine === InferenceEngine.nitro
     )
 
@@ -196,13 +166,17 @@ export default class JanModelExtension extends ModelExtension {
                 ]) // Copied models
               : model.sources[0].url, // Symlink models,
             model.name
-          ).then((e) => {
-            this.updateModel({
-              id: model.id,
-              ...model.settings,
-              ...model.parameters,
-            } as Partial<Model>)
-          })
+          )
+            .then((e) => {
+              this.updateModel({
+                id: model.id,
+                ...model.settings,
+                ...model.parameters,
+              } as Partial<Model>)
+            })
+            .catch((e) => {
+              console.debug(e)
+            })
         })
       )
     }
diff --git a/extensions/model-extension/src/legacy/delete.ts b/extensions/model-extension/src/legacy/delete.ts
index 5288e30ee..43fa56d69 100644
--- a/extensions/model-extension/src/legacy/delete.ts
+++ b/extensions/model-extension/src/legacy/delete.ts
@@ -1,10 +1,12 @@
-import { fs, joinPath } from '@janhq/core'
+import { dirName, fs } from '@janhq/core'
+import { scanModelsFolder } from './model-json'
 
 export const deleteModelFiles = async (id: string) => {
   try {
-    const dirPath = await joinPath(['file://models', id])
+    const models = await scanModelsFolder()
+    const dirPath = models.find((e) => e.id === id)?.file_path
     // remove model folder directory
-    await fs.rm(dirPath)
+    if (dirPath) await fs.rm(await dirName(dirPath))
   } catch (err) {
     console.error(err)
   }
diff --git a/extensions/model-extension/src/legacy/model-json.ts b/extensions/model-extension/src/legacy/model-json.ts
index 3cad6014b..03560cde2 100644
--- a/extensions/model-extension/src/legacy/model-json.ts
+++ b/extensions/model-extension/src/legacy/model-json.ts
@@ -12,7 +12,9 @@ const LocalEngines = [
  * Scan through models folder and return downloaded models
  * @returns
  */
-export const scanModelsFolder = async (): Promise<Model[]> => {
+export const scanModelsFolder = async (): Promise<
+  (Model & { file_path?: string })[]
+> => {
   const _homeDir = 'file://models'
   try {
     if (!(await fs.existsSync(_homeDir))) {
@@ -37,7 +39,7 @@ export const scanModelsFolder = async (): Promise<Model[]> => {
 
       const jsonPath = await getModelJsonPath(folderFullPath)
 
-      if (await fs.existsSync(jsonPath)) {
+      if (jsonPath && (await fs.existsSync(jsonPath))) {
         // if we have the model.json file, read it
         let model = await fs.readFileSync(jsonPath, 'utf-8')
 
@@ -83,7 +85,10 @@ export const scanModelsFolder = async (): Promise<Model[]> => {
                   file.toLowerCase().endsWith('.gguf') || // GGUF
                   file.toLowerCase().endsWith('.engine') // Tensort-LLM
                 )
-              })?.length >= (model.engine === InferenceEngine.nitro_tensorrt_llm ? 1 : (model.sources?.length ?? 1))
+              })?.length >=
+                (model.engine === InferenceEngine.nitro_tensorrt_llm
+                  ? 1
+                  : (model.sources?.length ?? 1))
             )
           })
 
diff --git a/web/hooks/useModels.ts b/web/hooks/useModels.ts
index d2b05779f..88ec3afb5 100644
--- a/web/hooks/useModels.ts
+++ b/web/hooks/useModels.ts
@@ -34,7 +34,7 @@ const useModels = () => {
     const getDownloadedModels = async () => {
       const localModels = (await getModels()).map((e) => ({
         ...e,
-        name: ModelManager.instance().models.get(e.id)?.name ?? e.id,
+        name: ModelManager.instance().models.get(e.id)?.name ?? e.name ?? e.id,
         metadata:
           ModelManager.instance().models.get(e.id)?.metadata ?? e.metadata,
       }))
@@ -92,7 +92,8 @@ const useModels = () => {
   const getModels = async (): Promise<Model[]> =>
     extensionManager
       .get<ModelExtension>(ExtensionTypeEnum.Model)
-      ?.getModels() ?? []
+      ?.getModels()
+      .catch(() => []) ?? []
 
   useEffect(() => {
     // Listen for model updates

From bd850fb3571fd3c45320254a84737852d839c5cc Mon Sep 17 00:00:00 2001
From: Louis <louis@jan.ai>
Date: Tue, 19 Nov 2024 12:37:41 +0700
Subject: [PATCH 21/46] chore: reduce destroy attempts

---
 extensions/inference-cortex-extension/bin/version.txt | 2 +-
 extensions/inference-cortex-extension/src/index.ts    | 8 ++++++--
 extensions/model-extension/src/cortex.ts              | 5 +++--
 3 files changed, 10 insertions(+), 5 deletions(-)

diff --git a/extensions/inference-cortex-extension/bin/version.txt b/extensions/inference-cortex-extension/bin/version.txt
index 89f843d1d..b26ebdeac 100644
--- a/extensions/inference-cortex-extension/bin/version.txt
+++ b/extensions/inference-cortex-extension/bin/version.txt
@@ -1 +1 @@
-1.0.3-rc1
\ No newline at end of file
+1.0.3-rc2
\ No newline at end of file
diff --git a/extensions/inference-cortex-extension/src/index.ts b/extensions/inference-cortex-extension/src/index.ts
index 1fb78c13e..8236d7de4 100644
--- a/extensions/inference-cortex-extension/src/index.ts
+++ b/extensions/inference-cortex-extension/src/index.ts
@@ -69,11 +69,11 @@ export default class JanInferenceCortexExtension extends LocalOAIEngine {
 
     super.onLoad()
 
+    await this.queue.add(() => this.clean())
     this.queue.add(() => this.healthz())
     this.queue.add(() => this.setDefaultEngine(systemInfo))
     // Run the process watchdog
     const systemInfo = await systemInformation()
-    await this.clean()
     await executeOnMain(NODE, 'run', systemInfo)
     this.subscribeToEvents()
 
@@ -160,7 +160,8 @@ export default class JanInferenceCortexExtension extends LocalOAIEngine {
     return ky
       .get(`${CORTEX_API_URL}/healthz`, {
         retry: {
-          limit: 10,
+          limit: 20,
+          delay: () => 500,
           methods: ['get'],
         },
       })
@@ -192,6 +193,9 @@ export default class JanInferenceCortexExtension extends LocalOAIEngine {
     return ky
       .delete(`${CORTEX_API_URL}/processmanager/destroy`, {
         timeout: 2000, // maximum 2 seconds
+        retry: {
+          limit: 0,
+        },
       })
       .catch(() => {
         // Do nothing
diff --git a/extensions/model-extension/src/cortex.ts b/extensions/model-extension/src/cortex.ts
index 7a65e8e3f..f81cda553 100644
--- a/extensions/model-extension/src/cortex.ts
+++ b/extensions/model-extension/src/cortex.ts
@@ -1,6 +1,6 @@
 import PQueue from 'p-queue'
 import ky from 'ky'
-import {  extractModelLoadParams, Model } from '@janhq/core'
+import { extractModelLoadParams, Model } from '@janhq/core'
 import { extractInferenceParams } from '@janhq/core'
 /**
  * cortex.cpp Model APIs interface
@@ -155,7 +155,8 @@ export class CortexAPI implements ICortexAPI {
     return ky
       .get(`${API_URL}/healthz`, {
         retry: {
-          limit: 10,
+          limit: 20,
+          delay: () => 500,
           methods: ['get'],
         },
       })

From 28add39a5184ed503aedd723671723cc205fd0af Mon Sep 17 00:00:00 2001
From: Louis <louis@jan.ai>
Date: Tue, 19 Nov 2024 13:28:07 +0700
Subject: [PATCH 22/46] chore: add model pull options - correct url path

---
 core/src/browser/extensions/model.ts          |  1 +
 extensions/model-extension/src/cortex.ts      | 19 ++++++++++---
 extensions/model-extension/src/index.ts       |  7 +++++
 .../monitoring-extension/src/node/index.ts    |  4 +--
 web/containers/Providers/DataLoader.tsx       |  4 ++-
 web/hooks/useModels.ts                        | 27 ++++++++++++++++++-
 web/screens/Settings/Advanced/index.tsx       | 26 +++++++++++++++---
 7 files changed, 77 insertions(+), 11 deletions(-)

diff --git a/core/src/browser/extensions/model.ts b/core/src/browser/extensions/model.ts
index e224ec5cc..9a3428988 100644
--- a/core/src/browser/extensions/model.ts
+++ b/core/src/browser/extensions/model.ts
@@ -12,6 +12,7 @@ export abstract class ModelExtension extends BaseExtension implements ModelInter
     return ExtensionTypeEnum.Model
   }
 
+  abstract configurePullOptions(configs: { [key: string]: any }): Promise<any>
   abstract getModels(): Promise<Model[]>
   abstract pullModel(model: string, id?: string, name?: string): Promise<void>
   abstract cancelModelPull(modelId: string): Promise<void>
diff --git a/extensions/model-extension/src/cortex.ts b/extensions/model-extension/src/cortex.ts
index f81cda553..26316fbbc 100644
--- a/extensions/model-extension/src/cortex.ts
+++ b/extensions/model-extension/src/cortex.ts
@@ -18,6 +18,7 @@ interface ICortexAPI {
   deleteModel(model: string): Promise<void>
   updateModel(model: object): Promise<void>
   cancelModelPull(model: string): Promise<void>
+  configs(body: { [key: string]: any }): Promise<void>
 }
 
 type ModelList = {
@@ -52,7 +53,7 @@ export class CortexAPI implements ICortexAPI {
    */
   getModels(): Promise<Model[]> {
     return this.queue
-      .add(() => ky.get(`${API_URL}/models`).json<ModelList>())
+      .add(() => ky.get(`${API_URL}/v1/models`).json<ModelList>())
       .then((e) =>
         typeof e === 'object' ? e.data.map((e) => this.transformModel(e)) : []
       )
@@ -104,7 +105,7 @@ export class CortexAPI implements ICortexAPI {
    */
   deleteModel(model: string): Promise<void> {
     return this.queue.add(() =>
-      ky.delete(`${API_URL}/models/${model}`).json().then()
+      ky.delete(`${API_URL}/v1/models/${model}`).json().then()
     )
   }
 
@@ -130,7 +131,7 @@ export class CortexAPI implements ICortexAPI {
   cancelModelPull(model: string): Promise<void> {
     return this.queue.add(() =>
       ky
-        .delete(`${API_URL}/models/pull`, { json: { taskId: model } })
+        .delete(`${API_URL}/v1/models/pull`, { json: { taskId: model } })
         .json()
         .then()
     )
@@ -142,7 +143,7 @@ export class CortexAPI implements ICortexAPI {
    */
   async getModelStatus(model: string): Promise<boolean> {
     return this.queue
-      .add(() => ky.get(`${API_URL}/models/status/${model}`))
+      .add(() => ky.get(`${API_URL}/v1/models/status/${model}`))
       .then((e) => true)
       .catch(() => false)
   }
@@ -163,6 +164,16 @@ export class CortexAPI implements ICortexAPI {
       .then(() => {})
   }
 
+  /**
+   * Configure model pull options
+   * @param body
+   */
+  configs(body: { [key: string]: any }): Promise<void> {
+    return this.queue.add(() =>
+      ky.patch(`${API_URL}/v1/configs`, { json: body }).then(() => {})
+    )
+  }
+
   /**
    * TRansform model to the expected format (e.g. parameters, settings, metadata)
    * @param model
diff --git a/extensions/model-extension/src/index.ts b/extensions/model-extension/src/index.ts
index 38c57e916..f1ce069f6 100644
--- a/extensions/model-extension/src/index.ts
+++ b/extensions/model-extension/src/index.ts
@@ -227,6 +227,13 @@ export default class JanModelExtension extends ModelExtension {
     return this.cortexAPI.getModelStatus(model)
   }
 
+  /**
+   * Configure pull options such as proxy, headers, etc.
+   */
+  async configurePullOptions(options: { [key: string]: any }): Promise<any> {
+    return this.cortexAPI.configs(options).catch((e) => console.debug(e))
+  }
+
   /**
    * Handle download state from main app
    */
diff --git a/extensions/monitoring-extension/src/node/index.ts b/extensions/monitoring-extension/src/node/index.ts
index 980ee75d1..a900490f3 100644
--- a/extensions/monitoring-extension/src/node/index.ts
+++ b/extensions/monitoring-extension/src/node/index.ts
@@ -267,7 +267,7 @@ const updateGpuInfo = async () =>
           }
 
           data = await updateCudaExistence(data)
-          console.log(data)
+          console.log('[MONITORING]::Cuda info: ', data)
           writeFileSync(GPU_INFO_FILE, JSON.stringify(data, null, 2))
           log(`[APP]::${JSON.stringify(data)}`)
           resolve({})
@@ -344,7 +344,7 @@ const updateCudaExistence = async (
             data.cuda.version = match[1]
           }
         }
-        console.log(data)
+        console.log('[MONITORING]::Finalized cuda info update: ', data)
         resolve()
       })
     })
diff --git a/web/containers/Providers/DataLoader.tsx b/web/containers/Providers/DataLoader.tsx
index d3d747d02..245c254ac 100644
--- a/web/containers/Providers/DataLoader.tsx
+++ b/web/containers/Providers/DataLoader.tsx
@@ -29,7 +29,7 @@ const DataLoader: React.FC<Props> = ({ children }) => {
   const setQuickAskEnabled = useSetAtom(quickAskEnabledAtom)
   const setJanDefaultDataFolder = useSetAtom(defaultJanDataFolderAtom)
   const setJanSettingScreen = useSetAtom(janSettingScreenAtom)
-  const { loadDataModel } = useModels()
+  const { loadDataModel, configurePullOptions } = useModels()
 
   useThreads()
   useAssistants()
@@ -39,6 +39,8 @@ const DataLoader: React.FC<Props> = ({ children }) => {
   useEffect(() => {
     // Load data once
     loadDataModel()
+    // Configure pull options once
+    configurePullOptions()
     // eslint-disable-next-line react-hooks/exhaustive-deps
   }, [])
 
diff --git a/web/hooks/useModels.ts b/web/hooks/useModels.ts
index 88ec3afb5..75c86035a 100644
--- a/web/hooks/useModels.ts
+++ b/web/hooks/useModels.ts
@@ -9,13 +9,18 @@ import {
   ModelManager,
 } from '@janhq/core'
 
-import { useSetAtom, useAtom } from 'jotai'
+import { useSetAtom, useAtom, useAtomValue } from 'jotai'
 
 import { useDebouncedCallback } from 'use-debounce'
 
 import { isLocalEngine } from '@/utils/modelEngine'
 
 import { extensionManager } from '@/extension'
+import {
+  ignoreSslAtom,
+  proxyAtom,
+  proxyEnabledAtom,
+} from '@/helpers/atoms/AppConfig.atom'
 import {
   configuredModelsAtom,
   downloadedModelsAtom,
@@ -29,6 +34,9 @@ import {
 const useModels = () => {
   const [downloadedModels, setDownloadedModels] = useAtom(downloadedModelsAtom)
   const setExtensionModels = useSetAtom(configuredModelsAtom)
+  const proxyEnabled = useAtomValue(proxyEnabledAtom)
+  const proxyUrl = useAtomValue(proxyAtom)
+  const proxyIgnoreSSL = useAtomValue(ignoreSslAtom)
 
   const getData = useCallback(() => {
     const getDownloadedModels = async () => {
@@ -107,8 +115,25 @@ const useModels = () => {
     }
   }, [reloadData, updateStates])
 
+  const configurePullOptions = useCallback(() => {
+    extensionManager
+      .get<ModelExtension>(ExtensionTypeEnum.Model)
+      ?.configurePullOptions(
+        proxyEnabled
+          ? {
+              proxy_url: proxyUrl,
+              verify_peer_ssl: !proxyIgnoreSSL,
+            }
+          : {
+              proxy_url: '',
+              verify_peer_ssl: false,
+            }
+      )
+  }, [proxyEnabled, proxyUrl, proxyIgnoreSSL])
+
   return {
     loadDataModel: getData,
+    configurePullOptions,
   }
 }
 
diff --git a/web/screens/Settings/Advanced/index.tsx b/web/screens/Settings/Advanced/index.tsx
index 150f70398..a5956fc33 100644
--- a/web/screens/Settings/Advanced/index.tsx
+++ b/web/screens/Settings/Advanced/index.tsx
@@ -20,9 +20,12 @@ import { AlertTriangleIcon, AlertCircleIcon } from 'lucide-react'
 
 import { twMerge } from 'tailwind-merge'
 
+import { useDebouncedCallback } from 'use-debounce'
+
 import { snackbar, toaster } from '@/containers/Toast'
 
 import { useActiveModel } from '@/hooks/useActiveModel'
+import useModels from '@/hooks/useModels'
 import { useSettings } from '@/hooks/useSettings'
 
 import DataFolder from './DataFolder'
@@ -65,6 +68,7 @@ const Advanced = () => {
   const [dropdownOptions, setDropdownOptions] = useState<HTMLDivElement | null>(
     null
   )
+  const { configurePullOptions } = useModels()
 
   const [toggle, setToggle] = useState<HTMLDivElement | null>(null)
 
@@ -78,6 +82,15 @@ const Advanced = () => {
       return y['name']
     })
 
+  /**
+   * State updates may not be synced yet, so values read from other hooks
+   * could be stale; the state is also persisted on every keystroke in the
+   * input. Debounce the pull options update to account for both cases.
+   */
+  const updatePullOptions = useDebouncedCallback(
+    () => configurePullOptions(),
+    300
+  )
   /**
    * Handle proxy change
    */
@@ -90,8 +103,9 @@ const Advanced = () => {
       } else {
         setProxy('')
       }
+      updatePullOptions()
     },
-    [setPartialProxy, setProxy]
+    [setPartialProxy, setProxy, updatePullOptions]
   )
 
   /**
@@ -452,7 +466,10 @@ const Advanced = () => {
             <Switch
               data-testid="proxy-switch"
               checked={proxyEnabled}
-              onChange={() => setProxyEnabled(!proxyEnabled)}
+              onChange={() => {
+                setProxyEnabled(!proxyEnabled)
+                updatePullOptions()
+              }}
             />
             <div className="w-full">
               <Input
@@ -481,7 +498,10 @@ const Advanced = () => {
           <Switch
             data-testid="ignore-ssl-switch"
             checked={ignoreSSL}
-            onChange={(e) => setIgnoreSSL(e.target.checked)}
+            onChange={(e) => {
+              setIgnoreSSL(e.target.checked)
+              updatePullOptions()
+            }}
           />
         </div>
 

From 52c520d2c389f73e5bbc82d478db924ea19144ef Mon Sep 17 00:00:00 2001
From: Louis <louis@jan.ai>
Date: Tue, 19 Nov 2024 19:43:12 +0700
Subject: [PATCH 23/46] fix: app does not relaunch on Linux - add tests

---
 web/hooks/useModels.test.ts             | 82 ++++++++++++++++++++++++-
 web/screens/Settings/Advanced/index.tsx |  4 +-
 2 files changed, 83 insertions(+), 3 deletions(-)

diff --git a/web/hooks/useModels.test.ts b/web/hooks/useModels.test.ts
index f9c3b04b4..e848c455c 100644
--- a/web/hooks/useModels.test.ts
+++ b/web/hooks/useModels.test.ts
@@ -1,5 +1,5 @@
 // useModels.test.ts
-import { renderHook, act } from '@testing-library/react'
+import { renderHook, act, waitFor } from '@testing-library/react'
 import { events, ModelEvent, ModelManager } from '@janhq/core'
 import { extensionManager } from '@/extension'
 
@@ -36,7 +36,6 @@ describe('useModels', () => {
         }),
         get: () => undefined,
         has: () => true,
-        // set: () => {}
       },
     })
 
@@ -50,6 +49,85 @@ describe('useModels', () => {
     expect(mockModelExtension.getModels).toHaveBeenCalled()
   })
 
+  it('should return empty on error', async () => {
+    const mockModelExtension = {
+      getModels: jest.fn().mockRejectedValue(new Error('Error')),
+    } as any
+    ;(ModelManager.instance as jest.Mock).mockReturnValue({
+      models: {
+        values: () => ({
+          toArray: () => ({
+            filter: () => models,
+          }),
+        }),
+        get: () => undefined,
+        has: () => true,
+      },
+    })
+
+    jest.spyOn(extensionManager, 'get').mockReturnValue(mockModelExtension)
+
+    const { result } = renderHook(() => useModels())
+
+    await act(() => {
+      result.current?.loadDataModel()
+    })
+    // Await the assertion; an unawaited `.rejects` can never fail the test
+    await expect(mockModelExtension.getModels()).rejects.toThrow()
+  })
+
+  it('should not fetch models on models update when fetch is false', async () => {
+    const mockModelExtension = {
+      getModels: jest.fn().mockResolvedValue(models),
+    } as any
+
+    ;(ModelManager.instance as jest.Mock).mockReturnValue({
+      models: {
+        values: () => ({
+          toArray: () => ({
+            filter: () => models,
+          }),
+        }),
+        get: () => undefined,
+        has: () => true,
+      },
+    })
+
+    jest.spyOn(extensionManager, 'get').mockReturnValue(mockModelExtension)
+    jest.spyOn(events, 'on').mockImplementationOnce((event, cb) => {
+      cb({ fetch: false })
+    })
+    renderHook(() => useModels())
+
+    expect(mockModelExtension.getModels).not.toHaveBeenCalled()
+  })
+
+  it('should update states on models update', async () => {
+    const mockModelExtension = {
+      getModels: jest.fn().mockResolvedValue(models),
+    } as any
+
+    ;(ModelManager.instance as jest.Mock).mockReturnValue({
+      models: {
+        values: () => ({
+          toArray: () => ({
+            filter: () => models,
+          }),
+        }),
+        get: () => undefined,
+        has: () => true,
+      },
+    })
+
+    jest.spyOn(extensionManager, 'get').mockReturnValue(mockModelExtension)
+    jest.spyOn(events, 'on').mockImplementationOnce((event, cb) => {
+      cb({ fetch: true })
+    })
+    renderHook(() => useModels())
+
+    expect(mockModelExtension.getModels).toHaveBeenCalled()
+  })
+
   it('should remove event listener on unmount', async () => {
     const removeListenerSpy = jest.spyOn(events, 'off')
 
diff --git a/web/screens/Settings/Advanced/index.tsx b/web/screens/Settings/Advanced/index.tsx
index a5956fc33..62a2aded0 100644
--- a/web/screens/Settings/Advanced/index.tsx
+++ b/web/screens/Settings/Advanced/index.tsx
@@ -223,7 +223,9 @@ const Advanced = () => {
     }
     setGpusInUse(updatedGpusInUse)
     await saveSettings({ gpusInUse: updatedGpusInUse })
-    window.core?.api?.relaunch()
+    // Reload window to apply changes
+    // This will trigger engine servers to restart
+    window.location.reload()
   }
 
   const gpuSelectionPlaceHolder =

From 0f05910f632e1c00013a18cc8d58af4de1383655 Mon Sep 17 00:00:00 2001
From: Louis <louis@jan.ai>
Date: Tue, 19 Nov 2024 19:48:50 +0700
Subject: [PATCH 24/46] chore: bump cortex.cpp 1.0.3-rc4

---
 extensions/inference-cortex-extension/bin/version.txt | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/extensions/inference-cortex-extension/bin/version.txt b/extensions/inference-cortex-extension/bin/version.txt
index b26ebdeac..32ee492e0 100644
--- a/extensions/inference-cortex-extension/bin/version.txt
+++ b/extensions/inference-cortex-extension/bin/version.txt
@@ -1 +1 @@
-1.0.3-rc2
\ No newline at end of file
+1.0.3-rc4
\ No newline at end of file

From 43eff865ff98e38bb4ca9babb8575a4272b72cfc Mon Sep 17 00:00:00 2001
From: Faisal Amir <urmauur@gmail.com>
Date: Tue, 19 Nov 2024 21:42:52 +0700
Subject: [PATCH 25/46] enhance ux local server page (#4045)

---
 web/containers/ServerLogs/index.tsx           | 49 ++++++++++++++++---
 .../LocalServerCenterPanel/index.tsx          | 10 ++--
 .../LocalServerLeftPanel/index.tsx            | 12 ++++-
 3 files changed, 57 insertions(+), 14 deletions(-)

diff --git a/web/containers/ServerLogs/index.tsx b/web/containers/ServerLogs/index.tsx
index e12d89fd1..2e978bd23 100644
--- a/web/containers/ServerLogs/index.tsx
+++ b/web/containers/ServerLogs/index.tsx
@@ -1,8 +1,8 @@
 /* eslint-disable @typescript-eslint/naming-convention */
 
-import { memo, useCallback, useEffect, useState } from 'react'
+import { memo, useCallback, useEffect, useRef, useState } from 'react'
 
-import { Button, useClipboard } from '@janhq/joi'
+import { Button, ScrollArea, useClipboard } from '@janhq/joi'
 import { useAtomValue } from 'jotai'
 
 import { FolderIcon, CheckIcon, CopyIcon } from 'lucide-react'
@@ -22,6 +22,9 @@ const ServerLogs = (props: ServerLogsProps) => {
   const { getLogs } = useLogs()
   const serverEnabled = useAtomValue(serverEnabledAtom)
   const [logs, setLogs] = useState<string[]>([])
+  const listRef = useRef<HTMLDivElement>(null)
+  const prevScrollTop = useRef(0)
+  const isUserManuallyScrollingUp = useRef(false)
 
   const updateLogs = useCallback(
     () =>
@@ -58,13 +61,45 @@ const ServerLogs = (props: ServerLogsProps) => {
 
   const clipboard = useClipboard({ timeout: 1000 })
 
+  const handleScroll = useCallback((event: React.UIEvent<HTMLElement>) => {
+    const currentScrollTop = event.currentTarget.scrollTop
+
+    if (prevScrollTop.current > currentScrollTop) {
+      isUserManuallyScrollingUp.current = true
+    } else {
+      const currentScrollTop = event.currentTarget.scrollTop
+      const scrollHeight = event.currentTarget.scrollHeight
+      const clientHeight = event.currentTarget.clientHeight
+
+      if (currentScrollTop + clientHeight >= scrollHeight) {
+        isUserManuallyScrollingUp.current = false
+      }
+    }
+
+    if (isUserManuallyScrollingUp.current === true) {
+      event.preventDefault()
+      event.stopPropagation()
+    }
+    prevScrollTop.current = currentScrollTop
+  }, [])
+
+  useEffect(() => {
+    if (isUserManuallyScrollingUp.current === true || !listRef.current) return
+    const scrollHeight = listRef.current?.scrollHeight ?? 0
+    listRef.current?.scrollTo({
+      top: scrollHeight,
+      behavior: 'instant',
+    })
+  }, [listRef.current?.scrollHeight, isUserManuallyScrollingUp, logs])
+
   return (
-    <div
+    <ScrollArea
+      ref={listRef}
       className={twMerge(
-        'p-4 pb-0',
-        !withCopy && 'max-w-[38vw] lg:max-w-[40vw] xl:max-w-[50vw]',
+        'h-[calc(100%-49px)] w-full p-4 py-0',
         logs.length === 0 && 'mx-auto'
       )}
+      onScroll={handleScroll}
     >
       {withCopy && (
         <div className="absolute right-2 top-7">
@@ -107,7 +142,7 @@ const ServerLogs = (props: ServerLogsProps) => {
       )}
       <div className="flex h-full w-full flex-col">
         {logs.length > 0 ? (
-          <code className="inline-block whitespace-break-spaces text-[13px]">
+          <code className="inline-block max-w-[38vw] whitespace-break-spaces text-[13px] lg:max-w-[40vw] xl:max-w-[50vw]">
             {logs.slice(-limit).map((log, i) => {
               return (
                 <p key={i} className="my-2 leading-relaxed">
@@ -256,7 +291,7 @@ const ServerLogs = (props: ServerLogsProps) => {
           </div>
         )}
       </div>
-    </div>
+    </ScrollArea>
   )
 }
 
diff --git a/web/screens/LocalServer/LocalServerCenterPanel/index.tsx b/web/screens/LocalServer/LocalServerCenterPanel/index.tsx
index e16ceb329..c5e42a9d2 100644
--- a/web/screens/LocalServer/LocalServerCenterPanel/index.tsx
+++ b/web/screens/LocalServer/LocalServerCenterPanel/index.tsx
@@ -1,6 +1,6 @@
 import { useEffect, useState } from 'react'
 
-import { Button, ScrollArea } from '@janhq/joi'
+import { Button } from '@janhq/joi'
 import { CodeIcon, Paintbrush } from 'lucide-react'
 
 import { InfoIcon } from 'lucide-react'
@@ -26,8 +26,8 @@ const LocalServerCenterPanel = () => {
 
   return (
     <CenterPanelContainer>
-      <div className="flex h-full w-full flex-col overflow-hidden">
-        <div className="sticky top-0 flex  items-center justify-between border-b border-[hsla(var(--app-border))] px-4 py-2">
+      <div className="flex h-full w-full flex-col">
+        <div className="sticky top-0 z-10  flex items-center justify-between border-b border-[hsla(var(--app-border))] bg-[hsla(var(--app-bg))] px-4 py-2">
           <h2 className="font-bold">Server Logs</h2>
           <div className="space-x-2">
             <Button
@@ -72,9 +72,7 @@ const LocalServerCenterPanel = () => {
             </div>
           </div>
         ) : (
-          <ScrollArea className="h-full w-full">
-            <ServerLogs />
-          </ScrollArea>
+          <ServerLogs />
         )}
       </div>
     </CenterPanelContainer>
diff --git a/web/screens/LocalServer/LocalServerLeftPanel/index.tsx b/web/screens/LocalServer/LocalServerLeftPanel/index.tsx
index 6f5de80ec..91e00b430 100644
--- a/web/screens/LocalServer/LocalServerLeftPanel/index.tsx
+++ b/web/screens/LocalServer/LocalServerLeftPanel/index.tsx
@@ -29,6 +29,7 @@ const LocalServerLeftPanel = () => {
   const [errorRangePort, setErrorRangePort] = useState(false)
   const [errorPrefix, setErrorPrefix] = useState(false)
   const [serverEnabled, setServerEnabled] = useAtom(serverEnabledAtom)
+  const [isLoading, setIsLoading] = useState(false)
 
   const { startModel, stateModel } = useActiveModel()
   const selectedModel = useAtomValue(selectedModelAtom)
@@ -66,6 +67,7 @@ const LocalServerLeftPanel = () => {
   const onStartServerClick = async () => {
     if (selectedModel == null) return
     try {
+      setIsLoading(true)
       const isStarted = await window.core?.api?.startServer({
         host,
         port,
@@ -79,8 +81,10 @@ const LocalServerLeftPanel = () => {
         setFirstTimeVisitAPIServer(false)
       }
       startModel(selectedModel.id, false).catch((e) => console.error(e))
+      setIsLoading(false)
     } catch (e) {
       console.error(e)
+      setIsLoading(false)
       toaster({
         title: `Failed to start server!`,
         description: 'Please check Server Logs for more details.',
@@ -93,6 +97,7 @@ const LocalServerLeftPanel = () => {
     window.core?.api?.stopServer()
     setServerEnabled(false)
     setLoadModelError(undefined)
+    setIsLoading(false)
   }
 
   const onToggleServer = async () => {
@@ -117,6 +122,7 @@ const LocalServerLeftPanel = () => {
               block
               theme={serverEnabled ? 'destructive' : 'primary'}
               disabled={
+                isLoading ||
                 stateModel.loading ||
                 errorRangePort ||
                 errorPrefix ||
@@ -124,7 +130,11 @@ const LocalServerLeftPanel = () => {
               }
               onClick={onToggleServer}
             >
-              {serverEnabled ? 'Stop' : 'Start'} Server
+              {isLoading
+                ? 'Starting...'
+                : serverEnabled
+                  ? 'Stop Server'
+                  : 'Start Server'}
             </Button>
             {serverEnabled && (
               <Button variant="soft" asChild className="whitespace-nowrap">

From 11637c52445b12769382b8c5f2169b7d6781d8ad Mon Sep 17 00:00:00 2001
From: Louis <louis@jan.ai>
Date: Tue, 19 Nov 2024 22:10:12 +0700
Subject: [PATCH 26/46] fix: correct OpenAI o1 model parameters

---
 .../browser/extensions/engines/helpers/sse.ts    |  4 +++-
 .../resources/models.json                        | 16 ++++++----------
 .../inference-openai-extension/src/index.ts      |  4 ++--
 extensions/model-extension/package.json          |  2 +-
 4 files changed, 12 insertions(+), 14 deletions(-)

diff --git a/core/src/browser/extensions/engines/helpers/sse.ts b/core/src/browser/extensions/engines/helpers/sse.ts
index 9713256b3..bd9945d3c 100644
--- a/core/src/browser/extensions/engines/helpers/sse.ts
+++ b/core/src/browser/extensions/engines/helpers/sse.ts
@@ -45,7 +45,9 @@ export function requestInference(
           subscriber.complete()
           return
         }
-        if (model.parameters?.stream === false) {
+        // The stream parameter may be overridden in the request body
+        // (transformed payload), so check it before the model's own value
+        if (requestBody?.stream === false || model.parameters?.stream === false) {
           const data = await response.json()
           if (transformResponse) {
             subscriber.next(transformResponse(data))
diff --git a/extensions/inference-openai-extension/resources/models.json b/extensions/inference-openai-extension/resources/models.json
index 124e123b9..3f41c0a7d 100644
--- a/extensions/inference-openai-extension/resources/models.json
+++ b/extensions/inference-openai-extension/resources/models.json
@@ -97,11 +97,9 @@
     "format": "api",
     "settings": {},
     "parameters": {
-      "max_tokens": 4096,
-      "temperature": 0.7,
-      "top_p": 0.95,
-      "stream": true,
-      "stop": [],
+      "temperature": 1,
+      "top_p": 1,
+      "max_tokens": 32768,
       "frequency_penalty": 0,
       "presence_penalty": 0
     },
@@ -125,11 +123,9 @@
     "format": "api",
     "settings": {},
     "parameters": {
-      "max_tokens": 4096,
-      "temperature": 0.7,
-      "top_p": 0.95,
-      "stream": true,
-      "stop": [],
+      "temperature": 1,
+      "top_p": 1,
+      "max_tokens": 65536,
       "frequency_penalty": 0,
       "presence_penalty": 0
     },
diff --git a/extensions/inference-openai-extension/src/index.ts b/extensions/inference-openai-extension/src/index.ts
index 64880b678..d484c8ae5 100644
--- a/extensions/inference-openai-extension/src/index.ts
+++ b/extensions/inference-openai-extension/src/index.ts
@@ -76,11 +76,11 @@ export default class JanInferenceOpenAIExtension extends RemoteOAIEngine {
   transformPayload = (payload: OpenAIPayloadType): OpenAIPayloadType => {
     // Transform the payload for preview models
     if (this.previewModels.includes(payload.model)) {
-      const { max_tokens, temperature, top_p, stop, ...params } = payload
+      const { max_tokens, stop, ...params } = payload
       return {
         ...params,
         max_completion_tokens: max_tokens,
-        stream: false // o1 only support stream = false
+        stream: false, // o1 only support stream = false
       }
     }
     // Pass through for non-preview models
diff --git a/extensions/model-extension/package.json b/extensions/model-extension/package.json
index bd834454a..ca563ff9f 100644
--- a/extensions/model-extension/package.json
+++ b/extensions/model-extension/package.json
@@ -1,7 +1,7 @@
 {
   "name": "@janhq/model-extension",
   "productName": "Model Management",
-  "version": "1.0.34",
+  "version": "1.0.35",
   "description": "Model Management Extension provides model exploration and seamless downloads",
   "main": "dist/index.js",
   "author": "Jan <service@jan.ai>",

From af2058784664d614efd62bd0dc48dec6f725021d Mon Sep 17 00:00:00 2001
From: Louis <louis@jan.ai>
Date: Tue, 19 Nov 2024 22:39:35 +0700
Subject: [PATCH 27/46] fix: handle symlink model import failure - fallback to
 legacy model run

---
 .../inference-cortex-extension/src/index.ts       | 15 ++++++++-------
 1 file changed, 8 insertions(+), 7 deletions(-)

diff --git a/extensions/inference-cortex-extension/src/index.ts b/extensions/inference-cortex-extension/src/index.ts
index 8236d7de4..fc7e250ab 100644
--- a/extensions/inference-cortex-extension/src/index.ts
+++ b/extensions/inference-cortex-extension/src/index.ts
@@ -100,12 +100,10 @@ export default class JanInferenceCortexExtension extends LocalOAIEngine {
       // Legacy chat model support
       model.settings = {
         ...model.settings,
-        llama_model_path: model.file_path
-          ? await joinPath([
-              await dirName(model.file_path),
-              model.settings.llama_model_path,
-            ])
-          : await getModelFilePath(model, model.settings.llama_model_path),
+        llama_model_path: await getModelFilePath(
+          model,
+          model.settings.llama_model_path
+        ),
       }
     } else {
       const { llama_model_path, ...settings } = model.settings
@@ -262,7 +260,7 @@ export default class JanInferenceCortexExtension extends LocalOAIEngine {
 
 /// Legacy
 const getModelFilePath = async (
-  model: Model,
+  model: Model & { file_path?: string },
   file: string
 ): Promise<string> => {
   // Symlink to the model file
@@ -272,6 +270,10 @@ const getModelFilePath = async (
   ) {
     return model.sources[0]?.url
   }
+  // Prefer the directory of model.file_path when the model provides one
+  if (model.file_path) {
+    return joinPath([await dirName(model.file_path), file])
+  }
   return joinPath([await getJanDataFolderPath(), 'models', model.id, file])
 }
 ///
 ///

From 8bd0f3da215ffa798eaf167267d092dd950bed5a Mon Sep 17 00:00:00 2001
From: Louis <louis@jan.ai>
Date: Tue, 19 Nov 2024 23:18:52 +0700
Subject: [PATCH 28/46] fix: inconsistent error handling

---
 web/hooks/useActiveModel.ts                   | 22 ++++++------------
 web/hooks/useSendChatMessage.ts               |  8 +------
 .../LoadModelError/index.tsx                  | 23 ++++---------------
 3 files changed, 12 insertions(+), 41 deletions(-)

diff --git a/web/hooks/useActiveModel.ts b/web/hooks/useActiveModel.ts
index 7b9ee98e0..63513bee2 100644
--- a/web/hooks/useActiveModel.ts
+++ b/web/hooks/useActiveModel.ts
@@ -26,15 +26,13 @@ export const stateModelAtom = atom<ModelState>({
   model: undefined,
 })
 
-const pendingModelLoadAtom = atom<boolean>(false)
-
 export function useActiveModel() {
   const [activeModel, setActiveModel] = useAtom(activeModelAtom)
   const activeThread = useAtomValue(activeThreadAtom)
   const [stateModel, setStateModel] = useAtom(stateModelAtom)
   const downloadedModels = useAtomValue(downloadedModelsAtom)
   const setLoadModelError = useSetAtom(loadModelErrorAtom)
-  const [pendingModelLoad, setPendingModelLoad] = useAtom(pendingModelLoadAtom)
+  const pendingModelLoad = useRef(false)
   const isVulkanEnabled = useAtomValue(vulkanEnabledAtom)
 
   const downloadedModelsRef = useRef<Model[]>([])
@@ -55,7 +53,7 @@ export function useActiveModel() {
     if (activeModel) {
       await stopModel(activeModel)
     }
-    setPendingModelLoad(true)
+    pendingModelLoad.current = true
 
     let model = downloadedModelsRef?.current.find((e) => e.id === modelId)
 
@@ -120,16 +118,16 @@ export function useActiveModel() {
           undefined,
         }))
 
-        if (!pendingModelLoad && abortable) {
+        if (!pendingModelLoad.current && abortable) {
           return Promise.reject(new Error('aborted'))
         }
 
         toaster({
           title: 'Failed!',
-          description: `Model ${model.id} failed to start.`,
+          description: `Model ${model.id} failed to start. ${error.message ?? ''}`,
           type: 'error',
         })
-        setLoadModelError(error)
+        setLoadModelError(error.message ?? error)
         return Promise.reject(error)
       })
   }
@@ -147,16 +145,10 @@ export function useActiveModel() {
         .then(() => {
           setActiveModel(undefined)
           setStateModel({ state: 'start', loading: false, model: undefined })
-          setPendingModelLoad(false)
+          pendingModelLoad.current = false
         })
     },
-    [
-      activeModel,
-      setStateModel,
-      setActiveModel,
-      setPendingModelLoad,
-      stateModel,
-    ]
+    [activeModel, setStateModel, setActiveModel, stateModel]
   )
 
   const stopInference = useCallback(async () => {
diff --git a/web/hooks/useSendChatMessage.ts b/web/hooks/useSendChatMessage.ts
index cda53b24a..bf0333d37 100644
--- a/web/hooks/useSendChatMessage.ts
+++ b/web/hooks/useSendChatMessage.ts
@@ -27,7 +27,7 @@ import { MessageRequestBuilder } from '@/utils/messageRequestBuilder'
 
 import { ThreadMessageBuilder } from '@/utils/threadMessageBuilder'
 
-import { loadModelErrorAtom, useActiveModel } from './useActiveModel'
+import { useActiveModel } from './useActiveModel'
 
 import { extensionManager } from '@/extension/ExtensionManager'
 import {
@@ -60,10 +60,8 @@ export default function useSendChatMessage() {
   const currentMessages = useAtomValue(getCurrentChatMessagesAtom)
   const selectedModel = useAtomValue(selectedModelAtom)
   const { activeModel, startModel } = useActiveModel()
-  const loadModelFailed = useAtomValue(loadModelErrorAtom)
 
   const modelRef = useRef<Model | undefined>()
-  const loadModelFailedRef = useRef<string | undefined>()
   const activeModelParams = useAtomValue(getActiveThreadModelParamsAtom)
   const engineParamsUpdate = useAtomValue(engineParamsUpdateAtom)
 
@@ -80,10 +78,6 @@ export default function useSendChatMessage() {
     modelRef.current = activeModel
   }, [activeModel])
 
-  useEffect(() => {
-    loadModelFailedRef.current = loadModelFailed
-  }, [loadModelFailed])
-
   useEffect(() => {
     activeThreadRef.current = activeThread
   }, [activeThread])
diff --git a/web/screens/Thread/ThreadCenterPanel/LoadModelError/index.tsx b/web/screens/Thread/ThreadCenterPanel/LoadModelError/index.tsx
index 16a0024e8..5829a6923 100644
--- a/web/screens/Thread/ThreadCenterPanel/LoadModelError/index.tsx
+++ b/web/screens/Thread/ThreadCenterPanel/LoadModelError/index.tsx
@@ -20,23 +20,8 @@ const LoadModelError = () => {
   const setSelectedSettingScreen = useSetAtom(selectedSettingAtom)
   const activeThread = useAtomValue(activeThreadAtom)
 
-  const PORT_NOT_AVAILABLE = 'PORT_NOT_AVAILABLE'
-
   const ErrorMessage = () => {
-    if (loadModelError === PORT_NOT_AVAILABLE) {
-      return (
-        <p>
-          Port 3928 is currently unavailable. Check for conflicting apps, or
-          access&nbsp;
-          <span
-            className="cursor-pointer text-[hsla(var(--app-link))]"
-            onClick={() => setModalTroubleShooting(true)}
-          >
-            troubleshooting assistance
-          </span>
-        </p>
-      )
-    } else if (
+    if (
       typeof loadModelError?.includes === 'function' &&
       loadModelError.includes('EXTENSION_IS_NOT_INSTALLED')
     ) {
@@ -63,10 +48,10 @@ const LoadModelError = () => {
       )
     } else {
       return (
-        <div>
-          Apologies, {`Something's wrong.`}.&nbsp;
+        <div className="mx-6 flex flex-col items-center space-y-2 text-center font-medium text-[hsla(var(--text-secondary))]">
+          {loadModelError && <p>{loadModelError}</p>}
           <p>
-            Access&nbsp;
+            {`Something's wrong.`}&nbsp;Access&nbsp;
             <span
               className="cursor-pointer text-[hsla(var(--app-link))]"
               onClick={() => setModalTroubleShooting(true)}

From 0b3847a8b150332ee5d8a5aeeec86c06c43cf500 Mon Sep 17 00:00:00 2001
From: Louis <louis@jan.ai>
Date: Tue, 19 Nov 2024 23:38:06 +0700
Subject: [PATCH 29/46] chore: ensure server is started before letting other
 requests go through

---
 extensions/inference-cortex-extension/src/index.ts | 9 +++++----
 1 file changed, 5 insertions(+), 4 deletions(-)

diff --git a/extensions/inference-cortex-extension/src/index.ts b/extensions/inference-cortex-extension/src/index.ts
index 8236d7de4..98e2d0dd5 100644
--- a/extensions/inference-cortex-extension/src/index.ts
+++ b/extensions/inference-cortex-extension/src/index.ts
@@ -69,12 +69,13 @@ export default class JanInferenceCortexExtension extends LocalOAIEngine {
 
     super.onLoad()
 
-    await this.queue.add(() => this.clean())
-    this.queue.add(() => this.healthz())
-    this.queue.add(() => this.setDefaultEngine(systemInfo))
+    this.queue.add(() => this.clean())
+    
     // Run the process watchdog
     const systemInfo = await systemInformation()
-    await executeOnMain(NODE, 'run', systemInfo)
+    this.queue.add(() => executeOnMain(NODE, 'run', systemInfo))
+    this.queue.add(() => this.healthz())
+    this.queue.add(() => this.setDefaultEngine(systemInfo))
     this.subscribeToEvents()
 
     window.addEventListener('beforeunload', () => {

From b4f25408c4a052a3e46dceb51d01a0369c19ef06 Mon Sep 17 00:00:00 2001
From: Louis <louis@jan.ai>
Date: Wed, 20 Nov 2024 00:08:56 +0700
Subject: [PATCH 30/46] chore: append vision models to legacy list

---
 extensions/model-extension/src/index.ts | 5 ++++-
 1 file changed, 4 insertions(+), 1 deletion(-)

diff --git a/extensions/model-extension/src/index.ts b/extensions/model-extension/src/index.ts
index f1ce069f6..4ebc56d54 100644
--- a/extensions/model-extension/src/index.ts
+++ b/extensions/model-extension/src/index.ts
@@ -143,7 +143,10 @@ export default class JanModelExtension extends ModelExtension {
      * There is no model to import
      * just return fetched models
      */
-    if (!toImportModels.length) return fetchedModels
+    if (!toImportModels.length)
+      return fetchedModels.concat(
+        legacyModels.filter((e) => e.settings?.vision_model)
+      )
 
     console.log('To import models:', toImportModels.length)
     /**

From 3ea2d9c0ae73e0b50b8364c2d9a6e6abeb971c97 Mon Sep 17 00:00:00 2001
From: Louis <louis@jan.ai>
Date: Wed, 20 Nov 2024 12:33:42 +0700
Subject: [PATCH 31/46] fix: bump cortex.cpp to latest version - recursively
 scanning models folder should also include remote models

---
 extensions/inference-cortex-extension/bin/version.txt  | 2 +-
 extensions/inference-openrouter-extension/src/index.ts | 2 +-
 extensions/model-extension/src/index.ts                | 2 +-
 3 files changed, 3 insertions(+), 3 deletions(-)

diff --git a/extensions/inference-cortex-extension/bin/version.txt b/extensions/inference-cortex-extension/bin/version.txt
index 32ee492e0..52da54083 100644
--- a/extensions/inference-cortex-extension/bin/version.txt
+++ b/extensions/inference-cortex-extension/bin/version.txt
@@ -1 +1 @@
-1.0.3-rc4
\ No newline at end of file
+1.0.3-rc5
\ No newline at end of file
diff --git a/extensions/inference-openrouter-extension/src/index.ts b/extensions/inference-openrouter-extension/src/index.ts
index 75d1188a8..1b2cd014d 100644
--- a/extensions/inference-openrouter-extension/src/index.ts
+++ b/extensions/inference-openrouter-extension/src/index.ts
@@ -83,6 +83,6 @@ export default class JanInferenceOpenRouterExtension extends RemoteOAIEngine {
 
   transformPayload = (payload: PayloadType) => ({
     ...payload,
-    model: this.model,
+    model: payload.model !== 'open-router-auto' ? payload.model : this.model,
   })
 }
diff --git a/extensions/model-extension/src/index.ts b/extensions/model-extension/src/index.ts
index 4ebc56d54..c63510c37 100644
--- a/extensions/model-extension/src/index.ts
+++ b/extensions/model-extension/src/index.ts
@@ -145,7 +145,7 @@ export default class JanModelExtension extends ModelExtension {
      */
     if (!toImportModels.length)
       return fetchedModels.concat(
-        legacyModels.filter((e) => e.settings?.vision_model)
+        legacyModels.filter((e) => !fetchedModels.some((x) => x.id === e.id))
       )
 
     console.log('To import models:', toImportModels.length)

From 239e5f52e86488a536979f67c8f1ffec46148664 Mon Sep 17 00:00:00 2001
From: Louis <louis@jan.ai>
Date: Wed, 20 Nov 2024 12:43:10 +0700
Subject: [PATCH 32/46] fix: force vision models to run with legacy settings

---
 extensions/inference-cortex-extension/src/index.ts | 7 +++++--
 1 file changed, 5 insertions(+), 2 deletions(-)

diff --git a/extensions/inference-cortex-extension/src/index.ts b/extensions/inference-cortex-extension/src/index.ts
index 174ce7923..23054942e 100644
--- a/extensions/inference-cortex-extension/src/index.ts
+++ b/extensions/inference-cortex-extension/src/index.ts
@@ -95,7 +95,7 @@ export default class JanInferenceCortexExtension extends LocalOAIEngine {
     model: Model & { file_path?: string }
   ): Promise<void> {
     if (
-      model.engine === InferenceEngine.nitro &&
+      (model.engine === InferenceEngine.nitro || model.settings.vision_model) &&
       model.settings.llama_model_path
     ) {
       // Legacy chat model support
@@ -111,7 +111,10 @@ export default class JanInferenceCortexExtension extends LocalOAIEngine {
       model.settings = settings
     }
 
-    if (model.engine === InferenceEngine.nitro && model.settings.mmproj) {
+    if (
+      (model.engine === InferenceEngine.nitro || model.settings.vision_model) &&
+      model.settings.mmproj
+    ) {
       // Legacy clip vision model support
       model.settings = {
         ...model.settings,

From 33bfd0eed8f0531a44a036eeff889fdd93563351 Mon Sep 17 00:00:00 2001
From: Louis <louis@jan.ai>
Date: Wed, 20 Nov 2024 20:39:18 +0700
Subject: [PATCH 33/46] fix: disable timeout on model load

---
 extensions/inference-cortex-extension/src/index.ts | 1 +
 1 file changed, 1 insertion(+)

diff --git a/extensions/inference-cortex-extension/src/index.ts b/extensions/inference-cortex-extension/src/index.ts
index 23054942e..3ff550504 100644
--- a/extensions/inference-cortex-extension/src/index.ts
+++ b/extensions/inference-cortex-extension/src/index.ts
@@ -136,6 +136,7 @@ export default class JanInferenceCortexExtension extends LocalOAIEngine {
                 ? InferenceEngine.cortex_llamacpp
                 : model.engine,
           },
+          timeout: false,
         })
         .json()
         .catch(async (e) => {

From c6e1bb55f984a192a959e30c6962df92667bc469 Mon Sep 17 00:00:00 2001
From: Louis <louis@jan.ai>
Date: Wed, 20 Nov 2024 21:14:37 +0700
Subject: [PATCH 34/46] chore: remote models error handling

---
 core/src/browser/extensions/engines/helpers/sse.ts | 2 +-
 web/containers/ErrorMessage/index.tsx              | 8 +++-----
 web/containers/Providers/EventHandler.tsx          | 6 +++++-
 3 files changed, 9 insertions(+), 7 deletions(-)

diff --git a/core/src/browser/extensions/engines/helpers/sse.ts b/core/src/browser/extensions/engines/helpers/sse.ts
index bd9945d3c..084267582 100644
--- a/core/src/browser/extensions/engines/helpers/sse.ts
+++ b/core/src/browser/extensions/engines/helpers/sse.ts
@@ -38,7 +38,7 @@ export function requestInference(
             errorCode = ErrorCode.InvalidApiKey
           }
           const error = {
-            message: data.error?.message ?? 'Error occurred.',
+            message: data.error?.message ?? data.message ?? 'Error occurred.',
             code: errorCode,
           }
           subscriber.error(error)
diff --git a/web/containers/ErrorMessage/index.tsx b/web/containers/ErrorMessage/index.tsx
index 18558c1d8..be26ad44a 100644
--- a/web/containers/ErrorMessage/index.tsx
+++ b/web/containers/ErrorMessage/index.tsx
@@ -27,8 +27,6 @@ const ErrorMessage = ({ message }: { message: ThreadMessage }) => {
 
   const getErrorTitle = () => {
     switch (message.error_code) {
-      case ErrorCode.Unknown:
-        return 'Apologies, something’s amiss!'
       case ErrorCode.InvalidApiKey:
       case ErrorCode.AuthenticationError:
       case ErrorCode.InvalidRequestError:
@@ -55,17 +53,17 @@ const ErrorMessage = ({ message }: { message: ThreadMessage }) => {
         )
       default:
         return (
-          <>
+          <p>
             {message.content[0]?.text?.value && (
               <AutoLink text={message.content[0].text.value} />
             )}
-          </>
+          </p>
         )
     }
   }
 
   return (
-    <div className="mt-10">
+    <div className="mx-auto mt-10 max-w-[700px]">
       {message.status === MessageStatus.Error && (
         <div
           key={message.id}
diff --git a/web/containers/Providers/EventHandler.tsx b/web/containers/Providers/EventHandler.tsx
index 6cad910f7..b51468099 100644
--- a/web/containers/Providers/EventHandler.tsx
+++ b/web/containers/Providers/EventHandler.tsx
@@ -180,7 +180,11 @@ export default function EventHandler({ children }: { children: ReactNode }) {
           setIsGeneratingResponse(false)
         }
         return
-      } else if (message.status === MessageStatus.Error) {
+      } else if (
+        message.status === MessageStatus.Error &&
+        activeModelRef.current?.engine &&
+        isLocalEngine(activeModelRef.current.engine)
+      ) {
         ;(async () => {
           if (
             !(await extensionManager

From 4820218a112009e89f2fe8928f12b057d4d35ec3 Mon Sep 17 00:00:00 2001
From: Louis <louis@jan.ai>
Date: Wed, 20 Nov 2024 21:36:08 +0700
Subject: [PATCH 35/46] fix: broken cohere API response transform

---
 extensions/inference-cohere-extension/src/index.ts | 4 +++-
 1 file changed, 3 insertions(+), 1 deletion(-)

diff --git a/extensions/inference-cohere-extension/src/index.ts b/extensions/inference-cohere-extension/src/index.ts
index dd7f03317..2615ea893 100644
--- a/extensions/inference-cohere-extension/src/index.ts
+++ b/extensions/inference-cohere-extension/src/index.ts
@@ -113,6 +113,8 @@ export default class JanInferenceCohereExtension extends RemoteOAIEngine {
   }
 
   transformResponse = (data: any) => {
-    return typeof data === 'object' ? data.text : JSON.parse(data).text ?? ''
+    return typeof data === 'object'
+      ? data.text
+      : (JSON.parse(data.replace('data: ', '').trim()).text ?? '')
   }
 }

From df0801d6d949151aaddff30ea3e7ac3d36b0a6a9 Mon Sep 17 00:00:00 2001
From: Louis <louis@jan.ai>
Date: Wed, 20 Nov 2024 21:40:38 +0700
Subject: [PATCH 36/46] chore: add back stream parameter to o1 models

---
 extensions/inference-openai-extension/package.json          | 2 +-
 extensions/inference-openai-extension/resources/models.json | 2 ++
 2 files changed, 3 insertions(+), 1 deletion(-)

diff --git a/extensions/inference-openai-extension/package.json b/extensions/inference-openai-extension/package.json
index 50fe12349..9700383d6 100644
--- a/extensions/inference-openai-extension/package.json
+++ b/extensions/inference-openai-extension/package.json
@@ -1,7 +1,7 @@
 {
   "name": "@janhq/inference-openai-extension",
   "productName": "OpenAI Inference Engine",
-  "version": "1.0.3",
+  "version": "1.0.4",
   "description": "This extension enables OpenAI chat completion API calls",
   "main": "dist/index.js",
   "module": "dist/module.js",
diff --git a/extensions/inference-openai-extension/resources/models.json b/extensions/inference-openai-extension/resources/models.json
index 3f41c0a7d..a34bc5460 100644
--- a/extensions/inference-openai-extension/resources/models.json
+++ b/extensions/inference-openai-extension/resources/models.json
@@ -99,6 +99,7 @@
     "parameters": {
       "temperature": 1,
       "top_p": 1,
+      "stream": true,
       "max_tokens": 32768,
       "frequency_penalty": 0,
       "presence_penalty": 0
@@ -126,6 +127,7 @@
       "temperature": 1,
       "top_p": 1,
       "max_tokens": 65536,
+      "stream": true,
       "frequency_penalty": 0,
       "presence_penalty": 0
     },

From 43c5feb3cc1dc0be11a0ad1882904cb7bee42c63 Mon Sep 17 00:00:00 2001
From: Louis <louis@jan.ai>
Date: Wed, 20 Nov 2024 21:45:08 +0700
Subject: [PATCH 37/46] chore: disable stream: false on o1 models

---
 extensions/inference-openai-extension/src/index.ts | 1 -
 1 file changed, 1 deletion(-)

diff --git a/extensions/inference-openai-extension/src/index.ts b/extensions/inference-openai-extension/src/index.ts
index d484c8ae5..18bc4e0aa 100644
--- a/extensions/inference-openai-extension/src/index.ts
+++ b/extensions/inference-openai-extension/src/index.ts
@@ -80,7 +80,6 @@ export default class JanInferenceOpenAIExtension extends RemoteOAIEngine {
       return {
         ...params,
         max_completion_tokens: max_tokens,
-        stream: false, // o1 only support stream = false
       }
     }
     // Pass through for non-preview models

From e0a4cafd4617543c53e171553c34b9fcee0ef6ed Mon Sep 17 00:00:00 2001
From: Louis <louis@jan.ai>
Date: Wed, 20 Nov 2024 22:14:24 +0700
Subject: [PATCH 38/46] chore: update test case

---
 web/containers/ErrorMessage/index.test.tsx | 3 ---
 1 file changed, 3 deletions(-)

diff --git a/web/containers/ErrorMessage/index.test.tsx b/web/containers/ErrorMessage/index.test.tsx
index d2ae5aa81..306a80e32 100644
--- a/web/containers/ErrorMessage/index.test.tsx
+++ b/web/containers/ErrorMessage/index.test.tsx
@@ -63,9 +63,6 @@ describe('ErrorMessage Component', () => {
 
     render(<ErrorMessage message={message} />)
 
-    expect(
-      screen.getByText('Apologies, something’s amiss!')
-    ).toBeInTheDocument()
     expect(screen.getByText('troubleshooting assistance')).toBeInTheDocument()
   })
 

From 06df084342e17eb2e2514aae7b756d8ff722a30f Mon Sep 17 00:00:00 2001
From: Louis <louis@jan.ai>
Date: Thu, 21 Nov 2024 09:16:15 +0700
Subject: [PATCH 39/46] fix: temporarily create a deps folder to have cortex.cpp
 work

---
 extensions/inference-cortex-extension/download.sh | 1 +
 1 file changed, 1 insertion(+)

diff --git a/extensions/inference-cortex-extension/download.sh b/extensions/inference-cortex-extension/download.sh
index 9c0ebbe64..a8f1ade9e 100755
--- a/extensions/inference-cortex-extension/download.sh
+++ b/extensions/inference-cortex-extension/download.sh
@@ -30,6 +30,7 @@ if [ "$OS_TYPE" == "Linux" ]; then
     download "${ENGINE_DOWNLOAD_URL}-linux-amd64-vulkan.tar.gz" -e --strip 1 -o "${SHARED_PATH}/engines/cortex.llamacpp/linux-amd64-vulkan/v${ENGINE_VERSION}" 1
     download "${CUDA_DOWNLOAD_URL}/cuda-12-0-linux-amd64.tar.gz" -e --strip 1 -o "${SHARED_PATH}" 1
     download "${CUDA_DOWNLOAD_URL}/cuda-11-7-linux-amd64.tar.gz" -e --strip 1 -o "${SHARED_PATH}" 1
+    mkdir -p "${SHARED_PATH}/engines/cortex.llamacpp/deps"
 
 elif [ "$OS_TYPE" == "Darwin" ]; then
     # macOS downloads

From 85907094807d413ebacf6dfbe5e82ab40c74396a Mon Sep 17 00:00:00 2001
From: Louis <louis@jan.ai>
Date: Thu, 21 Nov 2024 10:53:51 +0700
Subject: [PATCH 40/46] fix: correct download script to relocate dll files
 properly

---
 extensions/inference-cortex-extension/download.bat | 8 ++++----
 1 file changed, 4 insertions(+), 4 deletions(-)

diff --git a/extensions/inference-cortex-extension/download.bat b/extensions/inference-cortex-extension/download.bat
index 25527eb36..1ab14a03c 100644
--- a/extensions/inference-cortex-extension/download.bat
+++ b/extensions/inference-cortex-extension/download.bat
@@ -8,7 +8,7 @@ set ENGINE_VERSION=0.1.39
 set VERSION=v0.1.39
 set DOWNLOAD_URL=https://github.com/janhq/cortex.llamacpp/releases/download/%VERSION%/cortex.llamacpp-0.1.39-windows-amd64
 set CUDA_DOWNLOAD_URL=https://github.com/janhq/cortex.llamacpp/releases/download/%VERSION%
-set SUBFOLDERS=noavx-cuda-12-0 noavx-cuda-11-7 avx2-cuda-12-0 avx2-cuda-11-7 noavx avx avx2 avx512 vulkan
+set SUBFOLDERS=windows-amd64-noavx-cuda-12-0 windows-amd64-noavx-cuda-11-7 windows-amd64-avx2-cuda-12-0 windows-amd64-avx2-cuda-11-7 windows-amd64-noavx windows-amd64-avx windows-amd64-avx2 windows-amd64-avx512 windows-amd64-vulkan
 
 call .\node_modules\.bin\download -e --strip 1 -o %BIN_PATH% https://github.com/janhq/cortex.cpp/releases/download/v%CORTEX_VERSION%/cortex-%CORTEX_VERSION%-windows-amd64.tar.gz
 call .\node_modules\.bin\download %DOWNLOAD_URL%-avx2-cuda-12-0.tar.gz -e --strip 1 -o %SHARED_PATH%/engines/cortex.llamacpp/windows-amd64-avx2-cuda-12-0/v%ENGINE_VERSION%
@@ -29,12 +29,12 @@ del %BIN_PATH%\cortex.exe
 
 @REM Loop through each folder and move DLLs (excluding engine.dll)
 for %%F in (%SUBFOLDERS%) do (
-    echo Processing folder: %SHARED_PATH%\engines\cortex.llamacpp\%%F
+    echo Processing folder: %SHARED_PATH%\engines\cortex.llamacpp\%%F\v%ENGINE_VERSION%
 
     @REM Move all .dll files except engine.dll
-    for %%D in (%SHARED_PATH%\engines\cortex.llamacpp\%%F\*.dll) do (
+    for %%D in (%SHARED_PATH%\engines\cortex.llamacpp\%%F\v%ENGINE_VERSION%\*.dll) do (
         if /I not "%%~nxD"=="engine.dll" (
-            move "%%D" "%SHARED_PATH%"
+            move "%%D" "%BIN_PATH%"
         )
     )
 )

From daefa99a9de8e65e80b896cf0c670a31b55a5da8 Mon Sep 17 00:00:00 2001
From: Louis <louis@jan.ai>
Date: Thu, 21 Nov 2024 11:17:14 +0700
Subject: [PATCH 41/46] chore: keep the deps folder with gitkeep placeholder
 file

---
 extensions/inference-cortex-extension/download.sh | 1 +
 1 file changed, 1 insertion(+)

diff --git a/extensions/inference-cortex-extension/download.sh b/extensions/inference-cortex-extension/download.sh
index a8f1ade9e..aa09d6749 100755
--- a/extensions/inference-cortex-extension/download.sh
+++ b/extensions/inference-cortex-extension/download.sh
@@ -31,6 +31,7 @@ if [ "$OS_TYPE" == "Linux" ]; then
     download "${CUDA_DOWNLOAD_URL}/cuda-12-0-linux-amd64.tar.gz" -e --strip 1 -o "${SHARED_PATH}" 1
     download "${CUDA_DOWNLOAD_URL}/cuda-11-7-linux-amd64.tar.gz" -e --strip 1 -o "${SHARED_PATH}" 1
     mkdir -p "${SHARED_PATH}/engines/cortex.llamacpp/deps"
+    touch "${SHARED_PATH}/engines/cortex.llamacpp/deps/keep"
 
 elif [ "$OS_TYPE" == "Darwin" ]; then
     # macOS downloads

From 0039c5234e02f24ede1852366500ad93f4fa3f87 Mon Sep 17 00:00:00 2001
From: Faisal Amir <urmauur@gmail.com>
Date: Thu, 21 Nov 2024 13:08:42 +0700
Subject: [PATCH 42/46] fix: edge cases empty codeblock (#4061)

---
 .../ThreadCenterPanel/SimpleTextMessage/index.tsx   |  6 ++++--
 web/styles/components/code-block.scss               | 13 ++++++-------
 2 files changed, 10 insertions(+), 9 deletions(-)

diff --git a/web/screens/Thread/ThreadCenterPanel/SimpleTextMessage/index.tsx b/web/screens/Thread/ThreadCenterPanel/SimpleTextMessage/index.tsx
index 26e4659e8..cc5bad60e 100644
--- a/web/screens/Thread/ThreadCenterPanel/SimpleTextMessage/index.tsx
+++ b/web/screens/Thread/ThreadCenterPanel/SimpleTextMessage/index.tsx
@@ -104,7 +104,9 @@ const SimpleTextMessage: React.FC<ThreadMessage> = (props) => {
             ''
           )
 
-          if (!language) return node
+          if (extractCodeLines(node) === '') {
+            return node
+          }
 
           return {
             type: 'element',
@@ -145,7 +147,7 @@ const SimpleTextMessage: React.FC<ThreadMessage> = (props) => {
                             type: 'text',
                             value: language
                               ? `${getLanguageFromExtension(language)}`
-                              : 'No file name',
+                              : '',
                           },
                         ],
                       },
diff --git a/web/styles/components/code-block.scss b/web/styles/components/code-block.scss
index e739e4e24..b94ca97ec 100644
--- a/web/styles/components/code-block.scss
+++ b/web/styles/components/code-block.scss
@@ -55,7 +55,6 @@
 .hljs {
   overflow: auto;
   display: block;
-  padding: 16px;
   font-size: 14px;
   border-bottom-left-radius: 0.4rem;
   border-bottom-right-radius: 0.4rem;
@@ -65,15 +64,16 @@
 pre {
   background: hsla(var(--app-code-block));
   overflow: auto;
-  padding: 8px 16px;
+
   border-radius: 0.4rem;
 }
 pre > code {
-  text-indent: 0;
   white-space: pre;
   font-size: 14px;
   overflow: auto;
   color: #f8f8f2;
+  display: block;
+  padding: 16px;
 }
 
 .hljs-emphasis {
@@ -155,12 +155,11 @@ span.code-line {
 
 .numbered-code-line::before {
   content: attr(data-line-number);
-
-  margin-left: -8px;
+  margin-left: -4px;
   margin-right: 16px;
-  width: 1rem;
+  width: 1.2rem;
   font-size: 12px;
-  color: var(--color-text-weak);
+  color: hsla(var(--text-tertiary));
   text-align: right;
 
   display: inline-block;

From fe6412e1d4cb7b6ba1553ee5cb66793441e1dd9b Mon Sep 17 00:00:00 2001
From: Louis <louis@jan.ai>
Date: Thu, 21 Nov 2024 13:13:29 +0700
Subject: [PATCH 43/46] feat: configure HuggingfaceToken via cortex.cpp

---
 extensions/model-extension/src/index.ts | 23 +++++++++++++++++++++++
 1 file changed, 23 insertions(+)

diff --git a/extensions/model-extension/src/index.ts b/extensions/model-extension/src/index.ts
index c63510c37..e29084bc2 100644
--- a/extensions/model-extension/src/index.ts
+++ b/extensions/model-extension/src/index.ts
@@ -20,6 +20,10 @@ import { deleteModelFiles } from './legacy/delete'
 
 declare const SETTINGS: Array<any>
 
+export enum Settings {
+  huggingfaceToken = 'hugging-face-access-token',
+}
+
 /**
  * A extension for models
  */
@@ -33,10 +37,29 @@ export default class JanModelExtension extends ModelExtension {
   async onLoad() {
     this.registerSettings(SETTINGS)
 
+    // Configure huggingface token if available
+    const huggingfaceToken = await this.getSetting<string>(
+      Settings.huggingfaceToken,
+      undefined
+    )
+    if (huggingfaceToken)
+      this.cortexAPI.configs({ huggingface_token: huggingfaceToken })
+
     // Listen to app download events
     this.handleDesktopEvents()
   }
 
+  /**
+   * Subscribe to settings update and make change accordingly
+   * @param key
+   * @param value
+   */
+  onSettingUpdate<T>(key: string, value: T): void {
+    if (key === Settings.huggingfaceToken) {
+      this.cortexAPI.configs({ huggingface_token: value })
+    }
+  }
+
   /**
    * Called when the extension is unloaded.
    * @override

From 55ad2f3931a7fcc9f4c6ec7278c0480ab12eb906 Mon Sep 17 00:00:00 2001
From: Louis <louis@jan.ai>
Date: Thu, 21 Nov 2024 14:09:14 +0700
Subject: [PATCH 44/46] fix: fix empty string in gpus_in_use settings to select
 the correct variant

---
 web/hooks/useSettings.ts                |  2 +-
 web/screens/Settings/Advanced/index.tsx | 11 ++++++++---
 2 files changed, 9 insertions(+), 4 deletions(-)

diff --git a/web/hooks/useSettings.ts b/web/hooks/useSettings.ts
index 874381317..0f02d41af 100644
--- a/web/hooks/useSettings.ts
+++ b/web/hooks/useSettings.ts
@@ -53,7 +53,7 @@ export const useSettings = () => {
     const settings = await readSettings()
     if (runMode != null) settings.run_mode = runMode
     if (notify != null) settings.notify = notify
-    if (gpusInUse != null) settings.gpus_in_use = gpusInUse
+    if (gpusInUse != null) settings.gpus_in_use = gpusInUse.filter((e) => !!e)
     if (vulkan != null) {
       settings.vulkan = vulkan
       // GPU enabled, set run_mode to 'gpu'
diff --git a/web/screens/Settings/Advanced/index.tsx b/web/screens/Settings/Advanced/index.tsx
index 62a2aded0..114bb2460 100644
--- a/web/screens/Settings/Advanced/index.tsx
+++ b/web/screens/Settings/Advanced/index.tsx
@@ -207,7 +207,12 @@ const Advanced = () => {
     let updatedGpusInUse = [...gpusInUse]
     if (updatedGpusInUse.includes(gpuId)) {
       updatedGpusInUse = updatedGpusInUse.filter((id) => id !== gpuId)
-      if (gpuEnabled && updatedGpusInUse.length === 0) {
+      if (
+        gpuEnabled &&
+        updatedGpusInUse.length === 0 &&
+        gpuId &&
+        gpuId.trim()
+      ) {
         // Vulkan support only allow 1 active device at a time
         if (vulkanEnabled) {
           updatedGpusInUse = []
@@ -219,10 +224,10 @@ const Advanced = () => {
       if (vulkanEnabled) {
         updatedGpusInUse = []
       }
-      updatedGpusInUse.push(gpuId)
+      if (gpuId && gpuId.trim()) updatedGpusInUse.push(gpuId)
     }
     setGpusInUse(updatedGpusInUse)
-    await saveSettings({ gpusInUse: updatedGpusInUse })
+    await saveSettings({ gpusInUse: updatedGpusInUse.filter((e) => !!e) })
     // Reload window to apply changes
     // This will trigger engine servers to restart
     window.location.reload()

From b733a8738d01b98be94e6fd9bc24b0e7f5e83558 Mon Sep 17 00:00:00 2001
From: Louis <louis@jan.ai>
Date: Thu, 21 Nov 2024 14:15:28 +0700
Subject: [PATCH 45/46] fix: incorrect GPU selection that causes performance
 degradation in CPU-only mode

---
 extensions/monitoring-extension/src/node/index.ts | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/extensions/monitoring-extension/src/node/index.ts b/extensions/monitoring-extension/src/node/index.ts
index a900490f3..e32f85082 100644
--- a/extensions/monitoring-extension/src/node/index.ts
+++ b/extensions/monitoring-extension/src/node/index.ts
@@ -259,11 +259,11 @@ const updateGpuInfo = async () =>
             data.gpu_highest_vram = highestVramId
           } else {
             data.gpus = []
-            data.gpu_highest_vram = ''
+            data.gpu_highest_vram = undefined
           }
 
           if (!data.gpus_in_use || data.gpus_in_use.length === 0) {
-            data.gpus_in_use = [data.gpu_highest_vram]
+            data.gpus_in_use = data.gpu_highest_vram ? [data.gpu_highest_vram].filter(e => !!e) : []
           }
 
           data = await updateCudaExistence(data)

From 868262f6253b1f2a2bde30bd7f2f5a28e678e21e Mon Sep 17 00:00:00 2001
From: Louis <louis@jan.ai>
Date: Thu, 21 Nov 2024 14:40:25 +0700
Subject: [PATCH 46/46] chore: version bump 0.5.9

---
 web/package.json | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/web/package.json b/web/package.json
index 7665d354c..bfb835ec1 100644
--- a/web/package.json
+++ b/web/package.json
@@ -1,6 +1,6 @@
 {
   "name": "@janhq/web",
-  "version": "0.1.0",
+  "version": "0.5.9",
   "private": true,
   "homepage": "./",
   "scripts": {