From c01737ff69c3d9aeb1aed53c88bedeec94741a04 Mon Sep 17 00:00:00 2001 From: hiro Date: Fri, 1 Dec 2023 15:46:01 +0700 Subject: [PATCH 01/58] refactor: Change inference-extension to inference-nitro-extension --- .gitignore | 8 +-- extensions/inference-extension/package.json | 57 ------------------- .../README.md | 0 .../bin}/linux-cpu/.gitkeep | 0 .../bin}/linux-cuda/.gitkeep | 0 .../bin}/linux-start.sh | 0 .../bin}/mac-arm64/.gitkeep | 0 .../bin}/mac-x64/.gitkeep | 0 .../bin}/version.txt | 0 .../bin}/win-cpu/.gitkeep | 0 .../bin}/win-cuda/.gitkeep | 0 .../bin}/win-start.bat | 0 .../download.bat | 0 .../inference-nitro-extension/package.json | 57 +++++++++++++++++++ .../src/@types/global.d.ts | 0 .../src/helpers/sse.ts | 0 .../src/index.ts | 0 .../src/module.ts | 2 +- .../tsconfig.json | 0 .../webpack.config.js | 0 20 files changed, 62 insertions(+), 62 deletions(-) delete mode 100644 extensions/inference-extension/package.json rename extensions/{inference-extension => inference-nitro-extension}/README.md (100%) rename extensions/{inference-extension/nitro => inference-nitro-extension/bin}/linux-cpu/.gitkeep (100%) rename extensions/{inference-extension/nitro => inference-nitro-extension/bin}/linux-cuda/.gitkeep (100%) rename extensions/{inference-extension/nitro => inference-nitro-extension/bin}/linux-start.sh (100%) rename extensions/{inference-extension/nitro => inference-nitro-extension/bin}/mac-arm64/.gitkeep (100%) rename extensions/{inference-extension/nitro => inference-nitro-extension/bin}/mac-x64/.gitkeep (100%) rename extensions/{inference-extension/nitro => inference-nitro-extension/bin}/version.txt (100%) rename extensions/{inference-extension/nitro => inference-nitro-extension/bin}/win-cpu/.gitkeep (100%) rename extensions/{inference-extension/nitro => inference-nitro-extension/bin}/win-cuda/.gitkeep (100%) rename extensions/{inference-extension/nitro => inference-nitro-extension/bin}/win-start.bat (100%) rename extensions/{inference-extension => inference-nitro-extension}/download.bat (100%) create mode 100644 extensions/inference-nitro-extension/package.json rename extensions/{inference-extension => inference-nitro-extension}/src/@types/global.d.ts (100%) rename extensions/{inference-extension => inference-nitro-extension}/src/helpers/sse.ts (100%) rename extensions/{inference-extension => inference-nitro-extension}/src/index.ts (100%) rename extensions/{inference-extension => inference-nitro-extension}/src/module.ts (99%) rename extensions/{inference-extension => inference-nitro-extension}/tsconfig.json (100%) rename extensions/{inference-extension => inference-nitro-extension}/webpack.config.js (100%) diff --git a/.gitignore b/.gitignore index 4bfb0576f..d400a3b81 100644 --- a/.gitignore +++ b/.gitignore @@ -17,7 +17,7 @@ package-lock.json core/lib/** # Nitro binary files -extensions/inference-extension/nitro/*/nitro -extensions/inference-extension/nitro/*/*.exe -extensions/inference-extension/nitro/*/*.dll -extensions/inference-extension/nitro/*/*.metal \ No newline at end of file +extensions/inference-nitro-extension/bin/*/nitro +extensions/inference-nitro-extension/bin/*/*.exe +extensions/inference-nitro-extension/bin/*/*.dll +extensions/inference-nitro-extension/bin/*/*.metal \ No newline at end of file diff --git a/extensions/inference-extension/package.json b/extensions/inference-extension/package.json deleted file mode 100644 index 798d2e46d..000000000 --- a/extensions/inference-extension/package.json +++ /dev/null @@ -1,57 +0,0 @@ -{ - "name": 
"@janhq/inference-extension", - "version": "1.0.0", - "description": "Inference Extension, powered by @janhq/nitro, bring a high-performance Llama model inference in pure C++.", - "main": "dist/index.js", - "module": "dist/module.js", - "author": "Jan ", - "license": "AGPL-3.0", - "scripts": { - "build": "tsc -b . && webpack --config webpack.config.js", - "downloadnitro:linux": "NITRO_VERSION=$(cat ./nitro/version.txt) && download https://github.com/janhq/nitro/releases/download/v${NITRO_VERSION}/nitro-${NITRO_VERSION}-linux-amd64.tar.gz -e --strip 1 -o ./nitro/linux-cpu && chmod +x ./nitro/linux-cpu/nitro && chmod +x ./nitro/linux-start.sh && download https://github.com/janhq/nitro/releases/download/v${NITRO_VERSION}/nitro-${NITRO_VERSION}-linux-amd64-cuda.tar.gz -e --strip 1 -o ./nitro/linux-cuda && chmod +x ./nitro/linux-cuda/nitro && chmod +x ./nitro/linux-start.sh", - "downloadnitro:darwin": "NITRO_VERSION=$(cat ./nitro/version.txt) && download https://github.com/janhq/nitro/releases/download/v${NITRO_VERSION}/nitro-${NITRO_VERSION}-mac-arm64.tar.gz -e --strip 1 -o ./nitro/mac-arm64 && chmod +x ./nitro/mac-arm64/nitro && download https://github.com/janhq/nitro/releases/download/v${NITRO_VERSION}/nitro-${NITRO_VERSION}-mac-amd64.tar.gz -e --strip 1 -o ./nitro/mac-x64 && chmod +x ./nitro/mac-x64/nitro", - "downloadnitro:win32": "download.bat", - "downloadnitro": "run-script-os", - "build:publish:darwin": "rimraf *.tgz --glob && npm run build && npm run downloadnitro && ../../.github/scripts/auto-sign.sh && cpx \"nitro/**\" \"dist/nitro\" && npm pack && cpx *.tgz ../../electron/pre-install", - "build:publish:win32": "rimraf *.tgz --glob && npm run build && npm run downloadnitro && cpx \"nitro/**\" \"dist/nitro\" && npm pack && cpx *.tgz ../../electron/pre-install", - "build:publish:linux": "rimraf *.tgz --glob && npm run build && npm run downloadnitro && cpx \"nitro/**\" \"dist/nitro\" && npm pack && cpx *.tgz ../../electron/pre-install", - "build:publish": "run-script-os" - }, - "exports": { - ".": "./dist/index.js", - "./main": "./dist/module.js" - }, - "devDependencies": { - "cpx": "^1.5.0", - "rimraf": "^3.0.2", - "run-script-os": "^1.1.6", - "webpack": "^5.88.2", - "webpack-cli": "^5.1.4" - }, - "dependencies": { - "@janhq/core": "file:../../core", - "download-cli": "^1.1.1", - "electron-log": "^5.0.1", - "fetch-retry": "^5.0.6", - "kill-port": "^2.0.1", - "path-browserify": "^1.0.1", - "rxjs": "^7.8.1", - "tcp-port-used": "^1.0.2", - "ts-loader": "^9.5.0", - "ulid": "^2.3.0" - }, - "engines": { - "node": ">=18.0.0" - }, - "files": [ - "dist/*", - "package.json", - "README.md" - ], - "bundleDependencies": [ - "tcp-port-used", - "kill-port", - "fetch-retry", - "electron-log" - ] -} diff --git a/extensions/inference-extension/README.md b/extensions/inference-nitro-extension/README.md similarity index 100% rename from extensions/inference-extension/README.md rename to extensions/inference-nitro-extension/README.md diff --git a/extensions/inference-extension/nitro/linux-cpu/.gitkeep b/extensions/inference-nitro-extension/bin/linux-cpu/.gitkeep similarity index 100% rename from extensions/inference-extension/nitro/linux-cpu/.gitkeep rename to extensions/inference-nitro-extension/bin/linux-cpu/.gitkeep diff --git a/extensions/inference-extension/nitro/linux-cuda/.gitkeep b/extensions/inference-nitro-extension/bin/linux-cuda/.gitkeep similarity index 100% rename from extensions/inference-extension/nitro/linux-cuda/.gitkeep rename to 
extensions/inference-nitro-extension/bin/linux-cuda/.gitkeep diff --git a/extensions/inference-extension/nitro/linux-start.sh b/extensions/inference-nitro-extension/bin/linux-start.sh similarity index 100% rename from extensions/inference-extension/nitro/linux-start.sh rename to extensions/inference-nitro-extension/bin/linux-start.sh diff --git a/extensions/inference-extension/nitro/mac-arm64/.gitkeep b/extensions/inference-nitro-extension/bin/mac-arm64/.gitkeep similarity index 100% rename from extensions/inference-extension/nitro/mac-arm64/.gitkeep rename to extensions/inference-nitro-extension/bin/mac-arm64/.gitkeep diff --git a/extensions/inference-extension/nitro/mac-x64/.gitkeep b/extensions/inference-nitro-extension/bin/mac-x64/.gitkeep similarity index 100% rename from extensions/inference-extension/nitro/mac-x64/.gitkeep rename to extensions/inference-nitro-extension/bin/mac-x64/.gitkeep diff --git a/extensions/inference-extension/nitro/version.txt b/extensions/inference-nitro-extension/bin/version.txt similarity index 100% rename from extensions/inference-extension/nitro/version.txt rename to extensions/inference-nitro-extension/bin/version.txt diff --git a/extensions/inference-extension/nitro/win-cpu/.gitkeep b/extensions/inference-nitro-extension/bin/win-cpu/.gitkeep similarity index 100% rename from extensions/inference-extension/nitro/win-cpu/.gitkeep rename to extensions/inference-nitro-extension/bin/win-cpu/.gitkeep diff --git a/extensions/inference-extension/nitro/win-cuda/.gitkeep b/extensions/inference-nitro-extension/bin/win-cuda/.gitkeep similarity index 100% rename from extensions/inference-extension/nitro/win-cuda/.gitkeep rename to extensions/inference-nitro-extension/bin/win-cuda/.gitkeep diff --git a/extensions/inference-extension/nitro/win-start.bat b/extensions/inference-nitro-extension/bin/win-start.bat similarity index 100% rename from extensions/inference-extension/nitro/win-start.bat rename to extensions/inference-nitro-extension/bin/win-start.bat diff --git a/extensions/inference-extension/download.bat b/extensions/inference-nitro-extension/download.bat similarity index 100% rename from extensions/inference-extension/download.bat rename to extensions/inference-nitro-extension/download.bat diff --git a/extensions/inference-nitro-extension/package.json b/extensions/inference-nitro-extension/package.json new file mode 100644 index 000000000..ef74fff08 --- /dev/null +++ b/extensions/inference-nitro-extension/package.json @@ -0,0 +1,57 @@ +{ + "name": "@janhq/inference-nitro-extension", + "version": "1.0.0", + "description": "Inference Engine for Nitro Extension, powered by @janhq/nitro, bring a high-performance Llama model inference in pure C++.", + "main": "dist/index.js", + "module": "dist/module.js", + "author": "Jan ", + "license": "AGPL-3.0", + "scripts": { + "build": "tsc -b . 
&& webpack --config webpack.config.js", + "downloadnitro:linux": "NITRO_VERSION=$(cat ./bin/version.txt) && download https://github.com/janhq/nitro/releases/download/v${NITRO_VERSION}/nitro-${NITRO_VERSION}-linux-amd64.tar.gz -e --strip 1 -o ./bin/linux-cpu && chmod +x ./bin/linux-cpu/nitro && chmod +x ./bin/linux-start.sh && download https://github.com/janhq/nitro/releases/download/v${NITRO_VERSION}/nitro-${NITRO_VERSION}-linux-amd64-cuda.tar.gz -e --strip 1 -o ./bin/linux-cuda && chmod +x ./bin/linux-cuda/nitro && chmod +x ./bin/linux-start.sh", + "downloadnitro:darwin": "NITRO_VERSION=$(cat ./bin/version.txt) && download https://github.com/janhq/nitro/releases/download/v${NITRO_VERSION}/nitro-${NITRO_VERSION}-mac-arm64.tar.gz -e --strip 1 -o ./bin/mac-arm64 && chmod +x ./bin/mac-arm64/nitro && download https://github.com/janhq/nitro/releases/download/v${NITRO_VERSION}/nitro-${NITRO_VERSION}-mac-amd64.tar.gz -e --strip 1 -o ./bin/mac-x64 && chmod +x ./bin/mac-x64/nitro", + "downloadnitro:win32": "download.bat", + "downloadnitro": "run-script-os", + "build:publish:darwin": "rimraf *.tgz --glob && npm run build && npm run downloadnitro && ../../.github/scripts/auto-sign.sh && cpx \"bin/**\" \"dist/bin\" && npm pack && cpx *.tgz ../../electron/pre-install", + "build:publish:win32": "rimraf *.tgz --glob && npm run build && npm run downloadnitro && cpx \"bin/**\" \"dist/bin\" && npm pack && cpx *.tgz ../../electron/pre-install", + "build:publish:linux": "rimraf *.tgz --glob && npm run build && npm run downloadnitro && cpx \"bin/**\" \"dist/bin\" && npm pack && cpx *.tgz ../../electron/pre-install", + "build:publish": "run-script-os" + }, + "exports": { + ".": "./dist/index.js", + "./main": "./dist/module.js" + }, + "devDependencies": { + "cpx": "^1.5.0", + "rimraf": "^3.0.2", + "run-script-os": "^1.1.6", + "webpack": "^5.88.2", + "webpack-cli": "^5.1.4" + }, + "dependencies": { + "@janhq/core": "file:../../core", + "download-cli": "^1.1.1", + "electron-log": "^5.0.1", + "fetch-retry": "^5.0.6", + "kill-port": "^2.0.1", + "path-browserify": "^1.0.1", + "rxjs": "^7.8.1", + "tcp-port-used": "^1.0.2", + "ts-loader": "^9.5.0", + "ulid": "^2.3.0" + }, + "engines": { + "node": ">=18.0.0" + }, + "files": [ + "dist/*", + "package.json", + "README.md" + ], + "bundleDependencies": [ + "tcp-port-used", + "kill-port", + "fetch-retry", + "electron-log" + ] +} diff --git a/extensions/inference-extension/src/@types/global.d.ts b/extensions/inference-nitro-extension/src/@types/global.d.ts similarity index 100% rename from extensions/inference-extension/src/@types/global.d.ts rename to extensions/inference-nitro-extension/src/@types/global.d.ts diff --git a/extensions/inference-extension/src/helpers/sse.ts b/extensions/inference-nitro-extension/src/helpers/sse.ts similarity index 100% rename from extensions/inference-extension/src/helpers/sse.ts rename to extensions/inference-nitro-extension/src/helpers/sse.ts diff --git a/extensions/inference-extension/src/index.ts b/extensions/inference-nitro-extension/src/index.ts similarity index 100% rename from extensions/inference-extension/src/index.ts rename to extensions/inference-nitro-extension/src/index.ts diff --git a/extensions/inference-extension/src/module.ts b/extensions/inference-nitro-extension/src/module.ts similarity index 99% rename from extensions/inference-extension/src/module.ts rename to extensions/inference-nitro-extension/src/module.ts index 72e418d6c..90582073e 100644 --- a/extensions/inference-extension/src/module.ts +++ 
b/extensions/inference-nitro-extension/src/module.ts @@ -168,7 +168,7 @@ function checkAndUnloadNitro() { */ async function spawnNitroProcess(): Promise { return new Promise((resolve, reject) => { - let binaryFolder = path.join(__dirname, "nitro"); // Current directory by default + let binaryFolder = path.join(__dirname, "bin"); // Current directory by default let binaryName; if (process.platform === "win32") { diff --git a/extensions/inference-extension/tsconfig.json b/extensions/inference-nitro-extension/tsconfig.json similarity index 100% rename from extensions/inference-extension/tsconfig.json rename to extensions/inference-nitro-extension/tsconfig.json diff --git a/extensions/inference-extension/webpack.config.js b/extensions/inference-nitro-extension/webpack.config.js similarity index 100% rename from extensions/inference-extension/webpack.config.js rename to extensions/inference-nitro-extension/webpack.config.js From 48fd8de30cca14cd554b2e2c5e011bef3b76fc50 Mon Sep 17 00:00:00 2001 From: hiro Date: Fri, 1 Dec 2023 16:33:13 +0700 Subject: [PATCH 02/58] feat: Init commit for inference engine for openai extension --- .../inference-openai-extension/README.md | 78 ++++++++ .../inference-openai-extension/package.json | 57 ++++++ .../src/@types/global.d.ts | 2 + .../inference-openai-extension/src/index.ts | 184 ++++++++++++++++++ .../inference-openai-extension/tsconfig.json | 15 ++ .../webpack.config.js | 42 ++++ 6 files changed, 378 insertions(+) create mode 100644 extensions/inference-openai-extension/README.md create mode 100644 extensions/inference-openai-extension/package.json create mode 100644 extensions/inference-openai-extension/src/@types/global.d.ts create mode 100644 extensions/inference-openai-extension/src/index.ts create mode 100644 extensions/inference-openai-extension/tsconfig.json create mode 100644 extensions/inference-openai-extension/webpack.config.js diff --git a/extensions/inference-openai-extension/README.md b/extensions/inference-openai-extension/README.md new file mode 100644 index 000000000..455783efb --- /dev/null +++ b/extensions/inference-openai-extension/README.md @@ -0,0 +1,78 @@ +# Jan inference plugin + +Created using Jan app example + +# Create a Jan Plugin using Typescript + +Use this template to bootstrap the creation of a TypeScript Jan plugin. 🚀 + +## Create Your Own Plugin + +To create your own plugin, you can use this repository as a template! Just follow the below instructions: + +1. Click the Use this template button at the top of the repository +2. Select Create a new repository +3. Select an owner and name for your new repository +4. Click Create repository +5. Clone your new repository + +## Initial Setup + +After you've cloned the repository to your local machine or codespace, you'll need to perform some initial setup steps before you can develop your plugin. + +> [!NOTE] +> +> You'll need to have a reasonably modern version of +> [Node.js](https://nodejs.org) handy. If you are using a version manager like +> [`nodenv`](https://github.com/nodenv/nodenv) or +> [`nvm`](https://github.com/nvm-sh/nvm), you can run `nodenv install` in the +> root of your repository to install the version specified in +> [`package.json`](./package.json). Otherwise, 20.x or later should work! + +1. :hammer_and_wrench: Install the dependencies + + ```bash + npm install + ``` + +1. :building_construction: Package the TypeScript for distribution + + ```bash + npm run bundle + ``` + +1. 
:white_check_mark: Check your artifact + + There will be a tgz file in your plugin directory now + +## Update the Plugin Metadata + +The [`package.json`](package.json) file defines metadata about your plugin, such as +plugin name, main entry, description and version. + +When you copy this repository, update `package.json` with the name, description for your plugin. + +## Update the Plugin Code + +The [`src/`](./src/) directory is the heart of your plugin! This contains the +source code that will be run when your plugin extension functions are invoked. You can replace the +contents of this directory with your own code. + +There are a few things to keep in mind when writing your plugin code: + +- Most Jan Plugin Extension functions are processed asynchronously. + In `index.ts`, you will see that the extension function will return a `Promise`. + + ```typescript + import { core } from "@janhq/core"; + + function onStart(): Promise { + return core.invokePluginFunc(MODULE_PATH, "run", 0); + } + ``` + + For more information about the Jan Plugin Core module, see the + [documentation](https://github.com/janhq/jan/blob/main/core/README.md). + +So, what are you waiting for? Go ahead and start customizing your plugin! + diff --git a/extensions/inference-openai-extension/package.json b/extensions/inference-openai-extension/package.json new file mode 100644 index 000000000..5d5dac264 --- /dev/null +++ b/extensions/inference-openai-extension/package.json @@ -0,0 +1,57 @@ +{ + "name": "@janhq/inference-openai-extension", + "version": "1.0.0", + "description": "Inference Engine for OpenAI Extension, powered by @janhq/nitro, bring a high-performance Llama model inference in pure C++.", + "main": "dist/index.js", + "module": "dist/module.js", + "author": "Jan ", + "license": "AGPL-3.0", + "scripts": { + "build": "tsc -b . 
&& webpack --config webpack.config.js", + "downloadnitro:linux": "NITRO_VERSION=$(cat ./bin/version.txt) && download https://github.com/janhq/nitro/releases/download/v${NITRO_VERSION}/nitro-${NITRO_VERSION}-linux-amd64.tar.gz -e --strip 1 -o ./bin/linux-cpu && chmod +x ./bin/linux-cpu/nitro && chmod +x ./bin/linux-start.sh && download https://github.com/janhq/nitro/releases/download/v${NITRO_VERSION}/nitro-${NITRO_VERSION}-linux-amd64-cuda.tar.gz -e --strip 1 -o ./bin/linux-cuda && chmod +x ./bin/linux-cuda/nitro && chmod +x ./bin/linux-start.sh", + "downloadnitro:darwin": "NITRO_VERSION=$(cat ./bin/version.txt) && download https://github.com/janhq/nitro/releases/download/v${NITRO_VERSION}/nitro-${NITRO_VERSION}-mac-arm64.tar.gz -e --strip 1 -o ./bin/mac-arm64 && chmod +x ./bin/mac-arm64/nitro && download https://github.com/janhq/nitro/releases/download/v${NITRO_VERSION}/nitro-${NITRO_VERSION}-mac-amd64.tar.gz -e --strip 1 -o ./bin/mac-x64 && chmod +x ./bin/mac-x64/nitro", + "downloadnitro:win32": "download.bat", + "downloadnitro": "run-script-os", + "build:publish:darwin": "rimraf *.tgz --glob && npm run build && npm run downloadnitro && ../../.github/scripts/auto-sign.sh && cpx \"bin/**\" \"dist/bin\" && npm pack && cpx *.tgz ../../electron/pre-install", + "build:publish:win32": "rimraf *.tgz --glob && npm run build && npm run downloadnitro && cpx \"bin/**\" \"dist/bin\" && npm pack && cpx *.tgz ../../electron/pre-install", + "build:publish:linux": "rimraf *.tgz --glob && npm run build && npm run downloadnitro && cpx \"bin/**\" \"dist/bin\" && npm pack && cpx *.tgz ../../electron/pre-install", + "build:publish": "run-script-os" + }, + "exports": { + ".": "./dist/index.js", + "./main": "./dist/module.js" + }, + "devDependencies": { + "cpx": "^1.5.0", + "rimraf": "^3.0.2", + "run-script-os": "^1.1.6", + "webpack": "^5.88.2", + "webpack-cli": "^5.1.4" + }, + "dependencies": { + "@janhq/core": "file:../../core", + "download-cli": "^1.1.1", + "electron-log": "^5.0.1", + "fetch-retry": "^5.0.6", + "kill-port": "^2.0.1", + "path-browserify": "^1.0.1", + "rxjs": "^7.8.1", + "tcp-port-used": "^1.0.2", + "ts-loader": "^9.5.0", + "ulid": "^2.3.0" + }, + "engines": { + "node": ">=18.0.0" + }, + "files": [ + "dist/*", + "package.json", + "README.md" + ], + "bundleDependencies": [ + "tcp-port-used", + "kill-port", + "fetch-retry", + "electron-log" + ] +} diff --git a/extensions/inference-openai-extension/src/@types/global.d.ts b/extensions/inference-openai-extension/src/@types/global.d.ts new file mode 100644 index 000000000..7267f0940 --- /dev/null +++ b/extensions/inference-openai-extension/src/@types/global.d.ts @@ -0,0 +1,2 @@ +declare const MODULE: string; +declare const INFERENCE_URL: string; diff --git a/extensions/inference-openai-extension/src/index.ts b/extensions/inference-openai-extension/src/index.ts new file mode 100644 index 000000000..1ba471ab1 --- /dev/null +++ b/extensions/inference-openai-extension/src/index.ts @@ -0,0 +1,184 @@ +/** + * @file This file exports a class that implements the InferenceExtension interface from the @janhq/core package. + * The class provides methods for initializing and stopping a model, and for making inference requests. + * It also subscribes to events emitted by the @janhq/core package and handles new message requests. 
+ * @version 1.0.0
+ * @module inference-extension/src/index
+ */
+
+import {
+  ChatCompletionRole,
+  ContentType,
+  EventName,
+  MessageRequest,
+  MessageStatus,
+  ModelSettingParams,
+  ExtensionType,
+  ThreadContent,
+  ThreadMessage,
+  events,
+  executeOnMain,
+  getUserSpace,
+} from "@janhq/core";
+import { InferenceExtension } from "@janhq/core";
+import { requestInference } from "./helpers/sse";
+import { ulid } from "ulid";
+import { join } from "path";
+
+/**
+ * A class that implements the InferenceExtension interface from the @janhq/core package.
+ * The class provides methods for initializing and stopping a model, and for making inference requests.
+ * It also subscribes to events emitted by the @janhq/core package and handles new message requests.
+ */
+export default class JanInferenceExtension implements InferenceExtension {
+  controller = new AbortController();
+  isCancelled = false;
+  /**
+   * Returns the type of the extension.
+   * @returns {ExtensionType} The type of the extension.
+   */
+  type(): ExtensionType {
+    return ExtensionType.Inference;
+  }
+
+  /**
+   * Subscribes to events emitted by the @janhq/core package.
+   */
+  onLoad(): void {
+    events.on(EventName.OnMessageSent, (data) =>
+      JanInferenceExtension.handleMessageRequest(data, this)
+    );
+  }
+
+  /**
+   * Stops the model inference.
+   */
+  onUnload(): void {
+    this.stopModel();
+  }
+
+  /**
+   * Initializes the model with the specified file name.
+   * @param {string} modelId - The ID of the model to initialize.
+   * @returns {Promise<void>} A promise that resolves when the model is initialized.
+   */
+  async initModel(
+    modelId: string,
+    settings?: ModelSettingParams
+  ): Promise<void> {
+    const userSpacePath = await getUserSpace();
+    const modelFullPath = join(userSpacePath, "models", modelId, modelId);
+
+    return executeOnMain(MODULE, "initModel", {
+      modelFullPath,
+      settings,
+    });
+  }
+
+  /**
+   * Stops the model.
+   * @returns {Promise<void>} A promise that resolves when the model is stopped.
+   */
+  async stopModel(): Promise<void> {
+    return executeOnMain(MODULE, "killSubprocess");
+  }
+
+  /**
+   * Stops streaming inference.
+   * @returns {Promise<void>} A promise that resolves when the streaming is stopped.
+   */
+  async stopInference(): Promise<void> {
+    this.isCancelled = true;
+    this.controller?.abort();
+  }
+
+  /**
+   * Makes a single response inference request.
+   * @param {MessageRequest} data - The data for the inference request.
+   * @returns {Promise<ThreadMessage>} A promise that resolves with the inference response.
+   */
+  async inferenceRequest(data: MessageRequest): Promise<ThreadMessage> {
+    const timestamp = Date.now();
+    const message: ThreadMessage = {
+      thread_id: data.threadId,
+      created: timestamp,
+      updated: timestamp,
+      status: MessageStatus.Ready,
+      id: "",
+      role: ChatCompletionRole.Assistant,
+      object: "thread.message",
+      content: [],
+    };
+
+    return new Promise(async (resolve, reject) => {
+      requestInference(data.messages ?? []).subscribe({
+        next: (_content) => {},
+        complete: async () => {
+          resolve(message);
+        },
+        error: async (err) => {
+          reject(err);
+        },
+      });
+    });
+  }
+
+  /**
+   * Handles a new message request by making an inference request and emitting events.
+   * Function registered in event manager, should be static to avoid binding issues.
+   * Pass instance as a reference.
+   * @param {MessageRequest} data - The data for the new message request.
+ */ + private static async handleMessageRequest( + data: MessageRequest, + instance: JanInferenceExtension + ) { + const timestamp = Date.now(); + const message: ThreadMessage = { + id: ulid(), + thread_id: data.threadId, + assistant_id: data.assistantId, + role: ChatCompletionRole.Assistant, + content: [], + status: MessageStatus.Pending, + created: timestamp, + updated: timestamp, + object: "thread.message", + }; + events.emit(EventName.OnMessageResponse, message); + console.log(JSON.stringify(data, null, 2)); + + instance.isCancelled = false; + instance.controller = new AbortController(); + + requestInference(data.messages, instance.controller).subscribe({ + next: (content) => { + const messageContent: ThreadContent = { + type: ContentType.Text, + text: { + value: content.trim(), + annotations: [], + }, + }; + message.content = [messageContent]; + events.emit(EventName.OnMessageUpdate, message); + }, + complete: async () => { + message.status = MessageStatus.Ready; + events.emit(EventName.OnMessageUpdate, message); + }, + error: async (err) => { + const messageContent: ThreadContent = { + type: ContentType.Text, + text: { + value: "Error occurred: " + err.message, + annotations: [], + }, + }; + message.content = [messageContent]; + message.status = MessageStatus.Ready; + events.emit(EventName.OnMessageUpdate, message); + }, + }); + } +} diff --git a/extensions/inference-openai-extension/tsconfig.json b/extensions/inference-openai-extension/tsconfig.json new file mode 100644 index 000000000..b48175a16 --- /dev/null +++ b/extensions/inference-openai-extension/tsconfig.json @@ -0,0 +1,15 @@ +{ + "compilerOptions": { + "target": "es2016", + "module": "ES6", + "moduleResolution": "node", + + "outDir": "./dist", + "esModuleInterop": true, + "forceConsistentCasingInFileNames": true, + "strict": false, + "skipLibCheck": true, + "rootDir": "./src" + }, + "include": ["./src"] +} diff --git a/extensions/inference-openai-extension/webpack.config.js b/extensions/inference-openai-extension/webpack.config.js new file mode 100644 index 000000000..45be62271 --- /dev/null +++ b/extensions/inference-openai-extension/webpack.config.js @@ -0,0 +1,42 @@ +const path = require("path"); +const webpack = require("webpack"); +const packageJson = require("./package.json"); + +module.exports = { + experiments: { outputModule: true }, + entry: "./src/index.ts", // Adjust the entry point to match your project's main file + mode: "production", + module: { + rules: [ + { + test: /\.tsx?$/, + use: "ts-loader", + exclude: /node_modules/, + }, + ], + }, + plugins: [ + new webpack.DefinePlugin({ + MODULE: JSON.stringify(`${packageJson.name}/${packageJson.module}`), + INFERENCE_URL: JSON.stringify( + process.env.INFERENCE_URL || + "http://127.0.0.1:3928/inferences/llamacpp/chat_completion" + ), + }), + ], + output: { + filename: "index.js", // Adjust the output file name as needed + path: path.resolve(__dirname, "dist"), + library: { type: "module" }, // Specify ESM output format + }, + resolve: { + extensions: [".ts", ".js"], + fallback: { + path: require.resolve("path-browserify"), + }, + }, + optimization: { + minimize: false, + }, + // Add loaders and other configuration as needed for your project +}; From 19637c40bfedd934d926169765ad81f1b35ea1b4 Mon Sep 17 00:00:00 2001 From: hiro Date: Fri, 1 Dec 2023 18:13:45 +0700 Subject: [PATCH 03/58] feat: Add nitro engine settings --- extensions/inference-nitro-extension/nitro.json | 6 ++++++ 1 file changed, 6 insertions(+) create mode 100644 
extensions/inference-nitro-extension/nitro.json diff --git a/extensions/inference-nitro-extension/nitro.json b/extensions/inference-nitro-extension/nitro.json new file mode 100644 index 000000000..8b01cb908 --- /dev/null +++ b/extensions/inference-nitro-extension/nitro.json @@ -0,0 +1,6 @@ +{ + "ctx_len": 2048, + "ngl": 100, + "cont_batching": false, + "embedding": false +} \ No newline at end of file From 337da5084082cc9be74ce42689211b546a903716 Mon Sep 17 00:00:00 2001 From: hiro Date: Fri, 1 Dec 2023 18:14:13 +0700 Subject: [PATCH 04/58] fix: Add fs to read and write nitro engine settings --- extensions/inference-nitro-extension/src/index.ts | 14 ++++++++++++-- extensions/inference-nitro-extension/src/module.ts | 6 ++---- 2 files changed, 14 insertions(+), 6 deletions(-) diff --git a/extensions/inference-nitro-extension/src/index.ts b/extensions/inference-nitro-extension/src/index.ts index e8e7758bb..0a1011772 100644 --- a/extensions/inference-nitro-extension/src/index.ts +++ b/extensions/inference-nitro-extension/src/index.ts @@ -19,6 +19,7 @@ import { events, executeOnMain, getUserSpace, + fs } from "@janhq/core"; import { InferenceExtension } from "@janhq/core"; import { requestInference } from "./helpers/sse"; @@ -31,6 +32,9 @@ import { join } from "path"; * It also subscribes to events emitted by the @janhq/core package and handles new message requests. */ export default class JanInferenceExtension implements InferenceExtension { + private static readonly _homeDir = 'engines' + private static readonly _engineMetadataFileName = 'nitro.json' + controller = new AbortController(); isCancelled = false; /** @@ -45,6 +49,8 @@ export default class JanInferenceExtension implements InferenceExtension { * Subscribes to events emitted by the @janhq/core package. 
*/ onLoad(): void { + fs.mkdir(JanInferenceExtension._homeDir) + events.on(EventName.OnMessageSent, (data) => JanInferenceExtension.handleMessageRequest(data, this) ); @@ -68,10 +74,14 @@ export default class JanInferenceExtension implements InferenceExtension { ): Promise { const userSpacePath = await getUserSpace(); const modelFullPath = join(userSpacePath, "models", modelId, modelId); - + let engine_settings = JSON.parse(await fs.readFile(join(JanInferenceExtension._homeDir, JanInferenceExtension._engineMetadataFileName))) + engine_settings = { + engine_settings + ...settings, + }; return executeOnMain(MODULE, "initModel", { modelFullPath, - settings, + engine_settings, }); } diff --git a/extensions/inference-nitro-extension/src/module.ts b/extensions/inference-nitro-extension/src/module.ts index 90582073e..e9b2d8eb5 100644 --- a/extensions/inference-nitro-extension/src/module.ts +++ b/extensions/inference-nitro-extension/src/module.ts @@ -42,12 +42,10 @@ function initModel(wrapper: any): Promise { const settings = { llama_model_path: currentModelFile, - ctx_len: 2048, - ngl: 100, - cont_batching: false, - embedding: false, // Always enable embedding mode on ...wrapper.settings, }; + + log.info(`Load model settings: ${JSON.stringify(settings, null, 2)}`); return ( From d69f0e3321380644b30ef714784bebe34c50e126 Mon Sep 17 00:00:00 2001 From: hiro Date: Fri, 1 Dec 2023 18:14:29 +0700 Subject: [PATCH 05/58] chore: Update openai engine --- .../src/helpers/sse.ts | 56 +++++++++++++++ .../inference-openai-extension/src/index.ts | 69 ++++++++++--------- .../inference-openai-extension/src/module.ts | 34 +++++++++ 3 files changed, 127 insertions(+), 32 deletions(-) create mode 100644 extensions/inference-openai-extension/src/helpers/sse.ts create mode 100644 extensions/inference-openai-extension/src/module.ts diff --git a/extensions/inference-openai-extension/src/helpers/sse.ts b/extensions/inference-openai-extension/src/helpers/sse.ts new file mode 100644 index 000000000..f427e443c --- /dev/null +++ b/extensions/inference-openai-extension/src/helpers/sse.ts @@ -0,0 +1,56 @@ +import { Observable } from "rxjs"; +/** + * Sends a request to the inference server to generate a response based on the recent messages. + * @param recentMessages - An array of recent messages to use as context for the inference. + * @returns An Observable that emits the generated response as a string. + */ +export function requestInference( + recentMessages: any[], + controller?: AbortController +): Observable { + return new Observable((subscriber) => { + const requestBody = JSON.stringify({ + messages: recentMessages, + stream: true, + model: "gpt-3.5-turbo", + max_tokens: 2048, + }); + fetch(INFERENCE_URL, { + method: "POST", + headers: { + "Content-Type": "application/json", + Accept: "text/event-stream", + "Access-Control-Allow-Origin": "*", + }, + body: requestBody, + signal: controller?.signal, + }) + .then(async (response) => { + const stream = response.body; + const decoder = new TextDecoder("utf-8"); + const reader = stream?.getReader(); + let content = ""; + + while (true && reader) { + const { done, value } = await reader.read(); + if (done) { + break; + } + const text = decoder.decode(value); + const lines = text.trim().split("\n"); + for (const line of lines) { + if (line.startsWith("data: ") && !line.includes("data: [DONE]")) { + const data = JSON.parse(line.replace("data: ", "")); + content += data.choices[0]?.delta?.content ?? 
""; + if (content.startsWith("assistant: ")) { + content = content.replace("assistant: ", ""); + } + subscriber.next(content); + } + } + } + subscriber.complete(); + }) + .catch((err) => subscriber.error(err)); + }); +} diff --git a/extensions/inference-openai-extension/src/index.ts b/extensions/inference-openai-extension/src/index.ts index 1ba471ab1..652f47b6c 100644 --- a/extensions/inference-openai-extension/src/index.ts +++ b/extensions/inference-openai-extension/src/index.ts @@ -3,7 +3,7 @@ * The class provides methods for initializing and stopping a model, and for making inference requests. * It also subscribes to events emitted by the @janhq/core package and handles new message requests. * @version 1.0.0 - * @module inference-extension/src/index + * @module inference-openai-extension/src/index */ import { @@ -19,6 +19,7 @@ import { events, executeOnMain, getUserSpace, + fs } from "@janhq/core"; import { InferenceExtension } from "@janhq/core"; import { requestInference } from "./helpers/sse"; @@ -31,20 +32,26 @@ import { join } from "path"; * It also subscribes to events emitted by the @janhq/core package and handles new message requests. */ export default class JanInferenceExtension implements InferenceExtension { + private static readonly _homeDir = 'engines' + private static readonly _engineMetadataFileName = 'openai.json' + controller = new AbortController(); isCancelled = false; /** * Returns the type of the extension. * @returns {ExtensionType} The type of the extension. */ + // TODO: To fix type(): ExtensionType { - return ExtensionType.Inference; + return undefined; } - +// janroot/engine/nitro.json /** * Subscribes to events emitted by the @janhq/core package. */ onLoad(): void { + fs.mkdir(JanInferenceExtension._homeDir) + // TODO: Copy nitro.json to janroot/engine/nitro.json events.on(EventName.OnMessageSent, (data) => JanInferenceExtension.handleMessageRequest(data, this) ); @@ -53,9 +60,7 @@ export default class JanInferenceExtension implements InferenceExtension { /** * Stops the model inference. */ - onUnload(): void { - this.stopModel(); - } + onUnload(): void {} /** * Initializes the model with the specified file name. @@ -79,9 +84,7 @@ export default class JanInferenceExtension implements InferenceExtension { * Stops the model. * @returns {Promise} A promise that resolves when the model is stopped. */ - async stopModel(): Promise { - return executeOnMain(MODULE, "killSubprocess"); - } + async stopModel(): Promise {} /** * Stops streaming inference. @@ -92,35 +95,37 @@ export default class JanInferenceExtension implements InferenceExtension { this.controller?.abort(); } + private async copyModelsToHomeDir() { + try { + // list all of the files under the home directory + const files = await fs.listFiles('') + + if (files.includes(JanInferenceExtension._homeDir)) { + // ignore if the model is already downloaded + console.debug('Model already downloaded') + return + } + + // copy models folder from resources to home directory + const resourePath = await getResourcePath() + const srcPath = join(resourePath, 'models') + + const userSpace = await getUserSpace() + const destPath = join(userSpace, JanInferenceExtension._homeDir) + + await fs.copyFile(srcPath, destPath) + } catch (err) { + console.error(err) + } + } + /** * Makes a single response inference request. * @param {MessageRequest} data - The data for the inference request. * @returns {Promise} A promise that resolves with the inference response. 
 */
   async inferenceRequest(data: MessageRequest): Promise<ThreadMessage> {
-    const timestamp = Date.now();
-    const message: ThreadMessage = {
-      thread_id: data.threadId,
-      created: timestamp,
-      updated: timestamp,
-      status: MessageStatus.Ready,
-      id: "",
-      role: ChatCompletionRole.Assistant,
-      object: "thread.message",
-      content: [],
-    };
-
-    return new Promise(async (resolve, reject) => {
-      requestInference(data.messages ?? []).subscribe({
-        next: (_content) => {},
-        complete: async () => {
-          resolve(message);
-        },
-        error: async (err) => {
-          reject(err);
-        },
-      });
-    });
+    // TODO: @louis
   }
 
   /**
diff --git a/extensions/inference-openai-extension/src/module.ts b/extensions/inference-openai-extension/src/module.ts
new file mode 100644
index 000000000..305c2e804
--- /dev/null
+++ b/extensions/inference-openai-extension/src/module.ts
@@ -0,0 +1,34 @@
+const fetchRetry = require("fetch-retry")(global.fetch);
+
+const log = require("electron-log");
+
+const OPENAI_BASE_URL = "https://api.openai.com/v1";
+const OPENAI_API_KEY = process.env.OPENAI_API_KEY;
+
+/**
+ * The response from the initModel function.
+ * @property error - An error message if the model fails to load.
+ */
+interface InitModelResponse {
+  error?: any;
+  modelFile?: string;
+}
+// /root/engine/nitro.json
+
+/**
+ * Initializes a Nitro subprocess to load a machine learning model.
+ * @param modelFile - The name of the machine learning model file.
+ * @returns A Promise that resolves when the model is loaded successfully, or rejects with an error message if the model is not found or fails to load.
+ */
+function initModel(wrapper: any): Promise {
+  const engine_settings = {
+    ...wrapper.settings,
+  };
+
+  return (
+  )
+}
+
+module.exports = {
+  initModel,
+};

From 6d3bf24d5caeed14dcf60ff8f5035324ee608b5c Mon Sep 17 00:00:00 2001
From: hiro 
Date: Fri, 1 Dec 2023 18:30:47 +0700
Subject: [PATCH 06/58] chore: remove gitkeep

---
 extensions/inference-nitro-extension/bin/linux-cpu/.gitkeep  | 0
 extensions/inference-nitro-extension/bin/linux-cuda/.gitkeep | 0
 extensions/inference-nitro-extension/bin/mac-arm64/.gitkeep  | 0
 extensions/inference-nitro-extension/bin/mac-x64/.gitkeep    | 0
 extensions/inference-nitro-extension/bin/win-cpu/.gitkeep    | 0
 extensions/inference-nitro-extension/bin/win-cuda/.gitkeep   | 0
 6 files changed, 0 insertions(+), 0 deletions(-)
 delete mode 100644 extensions/inference-nitro-extension/bin/linux-cpu/.gitkeep
 delete mode 100644 extensions/inference-nitro-extension/bin/linux-cuda/.gitkeep
 delete mode 100644 extensions/inference-nitro-extension/bin/mac-arm64/.gitkeep
 delete mode 100644 extensions/inference-nitro-extension/bin/mac-x64/.gitkeep
 delete mode 100644 extensions/inference-nitro-extension/bin/win-cpu/.gitkeep
 delete mode 100644 extensions/inference-nitro-extension/bin/win-cuda/.gitkeep

diff --git a/extensions/inference-nitro-extension/bin/linux-cpu/.gitkeep b/extensions/inference-nitro-extension/bin/linux-cpu/.gitkeep
deleted file mode 100644
index e69de29bb..000000000
diff --git a/extensions/inference-nitro-extension/bin/linux-cuda/.gitkeep b/extensions/inference-nitro-extension/bin/linux-cuda/.gitkeep
deleted file mode 100644
index e69de29bb..000000000
diff --git a/extensions/inference-nitro-extension/bin/mac-arm64/.gitkeep b/extensions/inference-nitro-extension/bin/mac-arm64/.gitkeep
deleted file mode 100644
index e69de29bb..000000000
diff --git a/extensions/inference-nitro-extension/bin/mac-x64/.gitkeep b/extensions/inference-nitro-extension/bin/mac-x64/.gitkeep
deleted file mode 100644
index e69de29bb..000000000
diff 
--git a/extensions/inference-nitro-extension/bin/win-cpu/.gitkeep b/extensions/inference-nitro-extension/bin/win-cpu/.gitkeep deleted file mode 100644 index e69de29bb..000000000 diff --git a/extensions/inference-nitro-extension/bin/win-cuda/.gitkeep b/extensions/inference-nitro-extension/bin/win-cuda/.gitkeep deleted file mode 100644 index e69de29bb..000000000 From a985626f293ffd7659239ef8e041e585a45fc17e Mon Sep 17 00:00:00 2001 From: hiro Date: Fri, 1 Dec 2023 18:33:02 +0700 Subject: [PATCH 07/58] feat: Add openai gpt-3.5 model.json --- models/openai-gpt-3.5/model.json | 19 +++++++++++++++++++ 1 file changed, 19 insertions(+) create mode 100644 models/openai-gpt-3.5/model.json diff --git a/models/openai-gpt-3.5/model.json b/models/openai-gpt-3.5/model.json new file mode 100644 index 000000000..8d5060e6c --- /dev/null +++ b/models/openai-gpt-3.5/model.json @@ -0,0 +1,19 @@ +{ + "source_url": "https://api.openai.com/v1", + "id": "openai-gpt35", + "object": "model", + "name": "OpenAI GPT 3.5", + "version": 1.0, + "description": "OpenAI GPT 3.5 model is extremely good", + "format": "api", + "settings": {}, + "parameters": { + "max_tokens": 4096 + }, + "metadata": { + "author": "OpenAI", + "tags": ["General", "Big Context Length"] + }, + "engine": "openai" +} + \ No newline at end of file From 5f8e2ae54cac67869a99f00004a160efb7fd838a Mon Sep 17 00:00:00 2001 From: hiro Date: Fri, 1 Dec 2023 18:33:29 +0700 Subject: [PATCH 08/58] fix: Add engine llama.cpp to local models --- models/capybara-34b/model.json | 3 ++- models/deepseek-coder-1.3b/model.json | 4 +++- models/lzlv-70b/model.json | 3 ++- models/neural-chat-7b/model.json | 3 ++- models/openorca-13b/model.json | 3 ++- models/phind-34b/model.json | 3 ++- models/rocket-3b/model.json | 3 ++- models/starling-7b/model.json | 3 ++- models/tiefighter-13b/model.json | 3 ++- models/tinyllama-1.1b/model.json | 3 ++- models/wizardcoder-13b/model.json | 3 ++- models/yi-34b/model.json | 3 ++- models/zephyr-beta-7b/model.json | 3 ++- 13 files changed, 27 insertions(+), 13 deletions(-) diff --git a/models/capybara-34b/model.json b/models/capybara-34b/model.json index 4ac9899f1..09e6e64a0 100644 --- a/models/capybara-34b/model.json +++ b/models/capybara-34b/model.json @@ -19,6 +19,7 @@ "author": "NousResearch, The Bloke", "tags": ["34B", "Finetuned"], "size": 24320000000 - } + }, + "engine": "llama.cpp" } \ No newline at end of file diff --git a/models/deepseek-coder-1.3b/model.json b/models/deepseek-coder-1.3b/model.json index 4bab24ae2..70f86ec89 100644 --- a/models/deepseek-coder-1.3b/model.json +++ b/models/deepseek-coder-1.3b/model.json @@ -1,3 +1,4 @@ + { "source_url": "https://huggingface.co/TheBloke/deepseek-coder-1.3b-instruct-GGUF/resolve/main/deepseek-coder-1.3b-instruct.Q8_0.gguf", "id": "deepseek-coder-1.3b", @@ -19,5 +20,6 @@ "author": "Deepseek, The Bloke", "tags": ["Tiny", "Foundational Model"], "size": 1430000000 - } + }, + "engine": "nitro" } diff --git a/models/lzlv-70b/model.json b/models/lzlv-70b/model.json index 621d980ab..249171fcf 100644 --- a/models/lzlv-70b/model.json +++ b/models/lzlv-70b/model.json @@ -19,6 +19,7 @@ "author": "Lizpreciatior, The Bloke", "tags": ["70B", "Finetuned"], "size": 48750000000 - } + }, + "engine": "llama.cpp" } \ No newline at end of file diff --git a/models/neural-chat-7b/model.json b/models/neural-chat-7b/model.json index dfccf073d..634f9efe9 100644 --- a/models/neural-chat-7b/model.json +++ b/models/neural-chat-7b/model.json @@ -19,6 +19,7 @@ "author": "Intel, The Bloke", "tags": ["Recommended", 
"7B", "Finetuned"], "size": 4370000000 - } + }, + "engine": "llama.cpp" } \ No newline at end of file diff --git a/models/openorca-13b/model.json b/models/openorca-13b/model.json index fc4773371..edb0e24af 100644 --- a/models/openorca-13b/model.json +++ b/models/openorca-13b/model.json @@ -19,6 +19,7 @@ "author": "Microsoft, The Bloke", "tags": ["13B", "Finetuned"], "size": 9230000000 - } + }, + "engine": "llama.cpp" } \ No newline at end of file diff --git a/models/phind-34b/model.json b/models/phind-34b/model.json index 40d53b77a..dd68b4771 100644 --- a/models/phind-34b/model.json +++ b/models/phind-34b/model.json @@ -19,6 +19,7 @@ "author": "Phind, The Bloke", "tags": ["34B", "Finetuned"], "size": 24320000000 - } + }, + "engine": "llama.cpp" } \ No newline at end of file diff --git a/models/rocket-3b/model.json b/models/rocket-3b/model.json index c40ee6258..4c8f29454 100644 --- a/models/rocket-3b/model.json +++ b/models/rocket-3b/model.json @@ -19,5 +19,6 @@ "author": "Pansophic, The Bloke", "tags": ["Tiny", "Finetuned"], "size": 1710000000 - } + }, + "engine": "llama.cpp" } \ No newline at end of file diff --git a/models/starling-7b/model.json b/models/starling-7b/model.json index de7693673..a9e237ec3 100644 --- a/models/starling-7b/model.json +++ b/models/starling-7b/model.json @@ -19,6 +19,7 @@ "author": "Berkeley-nest, The Bloke", "tags": ["Recommended", "7B","Finetuned"], "size": 4370000000 - } + }, + "engine": "llama.cpp" } \ No newline at end of file diff --git a/models/tiefighter-13b/model.json b/models/tiefighter-13b/model.json index c7f209b77..a0fcdb04e 100644 --- a/models/tiefighter-13b/model.json +++ b/models/tiefighter-13b/model.json @@ -19,6 +19,7 @@ "author": "KoboldAI, The Bloke", "tags": ["13B", "Finetuned"], "size": 9230000000 - } + }, + "engine": "llama.cpp" } \ No newline at end of file diff --git a/models/tinyllama-1.1b/model.json b/models/tinyllama-1.1b/model.json index ace0ca6a0..40bcf4c14 100644 --- a/models/tinyllama-1.1b/model.json +++ b/models/tinyllama-1.1b/model.json @@ -19,5 +19,6 @@ "author": "TinyLlama", "tags": ["Tiny", "Foundation Model"], "size": 637000000 - } + }, + "engine": "llama.cpp" } \ No newline at end of file diff --git a/models/wizardcoder-13b/model.json b/models/wizardcoder-13b/model.json index 63cbd174b..1b86632e9 100644 --- a/models/wizardcoder-13b/model.json +++ b/models/wizardcoder-13b/model.json @@ -19,6 +19,7 @@ "author": "WizardLM, The Bloke", "tags": ["Recommended", "13B", "Finetuned"], "size": 9230000000 - } + }, + "engine": "llama.cpp" } \ No newline at end of file diff --git a/models/yi-34b/model.json b/models/yi-34b/model.json index 7c6da2f03..103e048f9 100644 --- a/models/yi-34b/model.json +++ b/models/yi-34b/model.json @@ -19,6 +19,7 @@ "author": "01-ai, The Bloke", "tags": ["34B", "Foundational Model"], "size": 24320000000 - } + }, + "engine": "llama.cpp" } \ No newline at end of file diff --git a/models/zephyr-beta-7b/model.json b/models/zephyr-beta-7b/model.json index f1ef4eb20..a846f6b1c 100644 --- a/models/zephyr-beta-7b/model.json +++ b/models/zephyr-beta-7b/model.json @@ -19,6 +19,7 @@ "author": "HuggingFaceH4, The Bloke", "tags": ["7B", "Finetuned"], "size": 4370000000 - } + }, + "engine": "llama.cpp" } \ No newline at end of file From 68ee6693bac998c7e6077079a6d5262e43011192 Mon Sep 17 00:00:00 2001 From: hiro Date: Sun, 3 Dec 2023 14:34:49 +0700 Subject: [PATCH 09/58] chore: Remove default nitro.json file --- extensions/inference-nitro-extension/nitro.json | 6 ------ 1 file changed, 6 deletions(-) delete mode 100644 
extensions/inference-nitro-extension/nitro.json diff --git a/extensions/inference-nitro-extension/nitro.json b/extensions/inference-nitro-extension/nitro.json deleted file mode 100644 index 8b01cb908..000000000 --- a/extensions/inference-nitro-extension/nitro.json +++ /dev/null @@ -1,6 +0,0 @@ -{ - "ctx_len": 2048, - "ngl": 100, - "cont_batching": false, - "embedding": false -} \ No newline at end of file From 9a18d3133ca219ace52457640526ee4993879948 Mon Sep 17 00:00:00 2001 From: hiro Date: Sun, 3 Dec 2023 14:35:22 +0700 Subject: [PATCH 10/58] chore: refactor openai file structure --- .../inference-openai-extension/package.json | 22 ++---------- .../inference-openai-extension/src/module.ts | 34 ------------------- 2 files changed, 3 insertions(+), 53 deletions(-) delete mode 100644 extensions/inference-openai-extension/src/module.ts diff --git a/extensions/inference-openai-extension/package.json b/extensions/inference-openai-extension/package.json index 5d5dac264..c32027ca8 100644 --- a/extensions/inference-openai-extension/package.json +++ b/extensions/inference-openai-extension/package.json @@ -1,21 +1,14 @@ { "name": "@janhq/inference-openai-extension", "version": "1.0.0", - "description": "Inference Engine for OpenAI Extension, powered by @janhq/nitro, bring a high-performance Llama model inference in pure C++.", + "description": "Inference Engine for OpenAI Extension that can be used with any OpenAI compatible API", "main": "dist/index.js", "module": "dist/module.js", "author": "Jan ", "license": "AGPL-3.0", "scripts": { "build": "tsc -b . && webpack --config webpack.config.js", - "downloadnitro:linux": "NITRO_VERSION=$(cat ./bin/version.txt) && download https://github.com/janhq/nitro/releases/download/v${NITRO_VERSION}/nitro-${NITRO_VERSION}-linux-amd64.tar.gz -e --strip 1 -o ./bin/linux-cpu && chmod +x ./bin/linux-cpu/nitro && chmod +x ./bin/linux-start.sh && download https://github.com/janhq/nitro/releases/download/v${NITRO_VERSION}/nitro-${NITRO_VERSION}-linux-amd64-cuda.tar.gz -e --strip 1 -o ./bin/linux-cuda && chmod +x ./bin/linux-cuda/nitro && chmod +x ./bin/linux-start.sh", - "downloadnitro:darwin": "NITRO_VERSION=$(cat ./bin/version.txt) && download https://github.com/janhq/nitro/releases/download/v${NITRO_VERSION}/nitro-${NITRO_VERSION}-mac-arm64.tar.gz -e --strip 1 -o ./bin/mac-arm64 && chmod +x ./bin/mac-arm64/nitro && download https://github.com/janhq/nitro/releases/download/v${NITRO_VERSION}/nitro-${NITRO_VERSION}-mac-amd64.tar.gz -e --strip 1 -o ./bin/mac-x64 && chmod +x ./bin/mac-x64/nitro", - "downloadnitro:win32": "download.bat", - "downloadnitro": "run-script-os", - "build:publish:darwin": "rimraf *.tgz --glob && npm run build && npm run downloadnitro && ../../.github/scripts/auto-sign.sh && cpx \"bin/**\" \"dist/bin\" && npm pack && cpx *.tgz ../../electron/pre-install", - "build:publish:win32": "rimraf *.tgz --glob && npm run build && npm run downloadnitro && cpx \"bin/**\" \"dist/bin\" && npm pack && cpx *.tgz ../../electron/pre-install", - "build:publish:linux": "rimraf *.tgz --glob && npm run build && npm run downloadnitro && cpx \"bin/**\" \"dist/bin\" && npm pack && cpx *.tgz ../../electron/pre-install", - "build:publish": "run-script-os" + "build:publish": "rimraf *.tgz --glob && npm run build && npm pack && cpx *.tgz ../../electron/pre-install" }, "exports": { ".": "./dist/index.js", @@ -24,19 +17,13 @@ "devDependencies": { "cpx": "^1.5.0", "rimraf": "^3.0.2", - "run-script-os": "^1.1.6", "webpack": "^5.88.2", "webpack-cli": "^5.1.4" }, "dependencies": { 
"@janhq/core": "file:../../core", - "download-cli": "^1.1.1", - "electron-log": "^5.0.1", "fetch-retry": "^5.0.6", - "kill-port": "^2.0.1", "path-browserify": "^1.0.1", - "rxjs": "^7.8.1", - "tcp-port-used": "^1.0.2", "ts-loader": "^9.5.0", "ulid": "^2.3.0" }, @@ -49,9 +36,6 @@ "README.md" ], "bundleDependencies": [ - "tcp-port-used", - "kill-port", - "fetch-retry", - "electron-log" + "fetch-retry" ] } diff --git a/extensions/inference-openai-extension/src/module.ts b/extensions/inference-openai-extension/src/module.ts deleted file mode 100644 index 305c2e804..000000000 --- a/extensions/inference-openai-extension/src/module.ts +++ /dev/null @@ -1,34 +0,0 @@ -const fetchRetry = require("fetch-retry")(global.fetch); - -const log = require("electron-log"); - -const OPENAI_BASE_URL = "https://api.openai.com/v1"; -const OPENAI_API_KEY = process.env.OPENAI_API_KEY; - -/** - * The response from the initModel function. - * @property error - An error message if the model fails to load. - */ -interface InitModelResponse { - error?: any; - modelFile?: string; -} -// /root/engine/nitro.json - -/** - * Initializes a Nitro subprocess to load a machine learning model. - * @param modelFile - The name of the machine learning model file. - * @returns A Promise that resolves when the model is loaded successfully, or rejects with an error message if the model is not found or fails to load. - */ -function initModel(wrapper: any): Promise { - const engine_settings = { - ...wrapper.settings, - }; - - return ( - ) -} - -module.exports = { - initModel, -}; From 8ab36d7cb232edb9d2bd522ab4de4e108c1ab958 Mon Sep 17 00:00:00 2001 From: hiro Date: Sun, 3 Dec 2023 14:35:54 +0700 Subject: [PATCH 11/58] feat: Add openai engine json reader and writer --- .../inference-openai-extension/src/index.ts | 80 ++++++++++++------- 1 file changed, 49 insertions(+), 31 deletions(-) diff --git a/extensions/inference-openai-extension/src/index.ts b/extensions/inference-openai-extension/src/index.ts index 652f47b6c..4fd16a414 100644 --- a/extensions/inference-openai-extension/src/index.ts +++ b/extensions/inference-openai-extension/src/index.ts @@ -19,7 +19,7 @@ import { events, executeOnMain, getUserSpace, - fs + fs, } from "@janhq/core"; import { InferenceExtension } from "@janhq/core"; import { requestInference } from "./helpers/sse"; @@ -31,7 +31,7 @@ import { join } from "path"; * The class provides methods for initializing and stopping a model, and for making inference requests. * It also subscribes to events emitted by the @janhq/core package and handles new message requests. */ -export default class JanInferenceExtension implements InferenceExtension { +export default class JanInferenceOpenAIExtension implements InferenceExtension { private static readonly _homeDir = 'engines' private static readonly _engineMetadataFileName = 'openai.json' @@ -50,10 +50,10 @@ export default class JanInferenceExtension implements InferenceExtension { * Subscribes to events emitted by the @janhq/core package. 
 */
   onLoad(): void {
-    fs.mkdir(JanInferenceExtension._homeDir)
-    // TODO: Copy nitro.json to janroot/engine/nitro.json
+    fs.mkdir(JanInferenceOpenAIExtension._homeDir)
+    this.writeDefaultEngineSettings()
     events.on(EventName.OnMessageSent, (data) =>
-      JanInferenceExtension.handleMessageRequest(data, this)
+      JanInferenceOpenAIExtension.handleMessageRequest(data, this)
     );
   }
@@ -68,17 +74,39 @@ export default class JanInferenceOpenAIExtension implements InferenceExtension
     const userSpacePath = await getUserSpace();
     const modelFullPath = join(userSpacePath, "models", modelId, modelId);
 
     return executeOnMain(MODULE, "initModel", {
       modelFullPath,
       settings,
     });
   }
 
+  private async writeDefaultEngineSettings() {
+    try {
+
+      const destPath = join(JanInferenceOpenAIExtension._homeDir, JanInferenceOpenAIExtension._engineMetadataFileName)
+      // TODO: Check with @louis for adding new binding
+      // if (await fs.checkFileExists(destPath)) {
+        const default_engine_settings = {
+          "base_url": "https://api.openai.com/v1",
+          "api_key": "sk-"
+        }
+        console.log(`Writing OpenAI engine settings to ${destPath}`)
+        await fs.writeFile(destPath, JSON.stringify(default_engine_settings, null, 2))
+      // }
+      // else {
+      //   console.log(`OpenAI engine settings already exist at ${destPath}`)
+      // }
+    } catch (err) {
+      console.error(err)
+    }
+  }
   /**
    * Stops the model.
    * @returns {Promise<void>} A promise that resolves when the model is stopped.
    */
   async stopModel(): Promise<void> {}
 
   /**
    * Stops streaming inference.
@@ -95,37 +115,35 @@ export default class JanInferenceOpenAIExtension implements InferenceExtension
     this.controller?.abort();
   }
 
-  private async copyModelsToHomeDir() {
-    try {
-      // list all of the files under the home directory
-      const files = await fs.listFiles('')
-
-      if (files.includes(JanInferenceExtension._homeDir)) {
-        // ignore if the model is already downloaded
-        console.debug('Model already downloaded')
-        return
-      }
-
-      // copy models folder from resources to home directory
-      const resourePath = await getResourcePath()
-      const srcPath = join(resourePath, 'models')
-
-      const userSpace = await getUserSpace()
-      const destPath = join(userSpace, JanInferenceExtension._homeDir)
-
-      await fs.copyFile(srcPath, destPath)
-    } catch (err) {
-      console.error(err)
-    }
-  }
-
   /**
    * Makes a single response inference request.
    * @param {MessageRequest} data - The data for the inference request.
    * @returns {Promise<ThreadMessage>} A promise that resolves with the inference response.
    */
   async inferenceRequest(data: MessageRequest): Promise<ThreadMessage> {
-    // TODO: @louis
+    const timestamp = Date.now();
+    const message: ThreadMessage = {
+      thread_id: data.threadId,
+      created: timestamp,
+      updated: timestamp,
+      status: MessageStatus.Ready,
+      id: "",
+      role: ChatCompletionRole.Assistant,
+      object: "thread.message",
+      content: [],
+    };
+
+    return new Promise(async (resolve, reject) => {
+      requestInference(data.messages ?? 
[]).subscribe({ + next: (_content) => {}, + complete: async () => { + resolve(message); + }, + error: async (err) => { + reject(err); + }, + }); + }); } /** @@ -136,7 +154,7 @@ export default class JanInferenceExtension implements InferenceExtension { */ private static async handleMessageRequest( data: MessageRequest, - instance: JanInferenceExtension + instance: JanInferenceOpenAIExtension ) { const timestamp = Date.now(); const message: ThreadMessage = { From 56b778675f46865a039b4971ef073b679d2b7e33 Mon Sep 17 00:00:00 2001 From: hiro Date: Sun, 3 Dec 2023 14:36:01 +0700 Subject: [PATCH 12/58] feat: Add nitro engine json reader and writer --- .../inference-nitro-extension/src/index.ts | 42 +++++++++++++------ 1 file changed, 30 insertions(+), 12 deletions(-) diff --git a/extensions/inference-nitro-extension/src/index.ts b/extensions/inference-nitro-extension/src/index.ts index 0a1011772..17f207ab9 100644 --- a/extensions/inference-nitro-extension/src/index.ts +++ b/extensions/inference-nitro-extension/src/index.ts @@ -19,7 +19,7 @@ import { events, executeOnMain, getUserSpace, - fs + fs, } from "@janhq/core"; import { InferenceExtension } from "@janhq/core"; import { requestInference } from "./helpers/sse"; @@ -31,7 +31,7 @@ import { join } from "path"; * The class provides methods for initializing and stopping a model, and for making inference requests. * It also subscribes to events emitted by the @janhq/core package and handles new message requests. */ -export default class JanInferenceExtension implements InferenceExtension { +export default class JanInferenceNitroExtension implements InferenceExtension { private static readonly _homeDir = 'engines' private static readonly _engineMetadataFileName = 'nitro.json' @@ -49,10 +49,10 @@ export default class JanInferenceExtension implements InferenceExtension { * Subscribes to events emitted by the @janhq/core package. 
 */
   onLoad(): void {
-    fs.mkdir(JanInferenceExtension._homeDir)
-
+    fs.mkdir(JanInferenceNitroExtension._homeDir)
+    this.writeDefaultEngineSettings()
     events.on(EventName.OnMessageSent, (data) =>
-      JanInferenceExtension.handleMessageRequest(data, this)
+      JanInferenceNitroExtension.handleMessageRequest(data, this)
     );
   }
 
@@ -74,14 +74,10 @@
   ): Promise<void> {
     const userSpacePath = await getUserSpace();
     const modelFullPath = join(userSpacePath, "models", modelId, modelId);
-    let engine_settings = JSON.parse(await fs.readFile(join(JanInferenceExtension._homeDir, JanInferenceExtension._engineMetadataFileName)))
-    engine_settings = {
-      engine_settings
-      ...settings,
-    };
+
     return executeOnMain(MODULE, "initModel", {
       modelFullPath,
-      engine_settings,
+      settings,
     });
   }
 
@@ -102,6 +98,28 @@
     this.controller?.abort();
   }
 
+  private async writeDefaultEngineSettings() {
+    try {
+      const destPath = join(JanInferenceNitroExtension._homeDir, JanInferenceNitroExtension._engineMetadataFileName)
+      // TODO: Check with @louis for adding new binding
+      // if (await fs.checkFileExists(destPath)) {
+      const default_engine_settings = {
+        "ctx_len": 2048,
+        "ngl": 100,
+        "cont_batching": false,
+        "embedding": false
+      }
+      console.log(`Writing nitro engine settings to ${destPath}`)
+      await fs.writeFile(destPath, JSON.stringify(default_engine_settings, null, 2))
+      // }
+      // else {
+      //   console.log(`Using existing nitro engine settings at ${destPath}`)
+      // }
+    } catch (err) {
+      console.error(err)
+    }
+  }
+
 /**
  * Makes a single response inference request.
  * @param {MessageRequest} data - The data for the inference request.
 * @returns {Promise} A promise that resolves with the inference response.

From 9aca37a30c1ee106a0ad7c1f4aab5b7f96b87edd Mon Sep 17 00:00:00 2001
From: hiro 
Date: Mon, 4 Dec 2023 12:09:49 +0700
Subject: [PATCH 13/58] chore: Add fs abstraction for checkFileExists

---
 core/src/fs.ts          | 35 ++++++++++++++++++-----------------
 electron/handlers/fs.ts | 13 +++++++++++++
 electron/invokers/fs.ts |  6 ++++++
 3 files changed, 37 insertions(+), 17 deletions(-)

diff --git a/core/src/fs.ts b/core/src/fs.ts
index 4013479dd..99c8cbccf 100644
--- a/core/src/fs.ts
+++ b/core/src/fs.ts
@@ -5,52 +5,52 @@
  * @returns {Promise} A Promise that resolves when the file is written successfully.
  */
 const writeFile: (path: string, data: string) => Promise<any> = (path, data) =>
-  global.core.api?.writeFile(path, data);
+  global.core.api?.writeFile(path, data)
 
 /**
  * Checks whether the path is a directory.
  * @param path - The path to check.
  * @returns {boolean} A boolean indicating whether the path is a directory.
  */
-const isDirectory = (path: string): Promise<boolean> =>
-  global.core.api?.isDirectory(path);
+const isDirectory = (path: string): Promise<boolean> => global.core.api?.isDirectory(path)
 
 /**
  * Reads the contents of a file at the specified path.
  * @param {string} path - The path of the file to read.
 * @returns {Promise} A Promise that resolves with the contents of the file.
 */
-const readFile: (path: string) => Promise<any> = (path) =>
-  global.core.api?.readFile(path);
+const readFile: (path: string) => Promise<any> = (path) => global.core.api?.readFile(path)
+/**
+ * Check whether the file exists
+ * @param {string} path
+ * @returns {boolean} A boolean indicating whether the path is a file.
+ */
+const checkFileExists = (path: string): Promise<boolean> => global.core.api?.checkFileExists(path)
 /**
  * List the directory files
  * @param {string} path - The path of the directory to list files.
  * @returns {Promise} A Promise that resolves with the contents of the directory.
  */
-const listFiles: (path: string) => Promise<any> = (path) =>
-  global.core.api?.listFiles(path);
+const listFiles: (path: string) => Promise<any> = (path) => global.core.api?.listFiles(path)
 
 /**
  * Creates a directory at the specified path.
  * @param {string} path - The path of the directory to create.
  * @returns {Promise} A Promise that resolves when the directory is created successfully.
  */
-const mkdir: (path: string) => Promise<any> = (path) =>
-  global.core.api?.mkdir(path);
+const mkdir: (path: string) => Promise<any> = (path) => global.core.api?.mkdir(path)
 
 /**
  * Removes a directory at the specified path.
  * @param {string} path - The path of the directory to remove.
  * @returns {Promise} A Promise that resolves when the directory is removed successfully.
  */
-const rmdir: (path: string) => Promise<any> = (path) =>
-  global.core.api?.rmdir(path);
+const rmdir: (path: string) => Promise<any> = (path) => global.core.api?.rmdir(path)
 
 /**
  * Deletes a file from the local file system.
  * @param {string} path - The path of the file to delete.
  * @returns {Promise} A Promise that resolves when the file is deleted.
  */
-const deleteFile: (path: string) => Promise<any> = (path) =>
-  global.core.api?.deleteFile(path);
+const deleteFile: (path: string) => Promise<any> = (path) => global.core.api?.deleteFile(path)
 
 /**
  * Appends data to a file at the specified path.
@@ -58,10 +58,10 @@
  * @param data data to append
  */
 const appendFile: (path: string, data: string) => Promise<any> = (path, data) =>
-  global.core.api?.appendFile(path, data);
+  global.core.api?.appendFile(path, data)
 
 const copyFile: (src: string, dest: string) => Promise<any> = (src, dest) =>
-  global.core.api?.copyFile(src, dest);
+  global.core.api?.copyFile(src, dest)
 
 /**
  * Reads a file line by line.
@@ -69,12 +69,13 @@
  * @returns {Promise} A promise that resolves to the lines of the file.
  */
 const readLineByLine: (path: string) => Promise<any> = (path) =>
-  global.core.api?.readLineByLine(path);
+  global.core.api?.readLineByLine(path)
 
 export const fs = {
   isDirectory,
   writeFile,
   readFile,
+  checkFileExists,
   listFiles,
   mkdir,
   rmdir,
@@ -82,4 +83,4 @@
   appendFile,
   readLineByLine,
   copyFile,
-};
+}
diff --git a/electron/handlers/fs.ts b/electron/handlers/fs.ts
index 16cef6eb6..1e2df5c59 100644
--- a/electron/handlers/fs.ts
+++ b/electron/handlers/fs.ts
@@ -50,6 +50,19 @@ export function handleFsIPCs() {
     })
   })
 
+  /**
+   * Checks whether a file exists in the user data directory.
+   * @param event - The event object.
+   * @param path - The path of the file to check.
+   * @returns A promise that resolves with a boolean indicating whether the file exists.
+   */
+  ipcMain.handle('checkFileExists', async (_event, path: string) => {
+    return new Promise((resolve, reject) => {
+      const fullPath = join(userSpacePath, path)
+      fs.existsSync(fullPath) ?
resolve(true) : resolve(false) + }) + }) + /** * Writes data to a file in the user data directory. * @param event - The event object. diff --git a/electron/invokers/fs.ts b/electron/invokers/fs.ts index 309562ad6..e59eb4c86 100644 --- a/electron/invokers/fs.ts +++ b/electron/invokers/fs.ts @@ -27,6 +27,12 @@ export function fsInvokers() { */ readFile: (path: string) => ipcRenderer.invoke('readFile', path), + /** + * Reads a file at the specified path. + * @param {string} path - The path of the file to read. + */ + checkFileExists: (path: string) => ipcRenderer.invoke('checkFileExists', path), + /** * Writes data to a file at the specified path. * @param {string} path - The path of the file to write to. From 1bc5fe64f3bf4e3deb390a0690a5425cce41dcfe Mon Sep 17 00:00:00 2001 From: hiro Date: Mon, 4 Dec 2023 12:10:24 +0700 Subject: [PATCH 14/58] fix: Use Events for init, load, stop models --- core/src/events.ts | 8 +++ core/src/types/index.ts | 15 +++++ web/containers/Providers/EventHandler.tsx | 49 +++++++++++++++- web/hooks/useActiveModel.ts | 56 ++----------------- .../ExploreModelItemHeader/index.tsx | 20 ++++++- 5 files changed, 93 insertions(+), 55 deletions(-) diff --git a/core/src/events.ts b/core/src/events.ts index f588daad7..81451c1f0 100644 --- a/core/src/events.ts +++ b/core/src/events.ts @@ -8,6 +8,14 @@ export enum EventName { OnMessageResponse = "OnMessageResponse", /** The `OnMessageUpdate` event is emitted when a message is updated. */ OnMessageUpdate = "OnMessageUpdate", + /** The `OnModelInit` event is emitted when a model inits. */ + OnModelInit = "OnModelInit", + /** The `OnModelReady` event is emitted when a model ready. */ + OnModelReady = "OnModelReady", + /** The `OnModelFail` event is emitted when a model fails loading. */ + OnModelFail = "OnModelFail", + /** The `OnModelStop` event is emitted when a model fails loading. */ + OnModelStop = "OnModelStop", } /** diff --git a/core/src/types/index.ts b/core/src/types/index.ts index 87343aa65..5b45d4cc8 100644 --- a/core/src/types/index.ts +++ b/core/src/types/index.ts @@ -166,6 +166,17 @@ export type ThreadState = { error?: Error; lastMessage?: string; }; +/** + * Represents the inference engine. + * @stored + */ + +enum InferenceEngine { + llama_cpp = "llama_cpp", + openai = "openai", + nvidia_triton = "nvidia_triton", + hf_endpoint = "hf_endpoint", +} /** * Model type defines the shape of a model object. @@ -234,6 +245,10 @@ export interface Model { * Metadata of the model. */ metadata: ModelMetadata; + /** + * The model engine. 
Enum: "llamacpp" "openai" + */ + engine: InferenceEngine; } export type ModelMetadata = { diff --git a/web/containers/Providers/EventHandler.tsx b/web/containers/Providers/EventHandler.tsx index 46f4b19d4..a3910e266 100644 --- a/web/containers/Providers/EventHandler.tsx +++ b/web/containers/Providers/EventHandler.tsx @@ -7,9 +7,10 @@ import { ThreadMessage, ExtensionType, MessageStatus, + Model } from '@janhq/core' import { ConversationalExtension } from '@janhq/core' -import { useAtomValue, useSetAtom } from 'jotai' +import { atom, useAtom, useAtomValue, useSetAtom } from 'jotai' import { extensionManager } from '@/extension' import { @@ -21,9 +22,16 @@ import { threadsAtom, } from '@/helpers/atoms/Conversation.atom' +import { activeModelAtom, stateModelAtom } from '@/hooks/useActiveModel' +import { useGetDownloadedModels } from '@/hooks/useGetDownloadedModels' +import { toaster } from '../Toast' + export default function EventHandler({ children }: { children: ReactNode }) { const addNewMessage = useSetAtom(addNewMessageAtom) const updateMessage = useSetAtom(updateMessageAtom) + const { downloadedModels } = useGetDownloadedModels() + const [activeModel, setActiveModel] = useAtom(activeModelAtom) + const [stateModel, setStateModel] = useAtom(stateModelAtom) const updateThreadWaiting = useSetAtom(updateThreadWaitingForResponseAtom) const threads = useAtomValue(threadsAtom) @@ -37,6 +45,42 @@ export default function EventHandler({ children }: { children: ReactNode }) { addNewMessage(message) } + async function handleModelReady(res: any) { + const model = downloadedModels.find((e) => e.id === res.modelId) + setActiveModel(model) + toaster({ + title: 'Success!', + description: `Model ${res.modelId} has been started.`, + }) + setStateModel(() => ({ + state: 'stop', + loading: false, + model: res.modelId, + })) + } + + async function handleModelStop(res: any) { + const model = downloadedModels.find((e) => e.id === res.modelId) + setTimeout(async () => { + setActiveModel(undefined) + setStateModel({ state: 'start', loading: false, model: '' }) + toaster({ + title: 'Success!', + description: `Model ${res.modelId} has been stopped.`, + }) + }, 500) + } + + async function handleModelFail(res: any) { + const errorMessage = `${res.error}` + alert(errorMessage) + setStateModel(() => ({ + state: 'start', + loading: false, + model: res.modelId, + })) + } + async function handleMessageResponseUpdate(message: ThreadMessage) { updateMessage( message.id, @@ -73,6 +117,9 @@ export default function EventHandler({ children }: { children: ReactNode }) { if (window.core.events) { events.on(EventName.OnMessageResponse, handleNewMessageResponse) events.on(EventName.OnMessageUpdate, handleMessageResponseUpdate) + events.on(EventName.OnModelReady, handleModelReady) + events.on(EventName.OnModelFail, handleModelFail) + events.on(EventName.OnModelStop, handleModelStop) } // eslint-disable-next-line react-hooks/exhaustive-deps }, []) diff --git a/web/hooks/useActiveModel.ts b/web/hooks/useActiveModel.ts index 60be0f2c4..4f1565e15 100644 --- a/web/hooks/useActiveModel.ts +++ b/web/hooks/useActiveModel.ts @@ -1,5 +1,5 @@ /* eslint-disable @typescript-eslint/no-explicit-any */ -import { ExtensionType, InferenceExtension } from '@janhq/core' +import { EventName, ExtensionType, InferenceExtension, events } from '@janhq/core' import { Model, ModelSettingParams } from '@janhq/core' import { atom, useAtom } from 'jotai' @@ -9,9 +9,9 @@ import { useGetDownloadedModels } from './useGetDownloadedModels' import { extensionManager } 
from '@/extension'
 
-const activeModelAtom = atom<Model | undefined>(undefined)
+export const activeModelAtom = atom<Model | undefined>(undefined)
 
-const stateModelAtom = atom({ state: 'start', loading: false, model: '' })
+export const stateModelAtom = atom({ state: 'start', loading: false, model: '' })
 
 export function useActiveModel() {
   const [activeModel, setActiveModel] = useAtom(activeModelAtom)
@@ -47,59 +47,13 @@
       return
     }
 
-    const currentTime = Date.now()
-    const res = await initModel(modelId, model?.settings)
-    if (res && res.error) {
-      const errorMessage = `${res.error}`
-      alert(errorMessage)
-      setStateModel(() => ({
-        state: 'start',
-        loading: false,
-        model: modelId,
-      }))
-    } else {
-      console.debug(
-        `Model ${modelId} successfully initialized! Took ${
-          Date.now() - currentTime
-        }ms`
-      )
-      setActiveModel(model)
-      toaster({
-        title: 'Success!',
-        description: `Model ${modelId} has been started.`,
-      })
-      setStateModel(() => ({
-        state: 'stop',
-        loading: false,
-        model: modelId,
-      }))
-    }
+    events.emit(EventName.OnModelInit, model)
   }
 
   const stopModel = async (modelId: string) => {
     setStateModel({ state: 'stop', loading: true, model: modelId })
-    setTimeout(async () => {
-      extensionManager
-        .get<InferenceExtension>(ExtensionType.Inference)
-        ?.stopModel()
-
-      setActiveModel(undefined)
-      setStateModel({ state: 'start', loading: false, model: '' })
-      toaster({
-        title: 'Success!',
-        description: `Model ${modelId} has been stopped.`,
-      })
-    }, 500)
+    events.emit(EventName.OnModelStop, modelId)
   }
 
   return { activeModel, startModel, stopModel, stateModel }
 }
-
-const initModel = async (
-  modelId: string,
-  settings?: ModelSettingParams
-): Promise<any> => {
-  return extensionManager
-    .get<InferenceExtension>(ExtensionType.Inference)
-    ?.initModel(modelId, settings)
-}
diff --git a/web/screens/ExploreModels/ExploreModelItemHeader/index.tsx b/web/screens/ExploreModels/ExploreModelItemHeader/index.tsx
index ba23056c6..f5d54f0be 100644
--- a/web/screens/ExploreModels/ExploreModelItemHeader/index.tsx
+++ b/web/screens/ExploreModels/ExploreModelItemHeader/index.tsx
@@ -55,9 +55,23 @@ const ExploreModelItemHeader: React.FC = ({ model, onClick, open }) => {
   const isDownloaded = downloadedModels.find((md) => md.id === model.id) != null
 
-  let downloadButton = (
-    
-  )
+  let downloadButton;
+
+  if (model.engine !== 'nitro') {
+    downloadButton = (
+      
+    );
+  } else if (model.engine === 'nitro') {
+    downloadButton = (
+      
+    );
+  }
 
   const onUseModelClick = () => {
     startModel(model.id)

From 2c648caa5f7f4d0d80ab360601d0d322b2eed69e Mon Sep 17 00:00:00 2001
From: hiro 
Date: Mon, 4 Dec 2023 12:10:46 +0700
Subject: [PATCH 15/58] fix: Update nitro with read/ write for engine.json

---
 .../inference-nitro-extension/src/index.ts    | 36 +++++++++++--------
 .../inference-nitro-extension/src/module.ts   |  4 ++-
 2 files changed, 24 insertions(+), 16 deletions(-)

diff --git a/extensions/inference-nitro-extension/src/index.ts b/extensions/inference-nitro-extension/src/index.ts
index 0a1011772..17f207ab9 100644
--- a/extensions/inference-nitro-extension/src/index.ts
+++ b/extensions/inference-nitro-extension/src/index.ts
@@ -26,6 +26,13 @@ import { requestInference } from "./helpers/sse";
 import { ulid } from "ulid";
 import { join } from "path";
 
+interface EngineSettings {
+  ctx_len: number;
+  ngl: number;
+  cont_batching: boolean;
+  embedding: boolean;
+}
+
 /**
  * A class that implements the InferenceExtension interface from the @janhq/core package.
  * The class provides methods for initializing and stopping a model, and for making inference requests.
@@ -35,6 +42,13 @@ export default class JanInferenceNitroExtension implements InferenceExtension {
   private static readonly _homeDir = 'engines'
   private static readonly _engineMetadataFileName = 'nitro.json'
 
+  private _engineSettings: EngineSettings = {
+    "ctx_len": 2048,
+    "ngl": 100,
+    "cont_batching": false,
+    "embedding": false
+  }
+
   controller = new AbortController();
   isCancelled = false;
   /**
@@ -100,21 +114,13 @@
   private async writeDefaultEngineSettings() {
     try {
-      const destPath = join(JanInferenceNitroExtension._homeDir, JanInferenceNitroExtension._engineMetadataFileName)
-      // TODO: Check with @louis for adding new binding
-      // if (await fs.checkFileExists(destPath)) {
-      const default_engine_settings = {
-        "ctx_len": 2048,
-        "ngl": 100,
-        "cont_batching": false,
-        "embedding": false
-      }
-      console.log(`Writing nitro engine settings to ${destPath}`)
-      await fs.writeFile(destPath, JSON.stringify(default_engine_settings, null, 2))
-      // }
-      // else {
-      //   console.log(`Using existing nitro engine settings at ${destPath}`)
-      // }
+      const engine_json = join(JanInferenceNitroExtension._homeDir, JanInferenceNitroExtension._engineMetadataFileName)
+      if (await fs.checkFileExists(engine_json)) {
+        this._engineSettings = JSON.parse(await fs.readFile(engine_json))
+      }
+      else {
+        await fs.writeFile(engine_json, JSON.stringify(this._engineSettings, null, 2))
+      }
     } catch (err) {
       console.error(err)
     }
diff --git a/extensions/inference-nitro-extension/src/module.ts b/extensions/inference-nitro-extension/src/module.ts
index e9b2d8eb5..3eeedec32 100644
--- a/extensions/inference-nitro-extension/src/module.ts
+++ b/extensions/inference-nitro-extension/src/module.ts
@@ -36,6 +36,9 @@ interface InitModelResponse {
  * TODO: Should it be startModel instead?
  */
 function initModel(wrapper: any): Promise<InitModelResponse> {
+  if (wrapper.settings.engine !== "llamacpp") {
+    return
+  }
   // 1. Check if the model file exists
   currentModelFile = wrapper.modelFullPath;
   log.info("Started to load model " + wrapper.modelFullPath);
@@ -45,7 +48,6 @@
     ...wrapper.settings,
   };
 
-  log.info(`Load model settings: ${JSON.stringify(settings, null, 2)}`);
   return (
 
From 486c5b8ca0c62817ef035e3148a09a815d53ff2d Mon Sep 17 00:00:00 2001
From: hiro 
Date: Mon, 4 Dec 2023 12:10:52 +0700
Subject: [PATCH 16/58] fix: Update openai with read/ write for engine.json

---
 .../inference-openai-extension/src/index.ts   | 68 ++++++++++++-------
 1 file changed, 45 insertions(+), 23 deletions(-)

diff --git a/extensions/inference-openai-extension/src/index.ts b/extensions/inference-openai-extension/src/index.ts
index 4fd16a414..8d72422d3 100644
--- a/extensions/inference-openai-extension/src/index.ts
+++ b/extensions/inference-openai-extension/src/index.ts
@@ -20,12 +20,18 @@ import {
   executeOnMain,
   getUserSpace,
   fs,
+  Model,
 } from "@janhq/core";
 import { InferenceExtension } from "@janhq/core";
 import { requestInference } from "./helpers/sse";
 import { ulid } from "ulid";
 import { join } from "path";
 
+interface EngineSettings {
+  base_url?: string;
+  api_key?: string;
+}
+
 /**
  * A class that implements the InferenceExtension interface from the @janhq/core package.
 * The class provides methods for initializing and stopping a model, and for making inference requests.
@@ -35,6 +41,10 @@ export default class JanInferenceOpenAIExtension implements InferenceExtension {
   private static readonly _homeDir = 'engines'
   private static readonly _engineMetadataFileName = 'openai.json'
 
+  private _engineSettings: EngineSettings = {
+    "base_url": "https://api.openai.com/v1",
+    "api_key": "sk-"
+  }
   controller = new AbortController();
   isCancelled = false;
   /**
@@ -52,9 +62,19 @@
   onLoad(): void {
     fs.mkdir(JanInferenceOpenAIExtension._homeDir)
     this.writeDefaultEngineSettings()
+
+    // Events subscription
     events.on(EventName.OnMessageSent, (data) =>
-      JanInferenceOpenAIExtension.handleMessageRequest(data, this)
+      JanInferenceOpenAIExtension.handleMessageRequest(data, this)
     );
+
+    events.on(EventName.OnModelInit, (data: Model) => {
+      JanInferenceOpenAIExtension.handleModelInit(data);
+    });
+
+    events.on(EventName.OnModelStop, (data: Model) => {
+      JanInferenceOpenAIExtension.handleModelStop(data);
+    });
   }
 
   /**
@@ -71,31 +91,18 @@
     modelId: string,
     settings?: ModelSettingParams
   ): Promise<void> {
-    const userSpacePath = await getUserSpace();
-    const modelFullPath = join(userSpacePath, "models", modelId, modelId);
-
-    return executeOnMain(MODULE, "initModel", {
-      modelFullPath,
-      settings,
-    });
+    return
   }
 
   private async writeDefaultEngineSettings() {
     try {
-
-      const destPath = join(JanInferenceOpenAIExtension._homeDir, JanInferenceOpenAIExtension._engineMetadataFileName)
-      // TODO: Check with @louis for adding new binding
-      // if (await fs.checkFileExists(destPath)) {
-      const default_engine_settings = {
-        "base_url": "https://api.openai.com/v1",
-        "api_key": "sk-"
-      }
-      console.log(`Writing OpenAI engine settings to ${destPath}`)
-      await fs.writeFile(destPath, JSON.stringify(default_engine_settings, null, 2))
-      // }
-      // else {
-      //   console.log(`OpenAI engine settings already exist at ${destPath}`)
-      // }
+      const engine_json = join(JanInferenceOpenAIExtension._homeDir, JanInferenceOpenAIExtension._engineMetadataFileName)
+      if (await fs.checkFileExists(engine_json)) {
+        this._engineSettings = JSON.parse(await fs.readFile(engine_json))
+      }
+      else {
+        await fs.writeFile(engine_json, JSON.stringify(this._engineSettings, null, 2))
+      }
     } catch (err) {
       console.error(err)
     }
@@ -146,6 +153,22 @@
     });
   }
 
+  private static async handleModelInit(data: Model) {
+    console.log('Model init success', data)
+    // Add filter data engine = openai
+    if (data.engine !== 'openai') { return }
+    // If model success
+    events.emit(EventName.OnModelReady, {modelId: data.id})
+    // If model failed
+    // events.emit(EventName.OnModelFail, {modelId: data.id})
+  }
+
+  private static async handleModelStop(data: Model) {
+    // Add filter data engine = openai
+    if (data.engine !== 'openai') { return }
+    events.emit(EventName.OnModelStop, {modelId: data.id})
+  }
+
   /**
    * Handles a new message request by making an inference request and emitting events.
 * Function registered in event manager, should be static to avoid binding issues.
@@ -169,7 +192,6 @@ export default class JanInferenceOpenAIExtension implements InferenceExtension { object: "thread.message", }; events.emit(EventName.OnMessageResponse, message); - console.log(JSON.stringify(data, null, 2)); instance.isCancelled = false; instance.controller = new AbortController(); From 22f12cd02c191050a27a71922577f5693f8a4f9b Mon Sep 17 00:00:00 2001 From: hiro Date: Mon, 4 Dec 2023 12:11:47 +0700 Subject: [PATCH 17/58] fix: Update model.json for Hub with engine nitro/ openai --- models/capybara-34b/model.json | 4 ++-- models/lzlv-70b/model.json | 2 +- models/neural-chat-7b/model.json | 2 +- models/noromaid-20b/model.json | 3 ++- models/openai-gpt-3.5/model.json | 6 ++++-- models/openorca-13b/model.json | 2 +- models/phind-34b/model.json | 2 +- models/rocket-3b/model.json | 2 +- models/starling-7b/model.json | 2 +- models/tiefighter-13b/model.json | 2 +- models/tinyllama-1.1b/model.json | 6 +++--- models/wizardcoder-13b/model.json | 2 +- models/yi-34b/model.json | 2 +- models/zephyr-beta-7b/model.json | 2 +- 14 files changed, 21 insertions(+), 18 deletions(-) diff --git a/models/capybara-34b/model.json b/models/capybara-34b/model.json index 09e6e64a0..e4263f957 100644 --- a/models/capybara-34b/model.json +++ b/models/capybara-34b/model.json @@ -19,7 +19,7 @@ "author": "NousResearch, The Bloke", "tags": ["34B", "Finetuned"], "size": 24320000000 - }, - "engine": "llama.cpp" + }, + "engine": "nitro" } \ No newline at end of file diff --git a/models/lzlv-70b/model.json b/models/lzlv-70b/model.json index 249171fcf..ca6af617e 100644 --- a/models/lzlv-70b/model.json +++ b/models/lzlv-70b/model.json @@ -20,6 +20,6 @@ "tags": ["70B", "Finetuned"], "size": 48750000000 }, - "engine": "llama.cpp" + "engine": "nitro" } \ No newline at end of file diff --git a/models/neural-chat-7b/model.json b/models/neural-chat-7b/model.json index 634f9efe9..1d6271469 100644 --- a/models/neural-chat-7b/model.json +++ b/models/neural-chat-7b/model.json @@ -20,6 +20,6 @@ "tags": ["Recommended", "7B", "Finetuned"], "size": 4370000000 }, - "engine": "llama.cpp" + "engine": "nitro" } \ No newline at end of file diff --git a/models/noromaid-20b/model.json b/models/noromaid-20b/model.json index 698687d8b..a7c4bceb8 100644 --- a/models/noromaid-20b/model.json +++ b/models/noromaid-20b/model.json @@ -19,6 +19,7 @@ "author": "NeverSleep, The Bloke", "tags": ["34B", "Finetuned"], "size": 12040000000 - } + }, + "engine": "llama_cpp" } \ No newline at end of file diff --git a/models/openai-gpt-3.5/model.json b/models/openai-gpt-3.5/model.json index 8d5060e6c..c794349aa 100644 --- a/models/openai-gpt-3.5/model.json +++ b/models/openai-gpt-3.5/model.json @@ -1,12 +1,14 @@ { - "source_url": "https://api.openai.com/v1", + "source_url": "https://openai.com", "id": "openai-gpt35", "object": "model", "name": "OpenAI GPT 3.5", "version": 1.0, "description": "OpenAI GPT 3.5 model is extremely good", "format": "api", - "settings": {}, + "settings": { + "base_url": "https://api.openai.com/v1" + }, "parameters": { "max_tokens": 4096 }, diff --git a/models/openorca-13b/model.json b/models/openorca-13b/model.json index edb0e24af..42c8bd96e 100644 --- a/models/openorca-13b/model.json +++ b/models/openorca-13b/model.json @@ -20,6 +20,6 @@ "tags": ["13B", "Finetuned"], "size": 9230000000 }, - "engine": "llama.cpp" + "engine": "nitro" } \ No newline at end of file diff --git a/models/phind-34b/model.json b/models/phind-34b/model.json index dd68b4771..eb3c1a18a 100644 --- a/models/phind-34b/model.json +++ 
b/models/phind-34b/model.json @@ -20,6 +20,6 @@ "tags": ["34B", "Finetuned"], "size": 24320000000 }, - "engine": "llama.cpp" + "engine": "nitro" } \ No newline at end of file diff --git a/models/rocket-3b/model.json b/models/rocket-3b/model.json index 4c8f29454..b1d338e11 100644 --- a/models/rocket-3b/model.json +++ b/models/rocket-3b/model.json @@ -20,5 +20,5 @@ "tags": ["Tiny", "Finetuned"], "size": 1710000000 }, - "engine": "llama.cpp" + "engine": "llama_cpp" } \ No newline at end of file diff --git a/models/starling-7b/model.json b/models/starling-7b/model.json index a9e237ec3..52b03f8b1 100644 --- a/models/starling-7b/model.json +++ b/models/starling-7b/model.json @@ -20,6 +20,6 @@ "tags": ["Recommended", "7B","Finetuned"], "size": 4370000000 }, - "engine": "llama.cpp" + "engine": "nitro" } \ No newline at end of file diff --git a/models/tiefighter-13b/model.json b/models/tiefighter-13b/model.json index a0fcdb04e..20075777c 100644 --- a/models/tiefighter-13b/model.json +++ b/models/tiefighter-13b/model.json @@ -20,6 +20,6 @@ "tags": ["13B", "Finetuned"], "size": 9230000000 }, - "engine": "llama.cpp" + "engine": "nitro" } \ No newline at end of file diff --git a/models/tinyllama-1.1b/model.json b/models/tinyllama-1.1b/model.json index 40bcf4c14..bc5dcb0c6 100644 --- a/models/tinyllama-1.1b/model.json +++ b/models/tinyllama-1.1b/model.json @@ -10,7 +10,8 @@ "ctx_len": 2048, "system_prompt": "<|system|>\n", "user_prompt": "<|user|>\n", - "ai_prompt": "<|assistant|>\n" + "ai_prompt": "<|assistant|>\n", + "engine": "nitro" }, "parameters": { "max_tokens": 2048 @@ -19,6 +20,5 @@ "author": "TinyLlama", "tags": ["Tiny", "Foundation Model"], "size": 637000000 - }, - "engine": "llama.cpp" + } } \ No newline at end of file diff --git a/models/wizardcoder-13b/model.json b/models/wizardcoder-13b/model.json index 1b86632e9..40f275037 100644 --- a/models/wizardcoder-13b/model.json +++ b/models/wizardcoder-13b/model.json @@ -20,6 +20,6 @@ "tags": ["Recommended", "13B", "Finetuned"], "size": 9230000000 }, - "engine": "llama.cpp" + "engine": "nitro" } \ No newline at end of file diff --git a/models/yi-34b/model.json b/models/yi-34b/model.json index 103e048f9..ab111591c 100644 --- a/models/yi-34b/model.json +++ b/models/yi-34b/model.json @@ -20,6 +20,6 @@ "tags": ["34B", "Foundational Model"], "size": 24320000000 }, - "engine": "llama.cpp" + "engine": "nitro" } \ No newline at end of file diff --git a/models/zephyr-beta-7b/model.json b/models/zephyr-beta-7b/model.json index a846f6b1c..4993366fd 100644 --- a/models/zephyr-beta-7b/model.json +++ b/models/zephyr-beta-7b/model.json @@ -20,6 +20,6 @@ "tags": ["7B", "Finetuned"], "size": 4370000000 }, - "engine": "llama.cpp" + "engine": "nitro" } \ No newline at end of file From 489c8584a96c4d4ce4bc78cba1ad581464d591c0 Mon Sep 17 00:00:00 2001 From: Louis Date: Mon, 4 Dec 2023 12:52:28 +0700 Subject: [PATCH 18/58] chore: models ref event handler --- web/containers/Providers/EventHandler.tsx | 60 ++++++++++++----------- web/hooks/useActiveModel.ts | 13 ++++- 2 files changed, 43 insertions(+), 30 deletions(-) diff --git a/web/containers/Providers/EventHandler.tsx b/web/containers/Providers/EventHandler.tsx index a3910e266..d24caf34d 100644 --- a/web/containers/Providers/EventHandler.tsx +++ b/web/containers/Providers/EventHandler.tsx @@ -7,10 +7,14 @@ import { ThreadMessage, ExtensionType, MessageStatus, - Model } from '@janhq/core' import { ConversationalExtension } from '@janhq/core' -import { atom, useAtom, useAtomValue, useSetAtom } from 'jotai' +import { 
useAtomValue, useSetAtom } from 'jotai' + +import { activeModelAtom, stateModelAtom } from '@/hooks/useActiveModel' +import { useGetDownloadedModels } from '@/hooks/useGetDownloadedModels' + +import { toaster } from '../Toast' import { extensionManager } from '@/extension' import { @@ -22,45 +26,45 @@ import { threadsAtom, } from '@/helpers/atoms/Conversation.atom' -import { activeModelAtom, stateModelAtom } from '@/hooks/useActiveModel' -import { useGetDownloadedModels } from '@/hooks/useGetDownloadedModels' -import { toaster } from '../Toast' - export default function EventHandler({ children }: { children: ReactNode }) { const addNewMessage = useSetAtom(addNewMessageAtom) const updateMessage = useSetAtom(updateMessageAtom) const { downloadedModels } = useGetDownloadedModels() - const [activeModel, setActiveModel] = useAtom(activeModelAtom) - const [stateModel, setStateModel] = useAtom(stateModelAtom) + const setActiveModel = useSetAtom(activeModelAtom) + const setStateModel = useSetAtom(stateModelAtom) const updateThreadWaiting = useSetAtom(updateThreadWaitingForResponseAtom) const threads = useAtomValue(threadsAtom) + const modelsRef = useRef(downloadedModels) const threadsRef = useRef(threads) useEffect(() => { threadsRef.current = threads }, [threads]) + useEffect(() => { + modelsRef.current = downloadedModels + }, [downloadedModels]) + async function handleNewMessageResponse(message: ThreadMessage) { addNewMessage(message) } async function handleModelReady(res: any) { - const model = downloadedModels.find((e) => e.id === res.modelId) - setActiveModel(model) - toaster({ - title: 'Success!', - description: `Model ${res.modelId} has been started.`, - }) - setStateModel(() => ({ - state: 'stop', - loading: false, - model: res.modelId, - })) - } + const model = modelsRef.current?.find((e) => e.id === res.modelId) + setActiveModel(model) + toaster({ + title: 'Success!', + description: `Model ${res.modelId} has been started.`, + }) + setStateModel(() => ({ + state: 'stop', + loading: false, + model: res.modelId, + })) + } async function handleModelStop(res: any) { - const model = downloadedModels.find((e) => e.id === res.modelId) setTimeout(async () => { setActiveModel(undefined) setStateModel({ state: 'start', loading: false, model: '' }) @@ -72,13 +76,13 @@ export default function EventHandler({ children }: { children: ReactNode }) { } async function handleModelFail(res: any) { - const errorMessage = `${res.error}` - alert(errorMessage) - setStateModel(() => ({ - state: 'start', - loading: false, - model: res.modelId, - })) + const errorMessage = `${res.error}` + alert(errorMessage) + setStateModel(() => ({ + state: 'start', + loading: false, + model: res.modelId, + })) } async function handleMessageResponseUpdate(message: ThreadMessage) { diff --git a/web/hooks/useActiveModel.ts b/web/hooks/useActiveModel.ts index 4f1565e15..3a1343489 100644 --- a/web/hooks/useActiveModel.ts +++ b/web/hooks/useActiveModel.ts @@ -1,5 +1,10 @@ /* eslint-disable @typescript-eslint/no-explicit-any */ -import { EventName, ExtensionType, InferenceExtension, events } from '@janhq/core' +import { + EventName, + ExtensionType, + InferenceExtension, + events, +} from '@janhq/core' import { Model, ModelSettingParams } from '@janhq/core' import { atom, useAtom } from 'jotai' @@ -11,7 +16,11 @@ import { extensionManager } from '@/extension' export const activeModelAtom = atom(undefined) -export const stateModelAtom = atom({ state: 'start', loading: false, model: '' }) +export const stateModelAtom = atom({ + state: 
'start', + loading: false, + model: '', +}) export function useActiveModel() { const [activeModel, setActiveModel] = useAtom(activeModelAtom) From 750f09cc81bd0e55338faa97ce81d30b14d5b619 Mon Sep 17 00:00:00 2001 From: hiro Date: Mon, 4 Dec 2023 23:07:59 +0700 Subject: [PATCH 19/58] fix: update engine field in tiny llama 1.1b --- models/tinyllama-1.1b/model.json | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/models/tinyllama-1.1b/model.json b/models/tinyllama-1.1b/model.json index bc5dcb0c6..bb6aeaf5c 100644 --- a/models/tinyllama-1.1b/model.json +++ b/models/tinyllama-1.1b/model.json @@ -10,8 +10,7 @@ "ctx_len": 2048, "system_prompt": "<|system|>\n", "user_prompt": "<|user|>\n", - "ai_prompt": "<|assistant|>\n", - "engine": "nitro" + "ai_prompt": "<|assistant|>\n" }, "parameters": { "max_tokens": 2048 @@ -20,5 +19,6 @@ "author": "TinyLlama", "tags": ["Tiny", "Foundation Model"], "size": 637000000 - } + }, + "engine": "nitro" } \ No newline at end of file From 0c838cecb10998a89112fc11064c3115ab84c194 Mon Sep 17 00:00:00 2001 From: hiro Date: Mon, 4 Dec 2023 23:10:31 +0700 Subject: [PATCH 20/58] fix: Update event types --- core/src/events.ts | 4 +++- core/src/types/index.ts | 8 ++++++-- 2 files changed, 9 insertions(+), 3 deletions(-) diff --git a/core/src/events.ts b/core/src/events.ts index 81451c1f0..bfaf3ea58 100644 --- a/core/src/events.ts +++ b/core/src/events.ts @@ -14,8 +14,10 @@ export enum EventName { OnModelReady = "OnModelReady", /** The `OnModelFail` event is emitted when a model fails loading. */ OnModelFail = "OnModelFail", - /** The `OnModelStop` event is emitted when a model fails loading. */ + /** The `OnModelStop` event is emitted when a model start to stop. */ OnModelStop = "OnModelStop", + /** The `OnModelStopped` event is emitted when a model stopped ok. */ + OnModelStopped = "OnModelStopped", } /** diff --git a/core/src/types/index.ts b/core/src/types/index.ts index 5b45d4cc8..b80012dd7 100644 --- a/core/src/types/index.ts +++ b/core/src/types/index.ts @@ -43,6 +43,9 @@ export type MessageRequest = { /** Runtime parameters for constructing a chat completion request **/ parameters?: ModelRuntimeParam; + + /** Settings for constructing a chat completion request **/ + model?: ModelInfo }; /** @@ -154,6 +157,7 @@ export type ModelInfo = { id: string; settings: ModelSettingParams; parameters: ModelRuntimeParam; + engine: InferenceEngine; }; /** @@ -172,7 +176,7 @@ export type ThreadState = { */ enum InferenceEngine { - llama_cpp = "llama_cpp", + nitro = "nitro", openai = "openai", nvidia_triton = "nvidia_triton", hf_endpoint = "hf_endpoint", @@ -246,7 +250,7 @@ export interface Model { */ metadata: ModelMetadata; /** - * The model engine. Enum: "llamacpp" "openai" + * The model engine. */ engine: InferenceEngine; } From a8e33c2a97ad88b5b6537420a97f5892c17b57ce Mon Sep 17 00:00:00 2001 From: hiro Date: Mon, 4 Dec 2023 23:18:26 +0700 Subject: [PATCH 21/58] fix: Update engine as enum --- docs/openapi/specs/models.yaml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/docs/openapi/specs/models.yaml b/docs/openapi/specs/models.yaml index 1bd7e65d7..1322b90ee 100644 --- a/docs/openapi/specs/models.yaml +++ b/docs/openapi/specs/models.yaml @@ -289,7 +289,7 @@ components: engine: type: string description: "The engine used by the model." - example: "llamacpp" + enum: [nitro, openai, hf_inference] quantization: type: string description: "Quantization parameter of the model." 
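Taken together, the patches above define an event contract for the model lifecycle: the web client emits `OnModelInit` with the full `Model` object, every inference extension filters on `model.engine`, and the owning extension answers with `OnModelReady` or `OnModelFail`; stopping follows the same pattern through `OnModelStop` and `OnModelStopped`. Below is a minimal sketch of that contract, assembled from the handlers shown in these patches; the `ENGINE` constant and the try/catch placement are illustrative, not part of any commit.

```typescript
import { EventName, Model, events } from "@janhq/core";

// Each extension filters on its own InferenceEngine value; "openai" is just an example here.
const ENGINE = "openai";

events.on(EventName.OnModelInit, async (model: Model) => {
  if (model.engine !== ENGINE) return; // another extension owns this model
  try {
    // ... engine-specific loading would happen here ...
    events.emit(EventName.OnModelReady, { modelId: model.id }); // the web handler reads res.modelId
  } catch (error) {
    events.emit(EventName.OnModelFail, { modelId: model.id, error }); // surfaced via alert() in EventHandler.tsx
  }
});

events.on(EventName.OnModelStop, (model: Model) => {
  if (model.engine !== ENGINE) return;
  events.emit(EventName.OnModelStopped, model); // the web side reads the stopped model's id
});
```

The payload shapes mirror the web-side handlers: `handleModelReady` and `handleModelFail` expect an object carrying `modelId` (and optionally `error`), while the stopped confirmation carries the `Model` itself, as wired up in the patches that follow.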
From a51b206cb8e769f2839f9279c071fcd68e7fd3f0 Mon Sep 17 00:00:00 2001 From: hiro Date: Tue, 5 Dec 2023 00:25:50 +0700 Subject: [PATCH 22/58] fix: Add OnModelStopped Event --- web/containers/Providers/EventHandler.tsx | 7 ++++--- 1 file changed, 4 insertions(+), 3 deletions(-) diff --git a/web/containers/Providers/EventHandler.tsx b/web/containers/Providers/EventHandler.tsx index d24caf34d..533e8cd4a 100644 --- a/web/containers/Providers/EventHandler.tsx +++ b/web/containers/Providers/EventHandler.tsx @@ -7,6 +7,7 @@ import { ThreadMessage, ExtensionType, MessageStatus, + Model, } from '@janhq/core' import { ConversationalExtension } from '@janhq/core' import { useAtomValue, useSetAtom } from 'jotai' @@ -64,13 +65,13 @@ export default function EventHandler({ children }: { children: ReactNode }) { })) } - async function handleModelStop(res: any) { + async function handleModelStopped(model: Model) { setTimeout(async () => { setActiveModel(undefined) setStateModel({ state: 'start', loading: false, model: '' }) toaster({ title: 'Success!', - description: `Model ${res.modelId} has been stopped.`, + description: `Model ${model.id} has been stopped.`, }) }, 500) } @@ -123,7 +124,7 @@ export default function EventHandler({ children }: { children: ReactNode }) { events.on(EventName.OnMessageUpdate, handleMessageResponseUpdate) events.on(EventName.OnModelReady, handleModelReady) events.on(EventName.OnModelFail, handleModelFail) - events.on(EventName.OnModelStop, handleModelStop) + events.on(EventName.OnModelStopped, handleModelStopped) } // eslint-disable-next-line react-hooks/exhaustive-deps }, []) From ffbfaf1fd14e4e8d93488d08fbfd5101a05d6769 Mon Sep 17 00:00:00 2001 From: hiro Date: Tue, 5 Dec 2023 00:26:19 +0700 Subject: [PATCH 23/58] feat: Add Event OnModelStop emission to web --- web/hooks/useActiveModel.ts | 5 ++--- 1 file changed, 2 insertions(+), 3 deletions(-) diff --git a/web/hooks/useActiveModel.ts b/web/hooks/useActiveModel.ts index 3a1343489..699b16279 100644 --- a/web/hooks/useActiveModel.ts +++ b/web/hooks/useActiveModel.ts @@ -1,8 +1,6 @@ /* eslint-disable @typescript-eslint/no-explicit-any */ import { EventName, - ExtensionType, - InferenceExtension, events, } from '@janhq/core' import { Model, ModelSettingParams } from '@janhq/core' @@ -60,8 +58,9 @@ export function useActiveModel() { } const stopModel = async (modelId: string) => { + const model = downloadedModels.find((e) => e.id === modelId) setStateModel({ state: 'stop', loading: true, model: modelId }) - events.emit(EventName.OnModelStop, modelId) + events.emit(EventName.OnModelStop, model) } return { activeModel, startModel, stopModel, stateModel } From 05b9a7bfc93bfc4064075effd9a931fff8a95cce Mon Sep 17 00:00:00 2001 From: hiro Date: Tue, 5 Dec 2023 00:26:41 +0700 Subject: [PATCH 24/58] fix: Delete default oai gpt 3.5 settings --- models/openai-gpt-3.5/model.json | 4 +--- 1 file changed, 1 insertion(+), 3 deletions(-) diff --git a/models/openai-gpt-3.5/model.json b/models/openai-gpt-3.5/model.json index c794349aa..0950981af 100644 --- a/models/openai-gpt-3.5/model.json +++ b/models/openai-gpt-3.5/model.json @@ -6,9 +6,7 @@ "version": 1.0, "description": "OpenAI GPT 3.5 model is extremely good", "format": "api", - "settings": { - "base_url": "https://api.openai.com/v1" - }, + "settings": {}, "parameters": { "max_tokens": 4096 }, From cb60a7cf92dd767a48b7b93271a97040a3b859fc Mon Sep 17 00:00:00 2001 From: hiro Date: Tue, 5 Dec 2023 00:27:09 +0700 Subject: [PATCH 25/58] chore: Change type ModelRuntimeParam to 
ModelRuntimeParams --- core/src/types/index.ts | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/core/src/types/index.ts b/core/src/types/index.ts index b80012dd7..99f7b1d0f 100644 --- a/core/src/types/index.ts +++ b/core/src/types/index.ts @@ -42,7 +42,7 @@ export type MessageRequest = { messages?: ChatCompletionMessage[]; /** Runtime parameters for constructing a chat completion request **/ - parameters?: ModelRuntimeParam; + parameters?: ModelRuntimeParams; /** Settings for constructing a chat completion request **/ model?: ModelInfo @@ -156,7 +156,7 @@ export type ThreadAssistantInfo = { export type ModelInfo = { id: string; settings: ModelSettingParams; - parameters: ModelRuntimeParam; + parameters: ModelRuntimeParams; engine: InferenceEngine; }; @@ -243,7 +243,7 @@ export interface Model { /** * The model runtime parameters. */ - parameters: ModelRuntimeParam; + parameters: ModelRuntimeParams; /** * Metadata of the model. @@ -287,7 +287,7 @@ export type ModelSettingParams = { /** * The available model runtime parameters. */ -export type ModelRuntimeParam = { +export type ModelRuntimeParams = { temperature?: number; token_limit?: number; top_k?: number; From fb8729bff3f80bf7441190b349124c38f9e99c4f Mon Sep 17 00:00:00 2001 From: hiro Date: Tue, 5 Dec 2023 00:27:33 +0700 Subject: [PATCH 26/58] fix: Check model engine openai for `Use` --- web/screens/ExploreModels/ExploreModelItemHeader/index.tsx | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/web/screens/ExploreModels/ExploreModelItemHeader/index.tsx b/web/screens/ExploreModels/ExploreModelItemHeader/index.tsx index f5d54f0be..69cd8af3e 100644 --- a/web/screens/ExploreModels/ExploreModelItemHeader/index.tsx +++ b/web/screens/ExploreModels/ExploreModelItemHeader/index.tsx @@ -57,7 +57,7 @@ const ExploreModelItemHeader: React.FC = ({ model, onClick, open }) => { let downloadButton; - if (model.engine !== 'nitro') { + if (model.engine === 'openai') { downloadButton = ( - ); - } else if (model.engine === 'nitro') { - downloadButton = ( - - ); - } + let downloadButton = ( + + ) const onUseModelClick = () => { startModel(model.id) From f9e73b0bbdf7949c1b740323671e3f2a4c71a834 Mon Sep 17 00:00:00 2001 From: hiro Date: Fri, 8 Dec 2023 22:42:07 +0700 Subject: [PATCH 54/58] fix: Change base_url to full_url --- extensions/inference-openai-extension/src/@types/global.d.ts | 2 +- extensions/inference-openai-extension/src/helpers/sse.ts | 2 +- extensions/inference-openai-extension/src/index.ts | 2 +- 3 files changed, 3 insertions(+), 3 deletions(-) diff --git a/extensions/inference-openai-extension/src/@types/global.d.ts b/extensions/inference-openai-extension/src/@types/global.d.ts index 5e9fd4d8a..bb0c6e9bf 100644 --- a/extensions/inference-openai-extension/src/@types/global.d.ts +++ b/extensions/inference-openai-extension/src/@types/global.d.ts @@ -3,7 +3,7 @@ import { Model } from "@janhq/core"; declare const MODULE: string; declare interface EngineSettings { - base_url?: string; + full_url?: string; api_key?: string; } diff --git a/extensions/inference-openai-extension/src/helpers/sse.ts b/extensions/inference-openai-extension/src/helpers/sse.ts index bc170a817..d13eb7c33 100644 --- a/extensions/inference-openai-extension/src/helpers/sse.ts +++ b/extensions/inference-openai-extension/src/helpers/sse.ts @@ -22,7 +22,7 @@ export function requestInference( // Model parameters spreading ...model.parameters, }); - fetch(`${engine.base_url}/chat/completions`, { + fetch(`${engine.full_url}`, { method: "POST", 
headers: { "Content-Type": "application/json", diff --git a/extensions/inference-openai-extension/src/index.ts b/extensions/inference-openai-extension/src/index.ts index 06e0f5e04..8a7955746 100644 --- a/extensions/inference-openai-extension/src/index.ts +++ b/extensions/inference-openai-extension/src/index.ts @@ -37,7 +37,7 @@ export default class JanInferenceOpenAIExtension implements InferenceExtension { private static _currentModel: OpenAIModel; private static _engineSettings: EngineSettings = { - base_url: "https://api.openai.com/v1", + full_url: "https://api.openai.com/v1/chat/completion", api_key: "sk-", }; From 0ef9a581d39c8a361b498f91f8bbde2f826777f3 Mon Sep 17 00:00:00 2001 From: hiro Date: Sat, 9 Dec 2023 00:36:55 +0700 Subject: [PATCH 55/58] fix: BAT for nitro --- extensions/inference-nitro-extension/download.bat | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/extensions/inference-nitro-extension/download.bat b/extensions/inference-nitro-extension/download.bat index 723268919..7df449040 100644 --- a/extensions/inference-nitro-extension/download.bat +++ b/extensions/inference-nitro-extension/download.bat @@ -1,3 +1,3 @@ @echo off -set /p NITRO_VERSION=<./nitro/version.txt +set /p NITRO_VERSION=<./bin/version.txt .\node_modules\.bin\download https://github.com/janhq/nitro/releases/download/v%NITRO_VERSION%/nitro-%NITRO_VERSION%-win-amd64-cuda.tar.gz -e --strip 1 -o ./nitro/win-cuda && .\node_modules\.bin\download https://github.com/janhq/nitro/releases/download/v%NITRO_VERSION%/nitro-%NITRO_VERSION%-win-amd64.tar.gz -e --strip 1 -o ./nitro/win-cpu From c32ad0aff7a7de9a0a7c8c73b2326a33cde05945 Mon Sep 17 00:00:00 2001 From: hiro Date: Sat, 9 Dec 2023 00:42:48 +0700 Subject: [PATCH 56/58] fix: small change in nitro bin location --- extensions/inference-nitro-extension/download.bat | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/extensions/inference-nitro-extension/download.bat b/extensions/inference-nitro-extension/download.bat index 7df449040..1776b7dfe 100644 --- a/extensions/inference-nitro-extension/download.bat +++ b/extensions/inference-nitro-extension/download.bat @@ -1,3 +1,3 @@ @echo off set /p NITRO_VERSION=<./bin/version.txt -.\node_modules\.bin\download https://github.com/janhq/nitro/releases/download/v%NITRO_VERSION%/nitro-%NITRO_VERSION%-win-amd64-cuda.tar.gz -e --strip 1 -o ./nitro/win-cuda && .\node_modules\.bin\download https://github.com/janhq/nitro/releases/download/v%NITRO_VERSION%/nitro-%NITRO_VERSION%-win-amd64.tar.gz -e --strip 1 -o ./nitro/win-cpu +.\node_modules\.bin\download https://github.com/janhq/nitro/releases/download/v%NITRO_VERSION%/nitro-%NITRO_VERSION%-win-amd64-cuda.tar.gz -e --strip 1 -o ./bin/win-cuda && .\node_modules\.bin\download https://github.com/janhq/nitro/releases/download/v%NITRO_VERSION%/nitro-%NITRO_VERSION%-win-amd64.tar.gz -e --strip 1 -o ./bin/win-cpu From 7e3e648e2acf2fac868507567bd0767238f17cf5 Mon Sep 17 00:00:00 2001 From: hiro Date: Sat, 9 Dec 2023 00:56:24 +0700 Subject: [PATCH 57/58] fix: inference extensions small syntax fix --- extensions/inference-nitro-extension/src/helpers/sse.ts | 1 - extensions/inference-openai-extension/src/index.ts | 2 +- 2 files changed, 1 insertion(+), 2 deletions(-) diff --git a/extensions/inference-nitro-extension/src/helpers/sse.ts b/extensions/inference-nitro-extension/src/helpers/sse.ts index 6edad302c..d9d8712dd 100644 --- a/extensions/inference-nitro-extension/src/helpers/sse.ts +++ b/extensions/inference-nitro-extension/src/helpers/sse.ts @@ -16,7 +16,6 
@@ export function requestInference(
     messages: recentMessages,
     model: model.id,
     stream: true,
-    // TODO: Model parameters spreading
     // ...model.parameters,
   });
   fetch(INFERENCE_URL, {
diff --git a/extensions/inference-openai-extension/src/index.ts b/extensions/inference-openai-extension/src/index.ts
index 8a7955746..7e3e6e71e 100644
--- a/extensions/inference-openai-extension/src/index.ts
+++ b/extensions/inference-openai-extension/src/index.ts
@@ -37,7 +37,7 @@ export default class JanInferenceOpenAIExtension implements InferenceExtension {
   private static _currentModel: OpenAIModel;
   private static _engineSettings: EngineSettings = {
-    full_url: "https://api.openai.com/v1/chat/completion",
+    full_url: "https://api.openai.com/v1/chat/completions",
     api_key: "sk-",
   };
 
From bbffaafcdcb32cbb5ab06dd5a001ccfb9fc61a0c Mon Sep 17 00:00:00 2001
From: hiro 
Date: Sat, 9 Dec 2023 00:56:39 +0700
Subject: [PATCH 58/58] feat: Added support for Azure OpenAI API

---
 .../inference-openai-extension/src/helpers/sse.ts  | 10 +++++++---
 1 file changed, 7 insertions(+), 3 deletions(-)

diff --git a/extensions/inference-openai-extension/src/helpers/sse.ts b/extensions/inference-openai-extension/src/helpers/sse.ts
index d13eb7c33..c8ddefca6 100644
--- a/extensions/inference-openai-extension/src/helpers/sse.ts
+++ b/extensions/inference-openai-extension/src/helpers/sse.ts
@@ -15,12 +15,15 @@ export function requestInference(
   controller?: AbortController
 ): Observable<string> {
   return new Observable<string>((subscriber) => {
+    let model_id: string = model.id
+    if (engine.full_url.includes("openai.azure.com")){
+      model_id = engine.full_url.split("/")[5]
+    }
     const requestBody = JSON.stringify({
       messages: recentMessages,
       stream: true,
-      model: model.id,
-      // Model parameters spreading
-      ...model.parameters,
+      model: model_id
+      // ...model.parameters,
     });
     fetch(`${engine.full_url}`, {
       method: "POST",
@@ -29,6 +32,7 @@
       Accept: "text/event-stream",
       "Access-Control-Allow-Origin": "*",
       Authorization: `Bearer ${engine.api_key}`,
+      "api-key": `${engine.api_key}`,
     },
     body: requestBody,
     signal: controller?.signal,
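For reference, the Azure branch in the final patch derives the deployment name from the URL path rather than from `model.id`. A short worked example of the `split("/")[5]` lookup, assuming the standard Azure OpenAI endpoint shape (the resource and deployment names below are made up):

```typescript
// Assumed shape:
//   https://{resource}.openai.azure.com/openai/deployments/{deployment}/chat/completions?api-version=...
const full_url =
  "https://my-resource.openai.azure.com/openai/deployments/my-gpt35-deployment/chat/completions?api-version=2023-05-15";

// Index:    0         1   2                               3         4              5
// Segments: "https:", "", "my-resource.openai.azure.com", "openai", "deployments", "my-gpt35-deployment", ...
const model_id = full_url.split("/")[5];
console.log(model_id); // "my-gpt35-deployment", sent as the request's `model` field
```

Azure authenticates with an `api-key` header rather than `Authorization: Bearer`, which is presumably why the patch sends the key in both headers: each endpoint picks up the header it understands and ignores the other.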