From c01737ff69c3d9aeb1aed53c88bedeec94741a04 Mon Sep 17 00:00:00 2001 From: hiro Date: Fri, 1 Dec 2023 15:46:01 +0700 Subject: [PATCH 01/58] refactor: Change inference-extension to inference-nitro-extension --- .gitignore | 8 +-- extensions/inference-extension/package.json | 57 ------------------- .../README.md | 0 .../bin}/linux-cpu/.gitkeep | 0 .../bin}/linux-cuda/.gitkeep | 0 .../bin}/linux-start.sh | 0 .../bin}/mac-arm64/.gitkeep | 0 .../bin}/mac-x64/.gitkeep | 0 .../bin}/version.txt | 0 .../bin}/win-cpu/.gitkeep | 0 .../bin}/win-cuda/.gitkeep | 0 .../bin}/win-start.bat | 0 .../download.bat | 0 .../inference-nitro-extension/package.json | 57 +++++++++++++++++++ .../src/@types/global.d.ts | 0 .../src/helpers/sse.ts | 0 .../src/index.ts | 0 .../src/module.ts | 2 +- .../tsconfig.json | 0 .../webpack.config.js | 0 20 files changed, 62 insertions(+), 62 deletions(-) delete mode 100644 extensions/inference-extension/package.json rename extensions/{inference-extension => inference-nitro-extension}/README.md (100%) rename extensions/{inference-extension/nitro => inference-nitro-extension/bin}/linux-cpu/.gitkeep (100%) rename extensions/{inference-extension/nitro => inference-nitro-extension/bin}/linux-cuda/.gitkeep (100%) rename extensions/{inference-extension/nitro => inference-nitro-extension/bin}/linux-start.sh (100%) rename extensions/{inference-extension/nitro => inference-nitro-extension/bin}/mac-arm64/.gitkeep (100%) rename extensions/{inference-extension/nitro => inference-nitro-extension/bin}/mac-x64/.gitkeep (100%) rename extensions/{inference-extension/nitro => inference-nitro-extension/bin}/version.txt (100%) rename extensions/{inference-extension/nitro => inference-nitro-extension/bin}/win-cpu/.gitkeep (100%) rename extensions/{inference-extension/nitro => inference-nitro-extension/bin}/win-cuda/.gitkeep (100%) rename extensions/{inference-extension/nitro => inference-nitro-extension/bin}/win-start.bat (100%) rename extensions/{inference-extension => inference-nitro-extension}/download.bat (100%) create mode 100644 extensions/inference-nitro-extension/package.json rename extensions/{inference-extension => inference-nitro-extension}/src/@types/global.d.ts (100%) rename extensions/{inference-extension => inference-nitro-extension}/src/helpers/sse.ts (100%) rename extensions/{inference-extension => inference-nitro-extension}/src/index.ts (100%) rename extensions/{inference-extension => inference-nitro-extension}/src/module.ts (99%) rename extensions/{inference-extension => inference-nitro-extension}/tsconfig.json (100%) rename extensions/{inference-extension => inference-nitro-extension}/webpack.config.js (100%) diff --git a/.gitignore b/.gitignore index 4bfb0576f..d400a3b81 100644 --- a/.gitignore +++ b/.gitignore @@ -17,7 +17,7 @@ package-lock.json core/lib/** # Nitro binary files -extensions/inference-extension/nitro/*/nitro -extensions/inference-extension/nitro/*/*.exe -extensions/inference-extension/nitro/*/*.dll -extensions/inference-extension/nitro/*/*.metal \ No newline at end of file +extensions/inference-nitro-extension/bin/*/nitro +extensions/inference-nitro-extension/bin/*/*.exe +extensions/inference-nitro-extension/bin/*/*.dll +extensions/inference-nitro-extension/bin/*/*.metal \ No newline at end of file diff --git a/extensions/inference-extension/package.json b/extensions/inference-extension/package.json deleted file mode 100644 index 798d2e46d..000000000 --- a/extensions/inference-extension/package.json +++ /dev/null @@ -1,57 +0,0 @@ -{ - "name": 
"@janhq/inference-extension", - "version": "1.0.0", - "description": "Inference Extension, powered by @janhq/nitro, bring a high-performance Llama model inference in pure C++.", - "main": "dist/index.js", - "module": "dist/module.js", - "author": "Jan ", - "license": "AGPL-3.0", - "scripts": { - "build": "tsc -b . && webpack --config webpack.config.js", - "downloadnitro:linux": "NITRO_VERSION=$(cat ./nitro/version.txt) && download https://github.com/janhq/nitro/releases/download/v${NITRO_VERSION}/nitro-${NITRO_VERSION}-linux-amd64.tar.gz -e --strip 1 -o ./nitro/linux-cpu && chmod +x ./nitro/linux-cpu/nitro && chmod +x ./nitro/linux-start.sh && download https://github.com/janhq/nitro/releases/download/v${NITRO_VERSION}/nitro-${NITRO_VERSION}-linux-amd64-cuda.tar.gz -e --strip 1 -o ./nitro/linux-cuda && chmod +x ./nitro/linux-cuda/nitro && chmod +x ./nitro/linux-start.sh", - "downloadnitro:darwin": "NITRO_VERSION=$(cat ./nitro/version.txt) && download https://github.com/janhq/nitro/releases/download/v${NITRO_VERSION}/nitro-${NITRO_VERSION}-mac-arm64.tar.gz -e --strip 1 -o ./nitro/mac-arm64 && chmod +x ./nitro/mac-arm64/nitro && download https://github.com/janhq/nitro/releases/download/v${NITRO_VERSION}/nitro-${NITRO_VERSION}-mac-amd64.tar.gz -e --strip 1 -o ./nitro/mac-x64 && chmod +x ./nitro/mac-x64/nitro", - "downloadnitro:win32": "download.bat", - "downloadnitro": "run-script-os", - "build:publish:darwin": "rimraf *.tgz --glob && npm run build && npm run downloadnitro && ../../.github/scripts/auto-sign.sh && cpx \"nitro/**\" \"dist/nitro\" && npm pack && cpx *.tgz ../../electron/pre-install", - "build:publish:win32": "rimraf *.tgz --glob && npm run build && npm run downloadnitro && cpx \"nitro/**\" \"dist/nitro\" && npm pack && cpx *.tgz ../../electron/pre-install", - "build:publish:linux": "rimraf *.tgz --glob && npm run build && npm run downloadnitro && cpx \"nitro/**\" \"dist/nitro\" && npm pack && cpx *.tgz ../../electron/pre-install", - "build:publish": "run-script-os" - }, - "exports": { - ".": "./dist/index.js", - "./main": "./dist/module.js" - }, - "devDependencies": { - "cpx": "^1.5.0", - "rimraf": "^3.0.2", - "run-script-os": "^1.1.6", - "webpack": "^5.88.2", - "webpack-cli": "^5.1.4" - }, - "dependencies": { - "@janhq/core": "file:../../core", - "download-cli": "^1.1.1", - "electron-log": "^5.0.1", - "fetch-retry": "^5.0.6", - "kill-port": "^2.0.1", - "path-browserify": "^1.0.1", - "rxjs": "^7.8.1", - "tcp-port-used": "^1.0.2", - "ts-loader": "^9.5.0", - "ulid": "^2.3.0" - }, - "engines": { - "node": ">=18.0.0" - }, - "files": [ - "dist/*", - "package.json", - "README.md" - ], - "bundleDependencies": [ - "tcp-port-used", - "kill-port", - "fetch-retry", - "electron-log" - ] -} diff --git a/extensions/inference-extension/README.md b/extensions/inference-nitro-extension/README.md similarity index 100% rename from extensions/inference-extension/README.md rename to extensions/inference-nitro-extension/README.md diff --git a/extensions/inference-extension/nitro/linux-cpu/.gitkeep b/extensions/inference-nitro-extension/bin/linux-cpu/.gitkeep similarity index 100% rename from extensions/inference-extension/nitro/linux-cpu/.gitkeep rename to extensions/inference-nitro-extension/bin/linux-cpu/.gitkeep diff --git a/extensions/inference-extension/nitro/linux-cuda/.gitkeep b/extensions/inference-nitro-extension/bin/linux-cuda/.gitkeep similarity index 100% rename from extensions/inference-extension/nitro/linux-cuda/.gitkeep rename to 
extensions/inference-nitro-extension/bin/linux-cuda/.gitkeep diff --git a/extensions/inference-extension/nitro/linux-start.sh b/extensions/inference-nitro-extension/bin/linux-start.sh similarity index 100% rename from extensions/inference-extension/nitro/linux-start.sh rename to extensions/inference-nitro-extension/bin/linux-start.sh diff --git a/extensions/inference-extension/nitro/mac-arm64/.gitkeep b/extensions/inference-nitro-extension/bin/mac-arm64/.gitkeep similarity index 100% rename from extensions/inference-extension/nitro/mac-arm64/.gitkeep rename to extensions/inference-nitro-extension/bin/mac-arm64/.gitkeep diff --git a/extensions/inference-extension/nitro/mac-x64/.gitkeep b/extensions/inference-nitro-extension/bin/mac-x64/.gitkeep similarity index 100% rename from extensions/inference-extension/nitro/mac-x64/.gitkeep rename to extensions/inference-nitro-extension/bin/mac-x64/.gitkeep diff --git a/extensions/inference-extension/nitro/version.txt b/extensions/inference-nitro-extension/bin/version.txt similarity index 100% rename from extensions/inference-extension/nitro/version.txt rename to extensions/inference-nitro-extension/bin/version.txt diff --git a/extensions/inference-extension/nitro/win-cpu/.gitkeep b/extensions/inference-nitro-extension/bin/win-cpu/.gitkeep similarity index 100% rename from extensions/inference-extension/nitro/win-cpu/.gitkeep rename to extensions/inference-nitro-extension/bin/win-cpu/.gitkeep diff --git a/extensions/inference-extension/nitro/win-cuda/.gitkeep b/extensions/inference-nitro-extension/bin/win-cuda/.gitkeep similarity index 100% rename from extensions/inference-extension/nitro/win-cuda/.gitkeep rename to extensions/inference-nitro-extension/bin/win-cuda/.gitkeep diff --git a/extensions/inference-extension/nitro/win-start.bat b/extensions/inference-nitro-extension/bin/win-start.bat similarity index 100% rename from extensions/inference-extension/nitro/win-start.bat rename to extensions/inference-nitro-extension/bin/win-start.bat diff --git a/extensions/inference-extension/download.bat b/extensions/inference-nitro-extension/download.bat similarity index 100% rename from extensions/inference-extension/download.bat rename to extensions/inference-nitro-extension/download.bat diff --git a/extensions/inference-nitro-extension/package.json b/extensions/inference-nitro-extension/package.json new file mode 100644 index 000000000..ef74fff08 --- /dev/null +++ b/extensions/inference-nitro-extension/package.json @@ -0,0 +1,57 @@ +{ + "name": "@janhq/inference-nitro-extension", + "version": "1.0.0", + "description": "Inference Engine for Nitro Extension, powered by @janhq/nitro, bring a high-performance Llama model inference in pure C++.", + "main": "dist/index.js", + "module": "dist/module.js", + "author": "Jan ", + "license": "AGPL-3.0", + "scripts": { + "build": "tsc -b . 
&& webpack --config webpack.config.js", + "downloadnitro:linux": "NITRO_VERSION=$(cat ./bin/version.txt) && download https://github.com/janhq/nitro/releases/download/v${NITRO_VERSION}/nitro-${NITRO_VERSION}-linux-amd64.tar.gz -e --strip 1 -o ./bin/linux-cpu && chmod +x ./bin/linux-cpu/nitro && chmod +x ./bin/linux-start.sh && download https://github.com/janhq/nitro/releases/download/v${NITRO_VERSION}/nitro-${NITRO_VERSION}-linux-amd64-cuda.tar.gz -e --strip 1 -o ./bin/linux-cuda && chmod +x ./bin/linux-cuda/nitro && chmod +x ./bin/linux-start.sh", + "downloadnitro:darwin": "NITRO_VERSION=$(cat ./bin/version.txt) && download https://github.com/janhq/nitro/releases/download/v${NITRO_VERSION}/nitro-${NITRO_VERSION}-mac-arm64.tar.gz -e --strip 1 -o ./bin/mac-arm64 && chmod +x ./bin/mac-arm64/nitro && download https://github.com/janhq/nitro/releases/download/v${NITRO_VERSION}/nitro-${NITRO_VERSION}-mac-amd64.tar.gz -e --strip 1 -o ./bin/mac-x64 && chmod +x ./bin/mac-x64/nitro", + "downloadnitro:win32": "download.bat", + "downloadnitro": "run-script-os", + "build:publish:darwin": "rimraf *.tgz --glob && npm run build && npm run downloadnitro && ../../.github/scripts/auto-sign.sh && cpx \"bin/**\" \"dist/bin\" && npm pack && cpx *.tgz ../../electron/pre-install", + "build:publish:win32": "rimraf *.tgz --glob && npm run build && npm run downloadnitro && cpx \"bin/**\" \"dist/bin\" && npm pack && cpx *.tgz ../../electron/pre-install", + "build:publish:linux": "rimraf *.tgz --glob && npm run build && npm run downloadnitro && cpx \"bin/**\" \"dist/bin\" && npm pack && cpx *.tgz ../../electron/pre-install", + "build:publish": "run-script-os" + }, + "exports": { + ".": "./dist/index.js", + "./main": "./dist/module.js" + }, + "devDependencies": { + "cpx": "^1.5.0", + "rimraf": "^3.0.2", + "run-script-os": "^1.1.6", + "webpack": "^5.88.2", + "webpack-cli": "^5.1.4" + }, + "dependencies": { + "@janhq/core": "file:../../core", + "download-cli": "^1.1.1", + "electron-log": "^5.0.1", + "fetch-retry": "^5.0.6", + "kill-port": "^2.0.1", + "path-browserify": "^1.0.1", + "rxjs": "^7.8.1", + "tcp-port-used": "^1.0.2", + "ts-loader": "^9.5.0", + "ulid": "^2.3.0" + }, + "engines": { + "node": ">=18.0.0" + }, + "files": [ + "dist/*", + "package.json", + "README.md" + ], + "bundleDependencies": [ + "tcp-port-used", + "kill-port", + "fetch-retry", + "electron-log" + ] +} diff --git a/extensions/inference-extension/src/@types/global.d.ts b/extensions/inference-nitro-extension/src/@types/global.d.ts similarity index 100% rename from extensions/inference-extension/src/@types/global.d.ts rename to extensions/inference-nitro-extension/src/@types/global.d.ts diff --git a/extensions/inference-extension/src/helpers/sse.ts b/extensions/inference-nitro-extension/src/helpers/sse.ts similarity index 100% rename from extensions/inference-extension/src/helpers/sse.ts rename to extensions/inference-nitro-extension/src/helpers/sse.ts diff --git a/extensions/inference-extension/src/index.ts b/extensions/inference-nitro-extension/src/index.ts similarity index 100% rename from extensions/inference-extension/src/index.ts rename to extensions/inference-nitro-extension/src/index.ts diff --git a/extensions/inference-extension/src/module.ts b/extensions/inference-nitro-extension/src/module.ts similarity index 99% rename from extensions/inference-extension/src/module.ts rename to extensions/inference-nitro-extension/src/module.ts index 72e418d6c..90582073e 100644 --- a/extensions/inference-extension/src/module.ts +++ 
b/extensions/inference-nitro-extension/src/module.ts @@ -168,7 +168,7 @@ function checkAndUnloadNitro() { */ async function spawnNitroProcess(): Promise { return new Promise((resolve, reject) => { - let binaryFolder = path.join(__dirname, "nitro"); // Current directory by default + let binaryFolder = path.join(__dirname, "bin"); // Current directory by default let binaryName; if (process.platform === "win32") { diff --git a/extensions/inference-extension/tsconfig.json b/extensions/inference-nitro-extension/tsconfig.json similarity index 100% rename from extensions/inference-extension/tsconfig.json rename to extensions/inference-nitro-extension/tsconfig.json diff --git a/extensions/inference-extension/webpack.config.js b/extensions/inference-nitro-extension/webpack.config.js similarity index 100% rename from extensions/inference-extension/webpack.config.js rename to extensions/inference-nitro-extension/webpack.config.js From 48fd8de30cca14cd554b2e2c5e011bef3b76fc50 Mon Sep 17 00:00:00 2001 From: hiro Date: Fri, 1 Dec 2023 16:33:13 +0700 Subject: [PATCH 02/58] feat: Init commit for inference engine for openai extension --- .../inference-openai-extension/README.md | 78 ++++++++ .../inference-openai-extension/package.json | 57 ++++++ .../src/@types/global.d.ts | 2 + .../inference-openai-extension/src/index.ts | 184 ++++++++++++++++++ .../inference-openai-extension/tsconfig.json | 15 ++ .../webpack.config.js | 42 ++++ 6 files changed, 378 insertions(+) create mode 100644 extensions/inference-openai-extension/README.md create mode 100644 extensions/inference-openai-extension/package.json create mode 100644 extensions/inference-openai-extension/src/@types/global.d.ts create mode 100644 extensions/inference-openai-extension/src/index.ts create mode 100644 extensions/inference-openai-extension/tsconfig.json create mode 100644 extensions/inference-openai-extension/webpack.config.js diff --git a/extensions/inference-openai-extension/README.md b/extensions/inference-openai-extension/README.md new file mode 100644 index 000000000..455783efb --- /dev/null +++ b/extensions/inference-openai-extension/README.md @@ -0,0 +1,78 @@ +# Jan inference plugin + +Created using Jan app example + +# Create a Jan Plugin using Typescript + +Use this template to bootstrap the creation of a TypeScript Jan plugin. 🚀 + +## Create Your Own Plugin + +To create your own plugin, you can use this repository as a template! Just follow the below instructions: + +1. Click the Use this template button at the top of the repository +2. Select Create a new repository +3. Select an owner and name for your new repository +4. Click Create repository +5. Clone your new repository + +## Initial Setup + +After you've cloned the repository to your local machine or codespace, you'll need to perform some initial setup steps before you can develop your plugin. + +> [!NOTE] +> +> You'll need to have a reasonably modern version of +> [Node.js](https://nodejs.org) handy. If you are using a version manager like +> [`nodenv`](https://github.com/nodenv/nodenv) or +> [`nvm`](https://github.com/nvm-sh/nvm), you can run `nodenv install` in the +> root of your repository to install the version specified in +> [`package.json`](./package.json). Otherwise, 20.x or later should work! + +1. :hammer_and_wrench: Install the dependencies + + ```bash + npm install + ``` + +1. :building_construction: Package the TypeScript for distribution + + ```bash + npm run bundle + ``` + +1. 
:white_check_mark: Check your artifact + + There will be a tgz file in your plugin directory now + +## Update the Plugin Metadata + +The [`package.json`](package.json) file defines metadata about your plugin, such as +plugin name, main entry, description and version. + +When you copy this repository, update `package.json` with the name, description for your plugin. + +## Update the Plugin Code + +The [`src/`](./src/) directory is the heart of your plugin! This contains the +source code that will be run when your plugin extension functions are invoked. You can replace the +contents of this directory with your own code. + +There are a few things to keep in mind when writing your plugin code: + +- Most Jan Plugin Extension functions are processed asynchronously. + In `index.ts`, you will see that the extension function will return a `Promise`. + + ```typescript + import { core } from "@janhq/core"; + + function onStart(): Promise { + return core.invokePluginFunc(MODULE_PATH, "run", 0); + } + ``` + + For more information about the Jan Plugin Core module, see the + [documentation](https://github.com/janhq/jan/blob/main/core/README.md). + +So, what are you waiting for? Go ahead and start customizing your plugin! + diff --git a/extensions/inference-openai-extension/package.json b/extensions/inference-openai-extension/package.json new file mode 100644 index 000000000..5d5dac264 --- /dev/null +++ b/extensions/inference-openai-extension/package.json @@ -0,0 +1,57 @@ +{ + "name": "@janhq/inference-openai-extension", + "version": "1.0.0", + "description": "Inference Engine for OpenAI Extension, powered by @janhq/nitro, bring a high-performance Llama model inference in pure C++.", + "main": "dist/index.js", + "module": "dist/module.js", + "author": "Jan ", + "license": "AGPL-3.0", + "scripts": { + "build": "tsc -b . 
&& webpack --config webpack.config.js", + "downloadnitro:linux": "NITRO_VERSION=$(cat ./bin/version.txt) && download https://github.com/janhq/nitro/releases/download/v${NITRO_VERSION}/nitro-${NITRO_VERSION}-linux-amd64.tar.gz -e --strip 1 -o ./bin/linux-cpu && chmod +x ./bin/linux-cpu/nitro && chmod +x ./bin/linux-start.sh && download https://github.com/janhq/nitro/releases/download/v${NITRO_VERSION}/nitro-${NITRO_VERSION}-linux-amd64-cuda.tar.gz -e --strip 1 -o ./bin/linux-cuda && chmod +x ./bin/linux-cuda/nitro && chmod +x ./bin/linux-start.sh", + "downloadnitro:darwin": "NITRO_VERSION=$(cat ./bin/version.txt) && download https://github.com/janhq/nitro/releases/download/v${NITRO_VERSION}/nitro-${NITRO_VERSION}-mac-arm64.tar.gz -e --strip 1 -o ./bin/mac-arm64 && chmod +x ./bin/mac-arm64/nitro && download https://github.com/janhq/nitro/releases/download/v${NITRO_VERSION}/nitro-${NITRO_VERSION}-mac-amd64.tar.gz -e --strip 1 -o ./bin/mac-x64 && chmod +x ./bin/mac-x64/nitro", + "downloadnitro:win32": "download.bat", + "downloadnitro": "run-script-os", + "build:publish:darwin": "rimraf *.tgz --glob && npm run build && npm run downloadnitro && ../../.github/scripts/auto-sign.sh && cpx \"bin/**\" \"dist/bin\" && npm pack && cpx *.tgz ../../electron/pre-install", + "build:publish:win32": "rimraf *.tgz --glob && npm run build && npm run downloadnitro && cpx \"bin/**\" \"dist/bin\" && npm pack && cpx *.tgz ../../electron/pre-install", + "build:publish:linux": "rimraf *.tgz --glob && npm run build && npm run downloadnitro && cpx \"bin/**\" \"dist/bin\" && npm pack && cpx *.tgz ../../electron/pre-install", + "build:publish": "run-script-os" + }, + "exports": { + ".": "./dist/index.js", + "./main": "./dist/module.js" + }, + "devDependencies": { + "cpx": "^1.5.0", + "rimraf": "^3.0.2", + "run-script-os": "^1.1.6", + "webpack": "^5.88.2", + "webpack-cli": "^5.1.4" + }, + "dependencies": { + "@janhq/core": "file:../../core", + "download-cli": "^1.1.1", + "electron-log": "^5.0.1", + "fetch-retry": "^5.0.6", + "kill-port": "^2.0.1", + "path-browserify": "^1.0.1", + "rxjs": "^7.8.1", + "tcp-port-used": "^1.0.2", + "ts-loader": "^9.5.0", + "ulid": "^2.3.0" + }, + "engines": { + "node": ">=18.0.0" + }, + "files": [ + "dist/*", + "package.json", + "README.md" + ], + "bundleDependencies": [ + "tcp-port-used", + "kill-port", + "fetch-retry", + "electron-log" + ] +} diff --git a/extensions/inference-openai-extension/src/@types/global.d.ts b/extensions/inference-openai-extension/src/@types/global.d.ts new file mode 100644 index 000000000..7267f0940 --- /dev/null +++ b/extensions/inference-openai-extension/src/@types/global.d.ts @@ -0,0 +1,2 @@ +declare const MODULE: string; +declare const INFERENCE_URL: string; diff --git a/extensions/inference-openai-extension/src/index.ts b/extensions/inference-openai-extension/src/index.ts new file mode 100644 index 000000000..1ba471ab1 --- /dev/null +++ b/extensions/inference-openai-extension/src/index.ts @@ -0,0 +1,184 @@ +/** + * @file This file exports a class that implements the InferenceExtension interface from the @janhq/core package. + * The class provides methods for initializing and stopping a model, and for making inference requests. + * It also subscribes to events emitted by the @janhq/core package and handles new message requests. 
+ * @version 1.0.0
+ * @module inference-extension/src/index
+ */
+
+import {
+  ChatCompletionRole,
+  ContentType,
+  EventName,
+  MessageRequest,
+  MessageStatus,
+  ModelSettingParams,
+  ExtensionType,
+  ThreadContent,
+  ThreadMessage,
+  events,
+  executeOnMain,
+  getUserSpace,
+} from "@janhq/core";
+import { InferenceExtension } from "@janhq/core";
+import { requestInference } from "./helpers/sse";
+import { ulid } from "ulid";
+import { join } from "path";
+
+/**
+ * A class that implements the InferenceExtension interface from the @janhq/core package.
+ * The class provides methods for initializing and stopping a model, and for making inference requests.
+ * It also subscribes to events emitted by the @janhq/core package and handles new message requests.
+ */
+export default class JanInferenceExtension implements InferenceExtension {
+  controller = new AbortController();
+  isCancelled = false;
+  /**
+   * Returns the type of the extension.
+   * @returns {ExtensionType} The type of the extension.
+   */
+  type(): ExtensionType {
+    return ExtensionType.Inference;
+  }
+
+  /**
+   * Subscribes to events emitted by the @janhq/core package.
+   */
+  onLoad(): void {
+    events.on(EventName.OnMessageSent, (data) =>
+      JanInferenceExtension.handleMessageRequest(data, this)
+    );
+  }
+
+  /**
+   * Stops the model inference.
+   */
+  onUnload(): void {
+    this.stopModel();
+  }
+
+  /**
+   * Initializes the model with the specified file name.
+   * @param {string} modelId - The ID of the model to initialize.
+   * @returns {Promise<void>} A promise that resolves when the model is initialized.
+   */
+  async initModel(
+    modelId: string,
+    settings?: ModelSettingParams
+  ): Promise<void> {
+    const userSpacePath = await getUserSpace();
+    const modelFullPath = join(userSpacePath, "models", modelId, modelId);
+
+    return executeOnMain(MODULE, "initModel", {
+      modelFullPath,
+      settings,
+    });
+  }
+
+  /**
+   * Stops the model.
+   * @returns {Promise<void>} A promise that resolves when the model is stopped.
+   */
+  async stopModel(): Promise<void> {
+    return executeOnMain(MODULE, "killSubprocess");
+  }
+
+  /**
+   * Stops streaming inference.
+   * @returns {Promise<void>} A promise that resolves when the streaming is stopped.
+   */
+  async stopInference(): Promise<void> {
+    this.isCancelled = true;
+    this.controller?.abort();
+  }
+
+  /**
+   * Makes a single response inference request.
+   * @param {MessageRequest} data - The data for the inference request.
+   * @returns {Promise<ThreadMessage>} A promise that resolves with the inference response.
+   */
+  async inferenceRequest(data: MessageRequest): Promise<ThreadMessage> {
+    const timestamp = Date.now();
+    const message: ThreadMessage = {
+      thread_id: data.threadId,
+      created: timestamp,
+      updated: timestamp,
+      status: MessageStatus.Ready,
+      id: "",
+      role: ChatCompletionRole.Assistant,
+      object: "thread.message",
+      content: [],
+    };
+
+    return new Promise(async (resolve, reject) => {
+      requestInference(data.messages ?? []).subscribe({
+        next: (_content) => {},
+        complete: async () => {
+          resolve(message);
+        },
+        error: async (err) => {
+          reject(err);
+        },
+      });
+    });
+  }
+
+  /**
+   * Handles a new message request by making an inference request and emitting events.
+   * Function registered in event manager, should be static to avoid binding issues.
+   * Pass instance as a reference.
+   * @param {MessageRequest} data - The data for the new message request.
+ */ + private static async handleMessageRequest( + data: MessageRequest, + instance: JanInferenceExtension + ) { + const timestamp = Date.now(); + const message: ThreadMessage = { + id: ulid(), + thread_id: data.threadId, + assistant_id: data.assistantId, + role: ChatCompletionRole.Assistant, + content: [], + status: MessageStatus.Pending, + created: timestamp, + updated: timestamp, + object: "thread.message", + }; + events.emit(EventName.OnMessageResponse, message); + console.log(JSON.stringify(data, null, 2)); + + instance.isCancelled = false; + instance.controller = new AbortController(); + + requestInference(data.messages, instance.controller).subscribe({ + next: (content) => { + const messageContent: ThreadContent = { + type: ContentType.Text, + text: { + value: content.trim(), + annotations: [], + }, + }; + message.content = [messageContent]; + events.emit(EventName.OnMessageUpdate, message); + }, + complete: async () => { + message.status = MessageStatus.Ready; + events.emit(EventName.OnMessageUpdate, message); + }, + error: async (err) => { + const messageContent: ThreadContent = { + type: ContentType.Text, + text: { + value: "Error occurred: " + err.message, + annotations: [], + }, + }; + message.content = [messageContent]; + message.status = MessageStatus.Ready; + events.emit(EventName.OnMessageUpdate, message); + }, + }); + } +} diff --git a/extensions/inference-openai-extension/tsconfig.json b/extensions/inference-openai-extension/tsconfig.json new file mode 100644 index 000000000..b48175a16 --- /dev/null +++ b/extensions/inference-openai-extension/tsconfig.json @@ -0,0 +1,15 @@ +{ + "compilerOptions": { + "target": "es2016", + "module": "ES6", + "moduleResolution": "node", + + "outDir": "./dist", + "esModuleInterop": true, + "forceConsistentCasingInFileNames": true, + "strict": false, + "skipLibCheck": true, + "rootDir": "./src" + }, + "include": ["./src"] +} diff --git a/extensions/inference-openai-extension/webpack.config.js b/extensions/inference-openai-extension/webpack.config.js new file mode 100644 index 000000000..45be62271 --- /dev/null +++ b/extensions/inference-openai-extension/webpack.config.js @@ -0,0 +1,42 @@ +const path = require("path"); +const webpack = require("webpack"); +const packageJson = require("./package.json"); + +module.exports = { + experiments: { outputModule: true }, + entry: "./src/index.ts", // Adjust the entry point to match your project's main file + mode: "production", + module: { + rules: [ + { + test: /\.tsx?$/, + use: "ts-loader", + exclude: /node_modules/, + }, + ], + }, + plugins: [ + new webpack.DefinePlugin({ + MODULE: JSON.stringify(`${packageJson.name}/${packageJson.module}`), + INFERENCE_URL: JSON.stringify( + process.env.INFERENCE_URL || + "http://127.0.0.1:3928/inferences/llamacpp/chat_completion" + ), + }), + ], + output: { + filename: "index.js", // Adjust the output file name as needed + path: path.resolve(__dirname, "dist"), + library: { type: "module" }, // Specify ESM output format + }, + resolve: { + extensions: [".ts", ".js"], + fallback: { + path: require.resolve("path-browserify"), + }, + }, + optimization: { + minimize: false, + }, + // Add loaders and other configuration as needed for your project +}; From 19637c40bfedd934d926169765ad81f1b35ea1b4 Mon Sep 17 00:00:00 2001 From: hiro Date: Fri, 1 Dec 2023 18:13:45 +0700 Subject: [PATCH 03/58] feat: Add nitro engine settings --- extensions/inference-nitro-extension/nitro.json | 6 ++++++ 1 file changed, 6 insertions(+) create mode 100644 
extensions/inference-nitro-extension/nitro.json diff --git a/extensions/inference-nitro-extension/nitro.json b/extensions/inference-nitro-extension/nitro.json new file mode 100644 index 000000000..8b01cb908 --- /dev/null +++ b/extensions/inference-nitro-extension/nitro.json @@ -0,0 +1,6 @@ +{ + "ctx_len": 2048, + "ngl": 100, + "cont_batching": false, + "embedding": false +} \ No newline at end of file From 337da5084082cc9be74ce42689211b546a903716 Mon Sep 17 00:00:00 2001 From: hiro Date: Fri, 1 Dec 2023 18:14:13 +0700 Subject: [PATCH 04/58] fix: Add fs to read and write nitro engine settings --- extensions/inference-nitro-extension/src/index.ts | 14 ++++++++++++-- extensions/inference-nitro-extension/src/module.ts | 6 ++---- 2 files changed, 14 insertions(+), 6 deletions(-) diff --git a/extensions/inference-nitro-extension/src/index.ts b/extensions/inference-nitro-extension/src/index.ts index e8e7758bb..0a1011772 100644 --- a/extensions/inference-nitro-extension/src/index.ts +++ b/extensions/inference-nitro-extension/src/index.ts @@ -19,6 +19,7 @@ import { events, executeOnMain, getUserSpace, + fs } from "@janhq/core"; import { InferenceExtension } from "@janhq/core"; import { requestInference } from "./helpers/sse"; @@ -31,6 +32,9 @@ import { join } from "path"; * It also subscribes to events emitted by the @janhq/core package and handles new message requests. */ export default class JanInferenceExtension implements InferenceExtension { + private static readonly _homeDir = 'engines' + private static readonly _engineMetadataFileName = 'nitro.json' + controller = new AbortController(); isCancelled = false; /** @@ -45,6 +49,8 @@ export default class JanInferenceExtension implements InferenceExtension { * Subscribes to events emitted by the @janhq/core package. 
*/ onLoad(): void { + fs.mkdir(JanInferenceExtension._homeDir) + events.on(EventName.OnMessageSent, (data) => JanInferenceExtension.handleMessageRequest(data, this) ); @@ -68,10 +74,14 @@ export default class JanInferenceExtension implements InferenceExtension { ): Promise { const userSpacePath = await getUserSpace(); const modelFullPath = join(userSpacePath, "models", modelId, modelId); - + let engine_settings = JSON.parse(await fs.readFile(join(JanInferenceExtension._homeDir, JanInferenceExtension._engineMetadataFileName))) + engine_settings = { + engine_settings + ...settings, + }; return executeOnMain(MODULE, "initModel", { modelFullPath, - settings, + engine_settings, }); } diff --git a/extensions/inference-nitro-extension/src/module.ts b/extensions/inference-nitro-extension/src/module.ts index 90582073e..e9b2d8eb5 100644 --- a/extensions/inference-nitro-extension/src/module.ts +++ b/extensions/inference-nitro-extension/src/module.ts @@ -42,12 +42,10 @@ function initModel(wrapper: any): Promise { const settings = { llama_model_path: currentModelFile, - ctx_len: 2048, - ngl: 100, - cont_batching: false, - embedding: false, // Always enable embedding mode on ...wrapper.settings, }; + + log.info(`Load model settings: ${JSON.stringify(settings, null, 2)}`); return ( From d69f0e3321380644b30ef714784bebe34c50e126 Mon Sep 17 00:00:00 2001 From: hiro Date: Fri, 1 Dec 2023 18:14:29 +0700 Subject: [PATCH 05/58] chore: Update openai engine --- .../src/helpers/sse.ts | 56 +++++++++++++++ .../inference-openai-extension/src/index.ts | 69 ++++++++++--------- .../inference-openai-extension/src/module.ts | 34 +++++++++ 3 files changed, 127 insertions(+), 32 deletions(-) create mode 100644 extensions/inference-openai-extension/src/helpers/sse.ts create mode 100644 extensions/inference-openai-extension/src/module.ts diff --git a/extensions/inference-openai-extension/src/helpers/sse.ts b/extensions/inference-openai-extension/src/helpers/sse.ts new file mode 100644 index 000000000..f427e443c --- /dev/null +++ b/extensions/inference-openai-extension/src/helpers/sse.ts @@ -0,0 +1,56 @@ +import { Observable } from "rxjs"; +/** + * Sends a request to the inference server to generate a response based on the recent messages. + * @param recentMessages - An array of recent messages to use as context for the inference. + * @returns An Observable that emits the generated response as a string. + */ +export function requestInference( + recentMessages: any[], + controller?: AbortController +): Observable { + return new Observable((subscriber) => { + const requestBody = JSON.stringify({ + messages: recentMessages, + stream: true, + model: "gpt-3.5-turbo", + max_tokens: 2048, + }); + fetch(INFERENCE_URL, { + method: "POST", + headers: { + "Content-Type": "application/json", + Accept: "text/event-stream", + "Access-Control-Allow-Origin": "*", + }, + body: requestBody, + signal: controller?.signal, + }) + .then(async (response) => { + const stream = response.body; + const decoder = new TextDecoder("utf-8"); + const reader = stream?.getReader(); + let content = ""; + + while (true && reader) { + const { done, value } = await reader.read(); + if (done) { + break; + } + const text = decoder.decode(value); + const lines = text.trim().split("\n"); + for (const line of lines) { + if (line.startsWith("data: ") && !line.includes("data: [DONE]")) { + const data = JSON.parse(line.replace("data: ", "")); + content += data.choices[0]?.delta?.content ?? 
""; + if (content.startsWith("assistant: ")) { + content = content.replace("assistant: ", ""); + } + subscriber.next(content); + } + } + } + subscriber.complete(); + }) + .catch((err) => subscriber.error(err)); + }); +} diff --git a/extensions/inference-openai-extension/src/index.ts b/extensions/inference-openai-extension/src/index.ts index 1ba471ab1..652f47b6c 100644 --- a/extensions/inference-openai-extension/src/index.ts +++ b/extensions/inference-openai-extension/src/index.ts @@ -3,7 +3,7 @@ * The class provides methods for initializing and stopping a model, and for making inference requests. * It also subscribes to events emitted by the @janhq/core package and handles new message requests. * @version 1.0.0 - * @module inference-extension/src/index + * @module inference-openai-extension/src/index */ import { @@ -19,6 +19,7 @@ import { events, executeOnMain, getUserSpace, + fs } from "@janhq/core"; import { InferenceExtension } from "@janhq/core"; import { requestInference } from "./helpers/sse"; @@ -31,20 +32,26 @@ import { join } from "path"; * It also subscribes to events emitted by the @janhq/core package and handles new message requests. */ export default class JanInferenceExtension implements InferenceExtension { + private static readonly _homeDir = 'engines' + private static readonly _engineMetadataFileName = 'openai.json' + controller = new AbortController(); isCancelled = false; /** * Returns the type of the extension. * @returns {ExtensionType} The type of the extension. */ + // TODO: To fix type(): ExtensionType { - return ExtensionType.Inference; + return undefined; } - +// janroot/engine/nitro.json /** * Subscribes to events emitted by the @janhq/core package. */ onLoad(): void { + fs.mkdir(JanInferenceExtension._homeDir) + // TODO: Copy nitro.json to janroot/engine/nitro.json events.on(EventName.OnMessageSent, (data) => JanInferenceExtension.handleMessageRequest(data, this) ); @@ -53,9 +60,7 @@ export default class JanInferenceExtension implements InferenceExtension { /** * Stops the model inference. */ - onUnload(): void { - this.stopModel(); - } + onUnload(): void {} /** * Initializes the model with the specified file name. @@ -79,9 +84,7 @@ export default class JanInferenceExtension implements InferenceExtension { * Stops the model. * @returns {Promise} A promise that resolves when the model is stopped. */ - async stopModel(): Promise { - return executeOnMain(MODULE, "killSubprocess"); - } + async stopModel(): Promise {} /** * Stops streaming inference. @@ -92,35 +95,37 @@ export default class JanInferenceExtension implements InferenceExtension { this.controller?.abort(); } + private async copyModelsToHomeDir() { + try { + // list all of the files under the home directory + const files = await fs.listFiles('') + + if (files.includes(JanInferenceExtension._homeDir)) { + // ignore if the model is already downloaded + console.debug('Model already downloaded') + return + } + + // copy models folder from resources to home directory + const resourePath = await getResourcePath() + const srcPath = join(resourePath, 'models') + + const userSpace = await getUserSpace() + const destPath = join(userSpace, JanInferenceExtension._homeDir) + + await fs.copyFile(srcPath, destPath) + } catch (err) { + console.error(err) + } + } + /** * Makes a single response inference request. * @param {MessageRequest} data - The data for the inference request. * @returns {Promise} A promise that resolves with the inference response. 
 */
   async inferenceRequest(data: MessageRequest): Promise<ThreadMessage> {
-    const timestamp = Date.now();
-    const message: ThreadMessage = {
-      thread_id: data.threadId,
-      created: timestamp,
-      updated: timestamp,
-      status: MessageStatus.Ready,
-      id: "",
-      role: ChatCompletionRole.Assistant,
-      object: "thread.message",
-      content: [],
-    };
-
-    return new Promise(async (resolve, reject) => {
-      requestInference(data.messages ?? []).subscribe({
-        next: (_content) => {},
-        complete: async () => {
-          resolve(message);
-        },
-        error: async (err) => {
-          reject(err);
-        },
-      });
-    });
+    // TODO: @louis
   }
 
   /**
diff --git a/extensions/inference-openai-extension/src/module.ts b/extensions/inference-openai-extension/src/module.ts
new file mode 100644
index 000000000..305c2e804
--- /dev/null
+++ b/extensions/inference-openai-extension/src/module.ts
@@ -0,0 +1,34 @@
+const fetchRetry = require("fetch-retry")(global.fetch);
+
+const log = require("electron-log");
+
+const OPENAI_BASE_URL = "https://api.openai.com/v1";
+const OPENAI_API_KEY = process.env.OPENAI_API_KEY;
+
+/**
+ * The response from the initModel function.
+ * @property error - An error message if the model fails to load.
+ */
+interface InitModelResponse {
+  error?: any;
+  modelFile?: string;
+}
+// /root/engine/nitro.json
+
+/**
+ * Initializes a Nitro subprocess to load a machine learning model.
+ * @param modelFile - The name of the machine learning model file.
+ * @returns A Promise that resolves when the model is loaded successfully, or rejects with an error message if the model is not found or fails to load.
+ */
+function initModel(wrapper: any): Promise {
+  const engine_settings = {
+    ...wrapper.settings,
+  };
+
+  return (
+  )
+}
+
+module.exports = {
+  initModel,
+};

From 6d3bf24d5caeed14dcf60ff8f5035324ee608b5c Mon Sep 17 00:00:00 2001
From: hiro 
Date: Fri, 1 Dec 2023 18:30:47 +0700
Subject: [PATCH 06/58] chore: remove gitkeep

---
 extensions/inference-nitro-extension/bin/linux-cpu/.gitkeep  | 0
 extensions/inference-nitro-extension/bin/linux-cuda/.gitkeep | 0
 extensions/inference-nitro-extension/bin/mac-arm64/.gitkeep  | 0
 extensions/inference-nitro-extension/bin/mac-x64/.gitkeep    | 0
 extensions/inference-nitro-extension/bin/win-cpu/.gitkeep    | 0
 extensions/inference-nitro-extension/bin/win-cuda/.gitkeep   | 0
 6 files changed, 0 insertions(+), 0 deletions(-)
 delete mode 100644 extensions/inference-nitro-extension/bin/linux-cpu/.gitkeep
 delete mode 100644 extensions/inference-nitro-extension/bin/linux-cuda/.gitkeep
 delete mode 100644 extensions/inference-nitro-extension/bin/mac-arm64/.gitkeep
 delete mode 100644 extensions/inference-nitro-extension/bin/mac-x64/.gitkeep
 delete mode 100644 extensions/inference-nitro-extension/bin/win-cpu/.gitkeep
 delete mode 100644 extensions/inference-nitro-extension/bin/win-cuda/.gitkeep

diff --git a/extensions/inference-nitro-extension/bin/linux-cpu/.gitkeep b/extensions/inference-nitro-extension/bin/linux-cpu/.gitkeep
deleted file mode 100644
index e69de29bb..000000000
diff --git a/extensions/inference-nitro-extension/bin/linux-cuda/.gitkeep b/extensions/inference-nitro-extension/bin/linux-cuda/.gitkeep
deleted file mode 100644
index e69de29bb..000000000
diff --git a/extensions/inference-nitro-extension/bin/mac-arm64/.gitkeep b/extensions/inference-nitro-extension/bin/mac-arm64/.gitkeep
deleted file mode 100644
index e69de29bb..000000000
diff --git a/extensions/inference-nitro-extension/bin/mac-x64/.gitkeep b/extensions/inference-nitro-extension/bin/mac-x64/.gitkeep
deleted file mode 100644
index e69de29bb..000000000
diff 
--git a/extensions/inference-nitro-extension/bin/win-cpu/.gitkeep b/extensions/inference-nitro-extension/bin/win-cpu/.gitkeep deleted file mode 100644 index e69de29bb..000000000 diff --git a/extensions/inference-nitro-extension/bin/win-cuda/.gitkeep b/extensions/inference-nitro-extension/bin/win-cuda/.gitkeep deleted file mode 100644 index e69de29bb..000000000 From a985626f293ffd7659239ef8e041e585a45fc17e Mon Sep 17 00:00:00 2001 From: hiro Date: Fri, 1 Dec 2023 18:33:02 +0700 Subject: [PATCH 07/58] feat: Add openai gpt-3.5 model.json --- models/openai-gpt-3.5/model.json | 19 +++++++++++++++++++ 1 file changed, 19 insertions(+) create mode 100644 models/openai-gpt-3.5/model.json diff --git a/models/openai-gpt-3.5/model.json b/models/openai-gpt-3.5/model.json new file mode 100644 index 000000000..8d5060e6c --- /dev/null +++ b/models/openai-gpt-3.5/model.json @@ -0,0 +1,19 @@ +{ + "source_url": "https://api.openai.com/v1", + "id": "openai-gpt35", + "object": "model", + "name": "OpenAI GPT 3.5", + "version": 1.0, + "description": "OpenAI GPT 3.5 model is extremely good", + "format": "api", + "settings": {}, + "parameters": { + "max_tokens": 4096 + }, + "metadata": { + "author": "OpenAI", + "tags": ["General", "Big Context Length"] + }, + "engine": "openai" +} + \ No newline at end of file From 5f8e2ae54cac67869a99f00004a160efb7fd838a Mon Sep 17 00:00:00 2001 From: hiro Date: Fri, 1 Dec 2023 18:33:29 +0700 Subject: [PATCH 08/58] fix: Add engine llama.cpp to local models --- models/capybara-34b/model.json | 3 ++- models/deepseek-coder-1.3b/model.json | 4 +++- models/lzlv-70b/model.json | 3 ++- models/neural-chat-7b/model.json | 3 ++- models/openorca-13b/model.json | 3 ++- models/phind-34b/model.json | 3 ++- models/rocket-3b/model.json | 3 ++- models/starling-7b/model.json | 3 ++- models/tiefighter-13b/model.json | 3 ++- models/tinyllama-1.1b/model.json | 3 ++- models/wizardcoder-13b/model.json | 3 ++- models/yi-34b/model.json | 3 ++- models/zephyr-beta-7b/model.json | 3 ++- 13 files changed, 27 insertions(+), 13 deletions(-) diff --git a/models/capybara-34b/model.json b/models/capybara-34b/model.json index 4ac9899f1..09e6e64a0 100644 --- a/models/capybara-34b/model.json +++ b/models/capybara-34b/model.json @@ -19,6 +19,7 @@ "author": "NousResearch, The Bloke", "tags": ["34B", "Finetuned"], "size": 24320000000 - } + }, + "engine": "llama.cpp" } \ No newline at end of file diff --git a/models/deepseek-coder-1.3b/model.json b/models/deepseek-coder-1.3b/model.json index 4bab24ae2..70f86ec89 100644 --- a/models/deepseek-coder-1.3b/model.json +++ b/models/deepseek-coder-1.3b/model.json @@ -1,3 +1,4 @@ + { "source_url": "https://huggingface.co/TheBloke/deepseek-coder-1.3b-instruct-GGUF/resolve/main/deepseek-coder-1.3b-instruct.Q8_0.gguf", "id": "deepseek-coder-1.3b", @@ -19,5 +20,6 @@ "author": "Deepseek, The Bloke", "tags": ["Tiny", "Foundational Model"], "size": 1430000000 - } + }, + "engine": "nitro" } diff --git a/models/lzlv-70b/model.json b/models/lzlv-70b/model.json index 621d980ab..249171fcf 100644 --- a/models/lzlv-70b/model.json +++ b/models/lzlv-70b/model.json @@ -19,6 +19,7 @@ "author": "Lizpreciatior, The Bloke", "tags": ["70B", "Finetuned"], "size": 48750000000 - } + }, + "engine": "llama.cpp" } \ No newline at end of file diff --git a/models/neural-chat-7b/model.json b/models/neural-chat-7b/model.json index dfccf073d..634f9efe9 100644 --- a/models/neural-chat-7b/model.json +++ b/models/neural-chat-7b/model.json @@ -19,6 +19,7 @@ "author": "Intel, The Bloke", "tags": ["Recommended", 
"7B", "Finetuned"], "size": 4370000000 - } + }, + "engine": "llama.cpp" } \ No newline at end of file diff --git a/models/openorca-13b/model.json b/models/openorca-13b/model.json index fc4773371..edb0e24af 100644 --- a/models/openorca-13b/model.json +++ b/models/openorca-13b/model.json @@ -19,6 +19,7 @@ "author": "Microsoft, The Bloke", "tags": ["13B", "Finetuned"], "size": 9230000000 - } + }, + "engine": "llama.cpp" } \ No newline at end of file diff --git a/models/phind-34b/model.json b/models/phind-34b/model.json index 40d53b77a..dd68b4771 100644 --- a/models/phind-34b/model.json +++ b/models/phind-34b/model.json @@ -19,6 +19,7 @@ "author": "Phind, The Bloke", "tags": ["34B", "Finetuned"], "size": 24320000000 - } + }, + "engine": "llama.cpp" } \ No newline at end of file diff --git a/models/rocket-3b/model.json b/models/rocket-3b/model.json index c40ee6258..4c8f29454 100644 --- a/models/rocket-3b/model.json +++ b/models/rocket-3b/model.json @@ -19,5 +19,6 @@ "author": "Pansophic, The Bloke", "tags": ["Tiny", "Finetuned"], "size": 1710000000 - } + }, + "engine": "llama.cpp" } \ No newline at end of file diff --git a/models/starling-7b/model.json b/models/starling-7b/model.json index de7693673..a9e237ec3 100644 --- a/models/starling-7b/model.json +++ b/models/starling-7b/model.json @@ -19,6 +19,7 @@ "author": "Berkeley-nest, The Bloke", "tags": ["Recommended", "7B","Finetuned"], "size": 4370000000 - } + }, + "engine": "llama.cpp" } \ No newline at end of file diff --git a/models/tiefighter-13b/model.json b/models/tiefighter-13b/model.json index c7f209b77..a0fcdb04e 100644 --- a/models/tiefighter-13b/model.json +++ b/models/tiefighter-13b/model.json @@ -19,6 +19,7 @@ "author": "KoboldAI, The Bloke", "tags": ["13B", "Finetuned"], "size": 9230000000 - } + }, + "engine": "llama.cpp" } \ No newline at end of file diff --git a/models/tinyllama-1.1b/model.json b/models/tinyllama-1.1b/model.json index ace0ca6a0..40bcf4c14 100644 --- a/models/tinyllama-1.1b/model.json +++ b/models/tinyllama-1.1b/model.json @@ -19,5 +19,6 @@ "author": "TinyLlama", "tags": ["Tiny", "Foundation Model"], "size": 637000000 - } + }, + "engine": "llama.cpp" } \ No newline at end of file diff --git a/models/wizardcoder-13b/model.json b/models/wizardcoder-13b/model.json index 63cbd174b..1b86632e9 100644 --- a/models/wizardcoder-13b/model.json +++ b/models/wizardcoder-13b/model.json @@ -19,6 +19,7 @@ "author": "WizardLM, The Bloke", "tags": ["Recommended", "13B", "Finetuned"], "size": 9230000000 - } + }, + "engine": "llama.cpp" } \ No newline at end of file diff --git a/models/yi-34b/model.json b/models/yi-34b/model.json index 7c6da2f03..103e048f9 100644 --- a/models/yi-34b/model.json +++ b/models/yi-34b/model.json @@ -19,6 +19,7 @@ "author": "01-ai, The Bloke", "tags": ["34B", "Foundational Model"], "size": 24320000000 - } + }, + "engine": "llama.cpp" } \ No newline at end of file diff --git a/models/zephyr-beta-7b/model.json b/models/zephyr-beta-7b/model.json index f1ef4eb20..a846f6b1c 100644 --- a/models/zephyr-beta-7b/model.json +++ b/models/zephyr-beta-7b/model.json @@ -19,6 +19,7 @@ "author": "HuggingFaceH4, The Bloke", "tags": ["7B", "Finetuned"], "size": 4370000000 - } + }, + "engine": "llama.cpp" } \ No newline at end of file From 68ee6693bac998c7e6077079a6d5262e43011192 Mon Sep 17 00:00:00 2001 From: hiro Date: Sun, 3 Dec 2023 14:34:49 +0700 Subject: [PATCH 09/58] chore: Remove default nitro.json file --- extensions/inference-nitro-extension/nitro.json | 6 ------ 1 file changed, 6 deletions(-) delete mode 100644 
extensions/inference-nitro-extension/nitro.json diff --git a/extensions/inference-nitro-extension/nitro.json b/extensions/inference-nitro-extension/nitro.json deleted file mode 100644 index 8b01cb908..000000000 --- a/extensions/inference-nitro-extension/nitro.json +++ /dev/null @@ -1,6 +0,0 @@ -{ - "ctx_len": 2048, - "ngl": 100, - "cont_batching": false, - "embedding": false -} \ No newline at end of file From 9a18d3133ca219ace52457640526ee4993879948 Mon Sep 17 00:00:00 2001 From: hiro Date: Sun, 3 Dec 2023 14:35:22 +0700 Subject: [PATCH 10/58] chore: refactor openai file structure --- .../inference-openai-extension/package.json | 22 ++---------- .../inference-openai-extension/src/module.ts | 34 ------------------- 2 files changed, 3 insertions(+), 53 deletions(-) delete mode 100644 extensions/inference-openai-extension/src/module.ts diff --git a/extensions/inference-openai-extension/package.json b/extensions/inference-openai-extension/package.json index 5d5dac264..c32027ca8 100644 --- a/extensions/inference-openai-extension/package.json +++ b/extensions/inference-openai-extension/package.json @@ -1,21 +1,14 @@ { "name": "@janhq/inference-openai-extension", "version": "1.0.0", - "description": "Inference Engine for OpenAI Extension, powered by @janhq/nitro, bring a high-performance Llama model inference in pure C++.", + "description": "Inference Engine for OpenAI Extension that can be used with any OpenAI compatible API", "main": "dist/index.js", "module": "dist/module.js", "author": "Jan ", "license": "AGPL-3.0", "scripts": { "build": "tsc -b . && webpack --config webpack.config.js", - "downloadnitro:linux": "NITRO_VERSION=$(cat ./bin/version.txt) && download https://github.com/janhq/nitro/releases/download/v${NITRO_VERSION}/nitro-${NITRO_VERSION}-linux-amd64.tar.gz -e --strip 1 -o ./bin/linux-cpu && chmod +x ./bin/linux-cpu/nitro && chmod +x ./bin/linux-start.sh && download https://github.com/janhq/nitro/releases/download/v${NITRO_VERSION}/nitro-${NITRO_VERSION}-linux-amd64-cuda.tar.gz -e --strip 1 -o ./bin/linux-cuda && chmod +x ./bin/linux-cuda/nitro && chmod +x ./bin/linux-start.sh", - "downloadnitro:darwin": "NITRO_VERSION=$(cat ./bin/version.txt) && download https://github.com/janhq/nitro/releases/download/v${NITRO_VERSION}/nitro-${NITRO_VERSION}-mac-arm64.tar.gz -e --strip 1 -o ./bin/mac-arm64 && chmod +x ./bin/mac-arm64/nitro && download https://github.com/janhq/nitro/releases/download/v${NITRO_VERSION}/nitro-${NITRO_VERSION}-mac-amd64.tar.gz -e --strip 1 -o ./bin/mac-x64 && chmod +x ./bin/mac-x64/nitro", - "downloadnitro:win32": "download.bat", - "downloadnitro": "run-script-os", - "build:publish:darwin": "rimraf *.tgz --glob && npm run build && npm run downloadnitro && ../../.github/scripts/auto-sign.sh && cpx \"bin/**\" \"dist/bin\" && npm pack && cpx *.tgz ../../electron/pre-install", - "build:publish:win32": "rimraf *.tgz --glob && npm run build && npm run downloadnitro && cpx \"bin/**\" \"dist/bin\" && npm pack && cpx *.tgz ../../electron/pre-install", - "build:publish:linux": "rimraf *.tgz --glob && npm run build && npm run downloadnitro && cpx \"bin/**\" \"dist/bin\" && npm pack && cpx *.tgz ../../electron/pre-install", - "build:publish": "run-script-os" + "build:publish": "rimraf *.tgz --glob && npm run build && npm pack && cpx *.tgz ../../electron/pre-install" }, "exports": { ".": "./dist/index.js", @@ -24,19 +17,13 @@ "devDependencies": { "cpx": "^1.5.0", "rimraf": "^3.0.2", - "run-script-os": "^1.1.6", "webpack": "^5.88.2", "webpack-cli": "^5.1.4" }, "dependencies": { 
"@janhq/core": "file:../../core", - "download-cli": "^1.1.1", - "electron-log": "^5.0.1", "fetch-retry": "^5.0.6", - "kill-port": "^2.0.1", "path-browserify": "^1.0.1", - "rxjs": "^7.8.1", - "tcp-port-used": "^1.0.2", "ts-loader": "^9.5.0", "ulid": "^2.3.0" }, @@ -49,9 +36,6 @@ "README.md" ], "bundleDependencies": [ - "tcp-port-used", - "kill-port", - "fetch-retry", - "electron-log" + "fetch-retry" ] } diff --git a/extensions/inference-openai-extension/src/module.ts b/extensions/inference-openai-extension/src/module.ts deleted file mode 100644 index 305c2e804..000000000 --- a/extensions/inference-openai-extension/src/module.ts +++ /dev/null @@ -1,34 +0,0 @@ -const fetchRetry = require("fetch-retry")(global.fetch); - -const log = require("electron-log"); - -const OPENAI_BASE_URL = "https://api.openai.com/v1"; -const OPENAI_API_KEY = process.env.OPENAI_API_KEY; - -/** - * The response from the initModel function. - * @property error - An error message if the model fails to load. - */ -interface InitModelResponse { - error?: any; - modelFile?: string; -} -// /root/engine/nitro.json - -/** - * Initializes a Nitro subprocess to load a machine learning model. - * @param modelFile - The name of the machine learning model file. - * @returns A Promise that resolves when the model is loaded successfully, or rejects with an error message if the model is not found or fails to load. - */ -function initModel(wrapper: any): Promise { - const engine_settings = { - ...wrapper.settings, - }; - - return ( - ) -} - -module.exports = { - initModel, -}; From 8ab36d7cb232edb9d2bd522ab4de4e108c1ab958 Mon Sep 17 00:00:00 2001 From: hiro Date: Sun, 3 Dec 2023 14:35:54 +0700 Subject: [PATCH 11/58] feat: Add openai engine json reader and writer --- .../inference-openai-extension/src/index.ts | 80 ++++++++++++------- 1 file changed, 49 insertions(+), 31 deletions(-) diff --git a/extensions/inference-openai-extension/src/index.ts b/extensions/inference-openai-extension/src/index.ts index 652f47b6c..4fd16a414 100644 --- a/extensions/inference-openai-extension/src/index.ts +++ b/extensions/inference-openai-extension/src/index.ts @@ -19,7 +19,7 @@ import { events, executeOnMain, getUserSpace, - fs + fs, } from "@janhq/core"; import { InferenceExtension } from "@janhq/core"; import { requestInference } from "./helpers/sse"; @@ -31,7 +31,7 @@ import { join } from "path"; * The class provides methods for initializing and stopping a model, and for making inference requests. * It also subscribes to events emitted by the @janhq/core package and handles new message requests. */ -export default class JanInferenceExtension implements InferenceExtension { +export default class JanInferenceOpenAIExtension implements InferenceExtension { private static readonly _homeDir = 'engines' private static readonly _engineMetadataFileName = 'openai.json' @@ -50,10 +50,10 @@ export default class JanInferenceExtension implements InferenceExtension { * Subscribes to events emitted by the @janhq/core package. 
 */
   onLoad(): void {
-    fs.mkdir(JanInferenceExtension._homeDir)
-    // TODO: Copy nitro.json to janroot/engine/nitro.json
+    fs.mkdir(JanInferenceOpenAIExtension._homeDir)
+    this.writeDefaultEngineSettings()
     events.on(EventName.OnMessageSent, (data) =>
-      JanInferenceExtension.handleMessageRequest(data, this)
+      JanInferenceOpenAIExtension.handleMessageRequest(data, this)
     );
   }
@@ -68,17 +74,39 @@ export default class JanInferenceOpenAIExtension implements InferenceExtension
     const userSpacePath = await getUserSpace();
     const modelFullPath = join(userSpacePath, "models", modelId, modelId);
 
     return executeOnMain(MODULE, "initModel", {
       modelFullPath,
       settings,
     });
   }
 
+  private async writeDefaultEngineSettings() {
+    try {
+
+      const destPath = join(JanInferenceOpenAIExtension._homeDir, JanInferenceOpenAIExtension._engineMetadataFileName)
+      // TODO: Check with @louis for adding new binding
+      // if (await fs.checkFileExists(destPath)) {
+        const default_engine_settings = {
+          "base_url": "https://api.openai.com/v1",
+          "api_key": "sk-"
+        }
+        console.log(`Writing OpenAI engine settings to ${destPath}`)
+        await fs.writeFile(destPath, JSON.stringify(default_engine_settings, null, 2))
+      // }
+      // else {
+      //   console.log(`OpenAI engine settings already exist at ${destPath}`)
+      // }
+    } catch (err) {
+      console.error(err)
+    }
+  }
   /**
    * Stops the model.
    * @returns {Promise<void>} A promise that resolves when the model is stopped.
    */
   async stopModel(): Promise<void> {}
 
   /**
    * Stops streaming inference.
@@ -95,37 +115,35 @@ export default class JanInferenceOpenAIExtension implements InferenceExtension
     this.controller?.abort();
   }
 
-  private async copyModelsToHomeDir() {
-    try {
-      // list all of the files under the home directory
-      const files = await fs.listFiles('')
-
-      if (files.includes(JanInferenceExtension._homeDir)) {
-        // ignore if the model is already downloaded
-        console.debug('Model already downloaded')
-        return
-      }
-
-      // copy models folder from resources to home directory
-      const resourePath = await getResourcePath()
-      const srcPath = join(resourePath, 'models')
-
-      const userSpace = await getUserSpace()
-      const destPath = join(userSpace, JanInferenceExtension._homeDir)
-
-      await fs.copyFile(srcPath, destPath)
-    } catch (err) {
-      console.error(err)
-    }
-  }
-
   /**
    * Makes a single response inference request.
    * @param {MessageRequest} data - The data for the inference request.
    * @returns {Promise<ThreadMessage>} A promise that resolves with the inference response.
    */
   async inferenceRequest(data: MessageRequest): Promise<ThreadMessage> {
-    // TODO: @louis
+    const timestamp = Date.now();
+    const message: ThreadMessage = {
+      thread_id: data.threadId,
+      created: timestamp,
+      updated: timestamp,
+      status: MessageStatus.Ready,
+      id: "",
+      role: ChatCompletionRole.Assistant,
+      object: "thread.message",
+      content: [],
+    };
+
+    return new Promise(async (resolve, reject) => {
+      requestInference(data.messages ?? 
[]).subscribe({ + next: (_content) => {}, + complete: async () => { + resolve(message); + }, + error: async (err) => { + reject(err); + }, + }); + }); } /** @@ -136,7 +154,7 @@ export default class JanInferenceExtension implements InferenceExtension { */ private static async handleMessageRequest( data: MessageRequest, - instance: JanInferenceExtension + instance: JanInferenceOpenAIExtension ) { const timestamp = Date.now(); const message: ThreadMessage = { From 56b778675f46865a039b4971ef073b679d2b7e33 Mon Sep 17 00:00:00 2001 From: hiro Date: Sun, 3 Dec 2023 14:36:01 +0700 Subject: [PATCH 12/58] feat: Add nitro engine json reader and writer --- .../inference-nitro-extension/src/index.ts | 42 +++++++++++++------ 1 file changed, 30 insertions(+), 12 deletions(-) diff --git a/extensions/inference-nitro-extension/src/index.ts b/extensions/inference-nitro-extension/src/index.ts index 0a1011772..17f207ab9 100644 --- a/extensions/inference-nitro-extension/src/index.ts +++ b/extensions/inference-nitro-extension/src/index.ts @@ -19,7 +19,7 @@ import { events, executeOnMain, getUserSpace, - fs + fs, } from "@janhq/core"; import { InferenceExtension } from "@janhq/core"; import { requestInference } from "./helpers/sse"; @@ -31,7 +31,7 @@ import { join } from "path"; * The class provides methods for initializing and stopping a model, and for making inference requests. * It also subscribes to events emitted by the @janhq/core package and handles new message requests. */ -export default class JanInferenceExtension implements InferenceExtension { +export default class JanInferenceNitroExtension implements InferenceExtension { private static readonly _homeDir = 'engines' private static readonly _engineMetadataFileName = 'nitro.json' @@ -49,10 +49,10 @@ export default class JanInferenceExtension implements InferenceExtension { * Subscribes to events emitted by the @janhq/core package. 
 */
   onLoad(): void {
-    fs.mkdir(JanInferenceExtension._homeDir)
-
+    fs.mkdir(JanInferenceNitroExtension._homeDir)
+    this.writeDefaultEngineSettings()
     events.on(EventName.OnMessageSent, (data) =>
-      JanInferenceExtension.handleMessageRequest(data, this)
+      JanInferenceNitroExtension.handleMessageRequest(data, this)
     );
   }
 
@@ -74,14 +74,10 @@
   ): Promise<void> {
     const userSpacePath = await getUserSpace();
     const modelFullPath = join(userSpacePath, "models", modelId, modelId);
-    let engine_settings = JSON.parse(await fs.readFile(join(JanInferenceExtension._homeDir, JanInferenceExtension._engineMetadataFileName)))
-    engine_settings = {
-      engine_settings
-      ...settings,
-    };
+
     return executeOnMain(MODULE, "initModel", {
       modelFullPath,
-      engine_settings,
+      settings,
     });
   }
 
@@ -102,6 +98,28 @@
     this.controller?.abort();
   }
 
+  private async writeDefaultEngineSettings() {
+    try {
+      const destPath = join(JanInferenceNitroExtension._homeDir, JanInferenceNitroExtension._engineMetadataFileName)
+      // TODO: Check with @louis for adding new binding
+      // if (await fs.checkFileExists(destPath)) {
+      const default_engine_settings = {
+        "ctx_len": 2048,
+        "ngl": 100,
+        "cont_batching": false,
+        "embedding": false
+      }
+      console.log(`Writing nitro engine settings to ${destPath}`)
+      await fs.writeFile(destPath, JSON.stringify(default_engine_settings, null, 2))
+      // }
+      // else {
+      //   console.log(`Using existing nitro engine settings at ${destPath}`)
+      // }
+    } catch (err) {
+      console.error(err)
+    }
+  }
+
 /**
  * Makes a single response inference request.
  * @param {MessageRequest} data - The data for the inference request.
 * @returns {Promise} A promise that resolves with the inference response.

From 9aca37a30c1ee106a0ad7c1f4aab5b7f96b87edd Mon Sep 17 00:00:00 2001
From: hiro 
Date: Mon, 4 Dec 2023 12:09:49 +0700
Subject: [PATCH 13/58] chore: Add fs abstraction for checkFileExists

---
 core/src/fs.ts          | 35 ++++++++++++++++++-----------------
 electron/handlers/fs.ts | 13 +++++++++++++
 electron/invokers/fs.ts |  6 ++++++
 3 files changed, 37 insertions(+), 17 deletions(-)

diff --git a/core/src/fs.ts b/core/src/fs.ts
index 4013479dd..99c8cbccf 100644
--- a/core/src/fs.ts
+++ b/core/src/fs.ts
@@ -5,52 +5,52 @@
  * @returns {Promise} A Promise that resolves when the file is written successfully.
  */
 const writeFile: (path: string, data: string) => Promise<any> = (path, data) =>
-  global.core.api?.writeFile(path, data);
+  global.core.api?.writeFile(path, data)
 
 /**
  * Checks whether the path is a directory.
  * @param path - The path to check.
  * @returns {boolean} A boolean indicating whether the path is a directory.
  */
-const isDirectory = (path: string): Promise<boolean> =>
-  global.core.api?.isDirectory(path);
+const isDirectory = (path: string): Promise<boolean> => global.core.api?.isDirectory(path)
 
 /**
  * Reads the contents of a file at the specified path.
  * @param {string} path - The path of the file to read.
 * @returns {Promise} A Promise that resolves with the contents of the file.
 */
-const readFile: (path: string) => Promise<any> = (path) =>
-  global.core.api?.readFile(path);
+const readFile: (path: string) => Promise<any> = (path) => global.core.api?.readFile(path)
+/**
+ * Check whether the file exists
+ * @param {string} path
+ * @returns {boolean} A boolean indicating whether the path is a file.
+ */
+const checkFileExists = (path: string): Promise<boolean> => global.core.api?.checkFileExists(path)
 /**
  * List the directory files
  * @param {string} path - The path of the directory to list files.
  * @returns {Promise} A Promise that resolves with the contents of the directory.
  */
-const listFiles: (path: string) => Promise<any> = (path) =>
-  global.core.api?.listFiles(path);
+const listFiles: (path: string) => Promise<any> = (path) => global.core.api?.listFiles(path)
 
 /**
  * Creates a directory at the specified path.
  * @param {string} path - The path of the directory to create.
  * @returns {Promise} A Promise that resolves when the directory is created successfully.
  */
-const mkdir: (path: string) => Promise<any> = (path) =>
-  global.core.api?.mkdir(path);
+const mkdir: (path: string) => Promise<any> = (path) => global.core.api?.mkdir(path)
 
 /**
  * Removes a directory at the specified path.
  * @param {string} path - The path of the directory to remove.
  * @returns {Promise} A Promise that resolves when the directory is removed successfully.
  */
-const rmdir: (path: string) => Promise<any> = (path) =>
-  global.core.api?.rmdir(path);
+const rmdir: (path: string) => Promise<any> = (path) => global.core.api?.rmdir(path)
 
 /**
  * Deletes a file from the local file system.
  * @param {string} path - The path of the file to delete.
  * @returns {Promise} A Promise that resolves when the file is deleted.
  */
-const deleteFile: (path: string) => Promise<any> = (path) =>
-  global.core.api?.deleteFile(path);
+const deleteFile: (path: string) => Promise<any> = (path) => global.core.api?.deleteFile(path)
 
 /**
  * Appends data to a file at the specified path.
@@ -58,10 +58,10 @@
  * @param data data to append
  */
 const appendFile: (path: string, data: string) => Promise<any> = (path, data) =>
-  global.core.api?.appendFile(path, data);
+  global.core.api?.appendFile(path, data)
 
 const copyFile: (src: string, dest: string) => Promise<any> = (src, dest) =>
-  global.core.api?.copyFile(src, dest);
+  global.core.api?.copyFile(src, dest)
 
 /**
  * Reads a file line by line.
@@ -69,12 +69,13 @@
  * @returns {Promise} A promise that resolves to the lines of the file.
  */
 const readLineByLine: (path: string) => Promise<any> = (path) =>
-  global.core.api?.readLineByLine(path);
+  global.core.api?.readLineByLine(path)
 
 export const fs = {
   isDirectory,
   writeFile,
   readFile,
+  checkFileExists,
   listFiles,
   mkdir,
   rmdir,
@@ -82,4 +83,4 @@
   appendFile,
   readLineByLine,
   copyFile,
-};
+}
diff --git a/electron/handlers/fs.ts b/electron/handlers/fs.ts
index 16cef6eb6..1e2df5c59 100644
--- a/electron/handlers/fs.ts
+++ b/electron/handlers/fs.ts
@@ -50,6 +50,19 @@ export function handleFsIPCs() {
     })
   })
 
+  /**
+   * Checks whether a file exists in the user data directory.
+   * @param event - The event object.
+   * @param path - The path of the file to check.
+   * @returns A promise that resolves with a boolean indicating whether the file exists.
+   */
+  ipcMain.handle('checkFileExists', async (_event, path: string) => {
+    return new Promise((resolve, reject) => {
+      const fullPath = join(userSpacePath, path)
+      fs.existsSync(fullPath) ?
resolve(true) : resolve(false) + }) + }) + /** * Writes data to a file in the user data directory. * @param event - The event object. diff --git a/electron/invokers/fs.ts b/electron/invokers/fs.ts index 309562ad6..e59eb4c86 100644 --- a/electron/invokers/fs.ts +++ b/electron/invokers/fs.ts @@ -27,6 +27,12 @@ export function fsInvokers() { */ readFile: (path: string) => ipcRenderer.invoke('readFile', path), + /** + * Reads a file at the specified path. + * @param {string} path - The path of the file to read. + */ + checkFileExists: (path: string) => ipcRenderer.invoke('checkFileExists', path), + /** * Writes data to a file at the specified path. * @param {string} path - The path of the file to write to. From 1bc5fe64f3bf4e3deb390a0690a5425cce41dcfe Mon Sep 17 00:00:00 2001 From: hiro Date: Mon, 4 Dec 2023 12:10:24 +0700 Subject: [PATCH 14/58] fix: Use Events for init, load, stop models --- core/src/events.ts | 8 +++ core/src/types/index.ts | 15 +++++ web/containers/Providers/EventHandler.tsx | 49 +++++++++++++++- web/hooks/useActiveModel.ts | 56 ++----------------- .../ExploreModelItemHeader/index.tsx | 20 ++++++- 5 files changed, 93 insertions(+), 55 deletions(-) diff --git a/core/src/events.ts b/core/src/events.ts index f588daad7..81451c1f0 100644 --- a/core/src/events.ts +++ b/core/src/events.ts @@ -8,6 +8,14 @@ export enum EventName { OnMessageResponse = "OnMessageResponse", /** The `OnMessageUpdate` event is emitted when a message is updated. */ OnMessageUpdate = "OnMessageUpdate", + /** The `OnModelInit` event is emitted when a model inits. */ + OnModelInit = "OnModelInit", + /** The `OnModelReady` event is emitted when a model ready. */ + OnModelReady = "OnModelReady", + /** The `OnModelFail` event is emitted when a model fails loading. */ + OnModelFail = "OnModelFail", + /** The `OnModelStop` event is emitted when a model fails loading. */ + OnModelStop = "OnModelStop", } /** diff --git a/core/src/types/index.ts b/core/src/types/index.ts index 87343aa65..5b45d4cc8 100644 --- a/core/src/types/index.ts +++ b/core/src/types/index.ts @@ -166,6 +166,17 @@ export type ThreadState = { error?: Error; lastMessage?: string; }; +/** + * Represents the inference engine. + * @stored + */ + +enum InferenceEngine { + llama_cpp = "llama_cpp", + openai = "openai", + nvidia_triton = "nvidia_triton", + hf_endpoint = "hf_endpoint", +} /** * Model type defines the shape of a model object. @@ -234,6 +245,10 @@ export interface Model { * Metadata of the model. */ metadata: ModelMetadata; + /** + * The model engine. 
Enum: "llamacpp" "openai" + */ + engine: InferenceEngine; } export type ModelMetadata = { diff --git a/web/containers/Providers/EventHandler.tsx b/web/containers/Providers/EventHandler.tsx index 46f4b19d4..a3910e266 100644 --- a/web/containers/Providers/EventHandler.tsx +++ b/web/containers/Providers/EventHandler.tsx @@ -7,9 +7,10 @@ import { ThreadMessage, ExtensionType, MessageStatus, + Model } from '@janhq/core' import { ConversationalExtension } from '@janhq/core' -import { useAtomValue, useSetAtom } from 'jotai' +import { atom, useAtom, useAtomValue, useSetAtom } from 'jotai' import { extensionManager } from '@/extension' import { @@ -21,9 +22,16 @@ import { threadsAtom, } from '@/helpers/atoms/Conversation.atom' +import { activeModelAtom, stateModelAtom } from '@/hooks/useActiveModel' +import { useGetDownloadedModels } from '@/hooks/useGetDownloadedModels' +import { toaster } from '../Toast' + export default function EventHandler({ children }: { children: ReactNode }) { const addNewMessage = useSetAtom(addNewMessageAtom) const updateMessage = useSetAtom(updateMessageAtom) + const { downloadedModels } = useGetDownloadedModels() + const [activeModel, setActiveModel] = useAtom(activeModelAtom) + const [stateModel, setStateModel] = useAtom(stateModelAtom) const updateThreadWaiting = useSetAtom(updateThreadWaitingForResponseAtom) const threads = useAtomValue(threadsAtom) @@ -37,6 +45,42 @@ export default function EventHandler({ children }: { children: ReactNode }) { addNewMessage(message) } + async function handleModelReady(res: any) { + const model = downloadedModels.find((e) => e.id === res.modelId) + setActiveModel(model) + toaster({ + title: 'Success!', + description: `Model ${res.modelId} has been started.`, + }) + setStateModel(() => ({ + state: 'stop', + loading: false, + model: res.modelId, + })) + } + + async function handleModelStop(res: any) { + const model = downloadedModels.find((e) => e.id === res.modelId) + setTimeout(async () => { + setActiveModel(undefined) + setStateModel({ state: 'start', loading: false, model: '' }) + toaster({ + title: 'Success!', + description: `Model ${res.modelId} has been stopped.`, + }) + }, 500) + } + + async function handleModelFail(res: any) { + const errorMessage = `${res.error}` + alert(errorMessage) + setStateModel(() => ({ + state: 'start', + loading: false, + model: res.modelId, + })) + } + async function handleMessageResponseUpdate(message: ThreadMessage) { updateMessage( message.id, @@ -73,6 +117,9 @@ export default function EventHandler({ children }: { children: ReactNode }) { if (window.core.events) { events.on(EventName.OnMessageResponse, handleNewMessageResponse) events.on(EventName.OnMessageUpdate, handleMessageResponseUpdate) + events.on(EventName.OnModelReady, handleModelReady) + events.on(EventName.OnModelFail, handleModelFail) + events.on(EventName.OnModelStop, handleModelStop) } // eslint-disable-next-line react-hooks/exhaustive-deps }, []) diff --git a/web/hooks/useActiveModel.ts b/web/hooks/useActiveModel.ts index 60be0f2c4..4f1565e15 100644 --- a/web/hooks/useActiveModel.ts +++ b/web/hooks/useActiveModel.ts @@ -1,5 +1,5 @@ /* eslint-disable @typescript-eslint/no-explicit-any */ -import { ExtensionType, InferenceExtension } from '@janhq/core' +import { EventName, ExtensionType, InferenceExtension, events } from '@janhq/core' import { Model, ModelSettingParams } from '@janhq/core' import { atom, useAtom } from 'jotai' @@ -9,9 +9,9 @@ import { useGetDownloadedModels } from './useGetDownloadedModels' import { extensionManager } 
from '@/extension'
 
-const activeModelAtom = atom<Model | undefined>(undefined)
+export const activeModelAtom = atom<Model | undefined>(undefined)
 
-const stateModelAtom = atom({ state: 'start', loading: false, model: '' })
+export const stateModelAtom = atom({ state: 'start', loading: false, model: '' })
 
 export function useActiveModel() {
   const [activeModel, setActiveModel] = useAtom(activeModelAtom)
@@ -47,59 +47,13 @@
       return
     }
 
-    const currentTime = Date.now()
-    const res = await initModel(modelId, model?.settings)
-    if (res && res.error) {
-      const errorMessage = `${res.error}`
-      alert(errorMessage)
-      setStateModel(() => ({
-        state: 'start',
-        loading: false,
-        model: modelId,
-      }))
-    } else {
-      console.debug(
-        `Model ${modelId} successfully initialized! Took ${
-          Date.now() - currentTime
-        }ms`
-      )
-      setActiveModel(model)
-      toaster({
-        title: 'Success!',
-        description: `Model ${modelId} has been started.`,
-      })
-      setStateModel(() => ({
-        state: 'stop',
-        loading: false,
-        model: modelId,
-      }))
-    }
+    events.emit(EventName.OnModelInit, model)
   }
 
   const stopModel = async (modelId: string) => {
     setStateModel({ state: 'stop', loading: true, model: modelId })
-    setTimeout(async () => {
-      extensionManager
-        .get<InferenceExtension>(ExtensionType.Inference)
-        ?.stopModel()
-
-      setActiveModel(undefined)
-      setStateModel({ state: 'start', loading: false, model: '' })
-      toaster({
-        title: 'Success!',
-        description: `Model ${modelId} has been stopped.`,
-      })
-    }, 500)
+    events.emit(EventName.OnModelStop, modelId)
   }
 
   return { activeModel, startModel, stopModel, stateModel }
 }
-
-const initModel = async (
-  modelId: string,
-  settings?: ModelSettingParams
-): Promise<any> => {
-  return extensionManager
-    .get<InferenceExtension>(ExtensionType.Inference)
-    ?.initModel(modelId, settings)
-}
diff --git a/web/screens/ExploreModels/ExploreModelItemHeader/index.tsx b/web/screens/ExploreModels/ExploreModelItemHeader/index.tsx
index ba23056c6..f5d54f0be 100644
--- a/web/screens/ExploreModels/ExploreModelItemHeader/index.tsx
+++ b/web/screens/ExploreModels/ExploreModelItemHeader/index.tsx
@@ -55,9 +55,23 @@ const ExploreModelItemHeader: React.FC = ({ model, onClick, open }) => {
   const isDownloaded = downloadedModels.find((md) => md.id === model.id) != null
 
-  let downloadButton = (
-    
-  )
+  let downloadButton;
+
+  if (model.engine !== 'nitro') {
+    downloadButton = (
+      
+    );
+  } else if (model.engine === 'nitro') {
+    downloadButton = (
+      
+    );
+  }
 
   const onUseModelClick = () => {
     startModel(model.id)

From 2c648caa5f7f4d0d80ab360601d0d322b2eed69e Mon Sep 17 00:00:00 2001
From: hiro 
Date: Mon, 4 Dec 2023 12:10:46 +0700
Subject: [PATCH 15/58] fix: Update nitro with read/ write for engine.json

---
 .../inference-nitro-extension/src/index.ts    | 36 +++++++++++--------
 .../inference-nitro-extension/src/module.ts   |  4 ++-
 2 files changed, 24 insertions(+), 16 deletions(-)

diff --git a/extensions/inference-nitro-extension/src/index.ts b/extensions/inference-nitro-extension/src/index.ts
index 0a1011772..17f207ab9 100644
--- a/extensions/inference-nitro-extension/src/index.ts
+++ b/extensions/inference-nitro-extension/src/index.ts
@@ -26,6 +26,13 @@ import { requestInference } from "./helpers/sse";
 import { ulid } from "ulid";
 import { join } from "path";
 
+interface EngineSettings {
+  ctx_len: number;
+  ngl: number;
+  cont_batching: boolean;
+  embedding: boolean;
+}
+
 /**
  * A class that implements the InferenceExtension interface from the @janhq/core package.
  * The class provides methods for initializing and stopping a model, and for making inference requests.
@@ -35,6 +42,13 @@ export default class JanInferenceNitroExtension implements InferenceExtension {
   private static readonly _homeDir = 'engines'
   private static readonly _engineMetadataFileName = 'nitro.json'
 
+  private _engineSettings: EngineSettings = {
+    "ctx_len": 2048,
+    "ngl": 100,
+    "cont_batching": false,
+    "embedding": false
+  }
+
   controller = new AbortController();
   isCancelled = false;
   /**
@@ -100,21 +114,13 @@
   private async writeDefaultEngineSettings() {
     try {
-      const destPath = join(JanInferenceNitroExtension._homeDir, JanInferenceNitroExtension._engineMetadataFileName)
-      // TODO: Check with @louis for adding new binding
-      // if (await fs.checkFileExists(destPath)) {
-      const default_engine_settings = {
-        "ctx_len": 2048,
-        "ngl": 100,
-        "cont_batching": false,
-        "embedding": false
-      }
-      console.log(`Writing nitro engine settings to ${destPath}`)
-      await fs.writeFile(destPath, JSON.stringify(default_engine_settings, null, 2))
-      // }
-      // else {
-      //   console.log(`Using existing nitro engine settings at ${destPath}`)
-      // }
+      const engine_json = join(JanInferenceNitroExtension._homeDir, JanInferenceNitroExtension._engineMetadataFileName)
+      if (await fs.checkFileExists(engine_json)) {
+        this._engineSettings = JSON.parse(await fs.readFile(engine_json))
+      }
+      else {
+        await fs.writeFile(engine_json, JSON.stringify(this._engineSettings, null, 2))
+      }
     } catch (err) {
       console.error(err)
     }
diff --git a/extensions/inference-nitro-extension/src/module.ts b/extensions/inference-nitro-extension/src/module.ts
index e9b2d8eb5..3eeedec32 100644
--- a/extensions/inference-nitro-extension/src/module.ts
+++ b/extensions/inference-nitro-extension/src/module.ts
@@ -36,6 +36,9 @@ interface InitModelResponse {
  * TODO: Should it be startModel instead?
  */
 function initModel(wrapper: any): Promise<InitModelResponse> {
+  if (wrapper.settings.engine !== "llamacpp") {
+    return
+  }
   // 1. Check if the model file exists
   currentModelFile = wrapper.modelFullPath;
   log.info("Started to load model " + wrapper.modelFullPath);
@@ -45,7 +48,6 @@
     ...wrapper.settings,
   };
 
-  log.info(`Load model settings: ${JSON.stringify(settings, null, 2)}`);
   return (
 
From 486c5b8ca0c62817ef035e3148a09a815d53ff2d Mon Sep 17 00:00:00 2001
From: hiro 
Date: Mon, 4 Dec 2023 12:10:52 +0700
Subject: [PATCH 16/58] fix: Update openai with read/ write for engine.json

---
 .../inference-openai-extension/src/index.ts   | 68 ++++++++++++-------
 1 file changed, 45 insertions(+), 23 deletions(-)

diff --git a/extensions/inference-openai-extension/src/index.ts b/extensions/inference-openai-extension/src/index.ts
index 4fd16a414..8d72422d3 100644
--- a/extensions/inference-openai-extension/src/index.ts
+++ b/extensions/inference-openai-extension/src/index.ts
@@ -20,12 +20,18 @@ import {
   executeOnMain,
   getUserSpace,
   fs,
+  Model,
 } from "@janhq/core";
 import { InferenceExtension } from "@janhq/core";
 import { requestInference } from "./helpers/sse";
 import { ulid } from "ulid";
 import { join } from "path";
 
+interface EngineSettings {
+  base_url?: string;
+  api_key?: string;
+}
+
 /**
  * A class that implements the InferenceExtension interface from the @janhq/core package.
 * The class provides methods for initializing and stopping a model, and for making inference requests.
@@ -35,6 +41,10 @@ export default class JanInferenceOpenAIExtension implements InferenceExtension {
   private static readonly _homeDir = 'engines'
   private static readonly _engineMetadataFileName = 'openai.json'
 
+  private _engineSettings: EngineSettings = {
+    "base_url": "https://api.openai.com/v1",
+    "api_key": "sk-"
+  }
   controller = new AbortController();
   isCancelled = false;
   /**
@@ -52,9 +62,19 @@
   onLoad(): void {
     fs.mkdir(JanInferenceOpenAIExtension._homeDir)
     this.writeDefaultEngineSettings()
+
+    // Events subscription
     events.on(EventName.OnMessageSent, (data) =>
-      JanInferenceOpenAIExtension.handleMessageRequest(data, this)
+      JanInferenceOpenAIExtension.handleMessageRequest(data, this)
     );
+
+    events.on(EventName.OnModelInit, (data: Model) => {
+      JanInferenceOpenAIExtension.handleModelInit(data);
+    });
+
+    events.on(EventName.OnModelStop, (data: Model) => {
+      JanInferenceOpenAIExtension.handleModelStop(data);
+    });
   }
 
   /**
@@ -71,31 +91,18 @@
     modelId: string,
     settings?: ModelSettingParams
   ): Promise<void> {
-    const userSpacePath = await getUserSpace();
-    const modelFullPath = join(userSpacePath, "models", modelId, modelId);
-
-    return executeOnMain(MODULE, "initModel", {
-      modelFullPath,
-      settings,
-    });
+    return
   }
 
   private async writeDefaultEngineSettings() {
     try {
-
-      const destPath = join(JanInferenceOpenAIExtension._homeDir, JanInferenceOpenAIExtension._engineMetadataFileName)
-      // TODO: Check with @louis for adding new binding
-      // if (await fs.checkFileExists(destPath)) {
-      const default_engine_settings = {
-        "base_url": "https://api.openai.com/v1",
-        "api_key": "sk-"
-      }
-      console.log(`Writing OpenAI engine settings to ${destPath}`)
-      await fs.writeFile(destPath, JSON.stringify(default_engine_settings, null, 2))
-      // }
-      // else {
-      //   console.log(`OpenAI engine settings already exist at ${destPath}`)
-      // }
+      const engine_json = join(JanInferenceOpenAIExtension._homeDir, JanInferenceOpenAIExtension._engineMetadataFileName)
+      if (await fs.checkFileExists(engine_json)) {
+        this._engineSettings = JSON.parse(await fs.readFile(engine_json))
+      }
+      else {
+        await fs.writeFile(engine_json, JSON.stringify(this._engineSettings, null, 2))
+      }
     } catch (err) {
       console.error(err)
     }
@@ -146,6 +153,22 @@
     });
   }
 
+  private static async handleModelInit(data: Model) {
+    console.log('Model init success', data)
+    // Add filter data engine = openai
+    if (data.engine !== 'openai') { return }
+    // If model success
+    events.emit(EventName.OnModelReady, {modelId: data.id})
+    // If model failed
+    // events.emit(EventName.OnModelFail, {modelId: data.id})
+  }
+
+  private static async handleModelStop(data: Model) {
+    // Add filter data engine = openai
+    if (data.engine !== 'openai') { return }
+    events.emit(EventName.OnModelStop, {modelId: data.id})
+  }
+
   /**
    * Handles a new message request by making an inference request and emitting events.
 * Function registered in event manager, should be static to avoid binding issues.
@@ -169,7 +192,6 @@ export default class JanInferenceOpenAIExtension implements InferenceExtension { object: "thread.message", }; events.emit(EventName.OnMessageResponse, message); - console.log(JSON.stringify(data, null, 2)); instance.isCancelled = false; instance.controller = new AbortController(); From 22f12cd02c191050a27a71922577f5693f8a4f9b Mon Sep 17 00:00:00 2001 From: hiro Date: Mon, 4 Dec 2023 12:11:47 +0700 Subject: [PATCH 17/58] fix: Update model.json for Hub with engine nitro/ openai --- models/capybara-34b/model.json | 4 ++-- models/lzlv-70b/model.json | 2 +- models/neural-chat-7b/model.json | 2 +- models/noromaid-20b/model.json | 3 ++- models/openai-gpt-3.5/model.json | 6 ++++-- models/openorca-13b/model.json | 2 +- models/phind-34b/model.json | 2 +- models/rocket-3b/model.json | 2 +- models/starling-7b/model.json | 2 +- models/tiefighter-13b/model.json | 2 +- models/tinyllama-1.1b/model.json | 6 +++--- models/wizardcoder-13b/model.json | 2 +- models/yi-34b/model.json | 2 +- models/zephyr-beta-7b/model.json | 2 +- 14 files changed, 21 insertions(+), 18 deletions(-) diff --git a/models/capybara-34b/model.json b/models/capybara-34b/model.json index 09e6e64a0..e4263f957 100644 --- a/models/capybara-34b/model.json +++ b/models/capybara-34b/model.json @@ -19,7 +19,7 @@ "author": "NousResearch, The Bloke", "tags": ["34B", "Finetuned"], "size": 24320000000 - }, - "engine": "llama.cpp" + }, + "engine": "nitro" } \ No newline at end of file diff --git a/models/lzlv-70b/model.json b/models/lzlv-70b/model.json index 249171fcf..ca6af617e 100644 --- a/models/lzlv-70b/model.json +++ b/models/lzlv-70b/model.json @@ -20,6 +20,6 @@ "tags": ["70B", "Finetuned"], "size": 48750000000 }, - "engine": "llama.cpp" + "engine": "nitro" } \ No newline at end of file diff --git a/models/neural-chat-7b/model.json b/models/neural-chat-7b/model.json index 634f9efe9..1d6271469 100644 --- a/models/neural-chat-7b/model.json +++ b/models/neural-chat-7b/model.json @@ -20,6 +20,6 @@ "tags": ["Recommended", "7B", "Finetuned"], "size": 4370000000 }, - "engine": "llama.cpp" + "engine": "nitro" } \ No newline at end of file diff --git a/models/noromaid-20b/model.json b/models/noromaid-20b/model.json index 698687d8b..a7c4bceb8 100644 --- a/models/noromaid-20b/model.json +++ b/models/noromaid-20b/model.json @@ -19,6 +19,7 @@ "author": "NeverSleep, The Bloke", "tags": ["34B", "Finetuned"], "size": 12040000000 - } + }, + "engine": "llama_cpp" } \ No newline at end of file diff --git a/models/openai-gpt-3.5/model.json b/models/openai-gpt-3.5/model.json index 8d5060e6c..c794349aa 100644 --- a/models/openai-gpt-3.5/model.json +++ b/models/openai-gpt-3.5/model.json @@ -1,12 +1,14 @@ { - "source_url": "https://api.openai.com/v1", + "source_url": "https://openai.com", "id": "openai-gpt35", "object": "model", "name": "OpenAI GPT 3.5", "version": 1.0, "description": "OpenAI GPT 3.5 model is extremely good", "format": "api", - "settings": {}, + "settings": { + "base_url": "https://api.openai.com/v1" + }, "parameters": { "max_tokens": 4096 }, diff --git a/models/openorca-13b/model.json b/models/openorca-13b/model.json index edb0e24af..42c8bd96e 100644 --- a/models/openorca-13b/model.json +++ b/models/openorca-13b/model.json @@ -20,6 +20,6 @@ "tags": ["13B", "Finetuned"], "size": 9230000000 }, - "engine": "llama.cpp" + "engine": "nitro" } \ No newline at end of file diff --git a/models/phind-34b/model.json b/models/phind-34b/model.json index dd68b4771..eb3c1a18a 100644 --- a/models/phind-34b/model.json +++ 
b/models/phind-34b/model.json @@ -20,6 +20,6 @@ "tags": ["34B", "Finetuned"], "size": 24320000000 }, - "engine": "llama.cpp" + "engine": "nitro" } \ No newline at end of file diff --git a/models/rocket-3b/model.json b/models/rocket-3b/model.json index 4c8f29454..b1d338e11 100644 --- a/models/rocket-3b/model.json +++ b/models/rocket-3b/model.json @@ -20,5 +20,5 @@ "tags": ["Tiny", "Finetuned"], "size": 1710000000 }, - "engine": "llama.cpp" + "engine": "llama_cpp" } \ No newline at end of file diff --git a/models/starling-7b/model.json b/models/starling-7b/model.json index a9e237ec3..52b03f8b1 100644 --- a/models/starling-7b/model.json +++ b/models/starling-7b/model.json @@ -20,6 +20,6 @@ "tags": ["Recommended", "7B","Finetuned"], "size": 4370000000 }, - "engine": "llama.cpp" + "engine": "nitro" } \ No newline at end of file diff --git a/models/tiefighter-13b/model.json b/models/tiefighter-13b/model.json index a0fcdb04e..20075777c 100644 --- a/models/tiefighter-13b/model.json +++ b/models/tiefighter-13b/model.json @@ -20,6 +20,6 @@ "tags": ["13B", "Finetuned"], "size": 9230000000 }, - "engine": "llama.cpp" + "engine": "nitro" } \ No newline at end of file diff --git a/models/tinyllama-1.1b/model.json b/models/tinyllama-1.1b/model.json index 40bcf4c14..bc5dcb0c6 100644 --- a/models/tinyllama-1.1b/model.json +++ b/models/tinyllama-1.1b/model.json @@ -10,7 +10,8 @@ "ctx_len": 2048, "system_prompt": "<|system|>\n", "user_prompt": "<|user|>\n", - "ai_prompt": "<|assistant|>\n" + "ai_prompt": "<|assistant|>\n", + "engine": "nitro" }, "parameters": { "max_tokens": 2048 @@ -19,6 +20,5 @@ "author": "TinyLlama", "tags": ["Tiny", "Foundation Model"], "size": 637000000 - }, - "engine": "llama.cpp" + } } \ No newline at end of file diff --git a/models/wizardcoder-13b/model.json b/models/wizardcoder-13b/model.json index 1b86632e9..40f275037 100644 --- a/models/wizardcoder-13b/model.json +++ b/models/wizardcoder-13b/model.json @@ -20,6 +20,6 @@ "tags": ["Recommended", "13B", "Finetuned"], "size": 9230000000 }, - "engine": "llama.cpp" + "engine": "nitro" } \ No newline at end of file diff --git a/models/yi-34b/model.json b/models/yi-34b/model.json index 103e048f9..ab111591c 100644 --- a/models/yi-34b/model.json +++ b/models/yi-34b/model.json @@ -20,6 +20,6 @@ "tags": ["34B", "Foundational Model"], "size": 24320000000 }, - "engine": "llama.cpp" + "engine": "nitro" } \ No newline at end of file diff --git a/models/zephyr-beta-7b/model.json b/models/zephyr-beta-7b/model.json index a846f6b1c..4993366fd 100644 --- a/models/zephyr-beta-7b/model.json +++ b/models/zephyr-beta-7b/model.json @@ -20,6 +20,6 @@ "tags": ["7B", "Finetuned"], "size": 4370000000 }, - "engine": "llama.cpp" + "engine": "nitro" } \ No newline at end of file From 489c8584a96c4d4ce4bc78cba1ad581464d591c0 Mon Sep 17 00:00:00 2001 From: Louis Date: Mon, 4 Dec 2023 12:52:28 +0700 Subject: [PATCH 18/58] chore: models ref event handler --- web/containers/Providers/EventHandler.tsx | 60 ++++++++++++----------- web/hooks/useActiveModel.ts | 13 ++++- 2 files changed, 43 insertions(+), 30 deletions(-) diff --git a/web/containers/Providers/EventHandler.tsx b/web/containers/Providers/EventHandler.tsx index a3910e266..d24caf34d 100644 --- a/web/containers/Providers/EventHandler.tsx +++ b/web/containers/Providers/EventHandler.tsx @@ -7,10 +7,14 @@ import { ThreadMessage, ExtensionType, MessageStatus, - Model } from '@janhq/core' import { ConversationalExtension } from '@janhq/core' -import { atom, useAtom, useAtomValue, useSetAtom } from 'jotai' +import { 
useAtomValue, useSetAtom } from 'jotai' + +import { activeModelAtom, stateModelAtom } from '@/hooks/useActiveModel' +import { useGetDownloadedModels } from '@/hooks/useGetDownloadedModels' + +import { toaster } from '../Toast' import { extensionManager } from '@/extension' import { @@ -22,45 +26,45 @@ import { threadsAtom, } from '@/helpers/atoms/Conversation.atom' -import { activeModelAtom, stateModelAtom } from '@/hooks/useActiveModel' -import { useGetDownloadedModels } from '@/hooks/useGetDownloadedModels' -import { toaster } from '../Toast' - export default function EventHandler({ children }: { children: ReactNode }) { const addNewMessage = useSetAtom(addNewMessageAtom) const updateMessage = useSetAtom(updateMessageAtom) const { downloadedModels } = useGetDownloadedModels() - const [activeModel, setActiveModel] = useAtom(activeModelAtom) - const [stateModel, setStateModel] = useAtom(stateModelAtom) + const setActiveModel = useSetAtom(activeModelAtom) + const setStateModel = useSetAtom(stateModelAtom) const updateThreadWaiting = useSetAtom(updateThreadWaitingForResponseAtom) const threads = useAtomValue(threadsAtom) + const modelsRef = useRef(downloadedModels) const threadsRef = useRef(threads) useEffect(() => { threadsRef.current = threads }, [threads]) + useEffect(() => { + modelsRef.current = downloadedModels + }, [downloadedModels]) + async function handleNewMessageResponse(message: ThreadMessage) { addNewMessage(message) } async function handleModelReady(res: any) { - const model = downloadedModels.find((e) => e.id === res.modelId) - setActiveModel(model) - toaster({ - title: 'Success!', - description: `Model ${res.modelId} has been started.`, - }) - setStateModel(() => ({ - state: 'stop', - loading: false, - model: res.modelId, - })) - } + const model = modelsRef.current?.find((e) => e.id === res.modelId) + setActiveModel(model) + toaster({ + title: 'Success!', + description: `Model ${res.modelId} has been started.`, + }) + setStateModel(() => ({ + state: 'stop', + loading: false, + model: res.modelId, + })) + } async function handleModelStop(res: any) { - const model = downloadedModels.find((e) => e.id === res.modelId) setTimeout(async () => { setActiveModel(undefined) setStateModel({ state: 'start', loading: false, model: '' }) @@ -72,13 +76,13 @@ export default function EventHandler({ children }: { children: ReactNode }) { } async function handleModelFail(res: any) { - const errorMessage = `${res.error}` - alert(errorMessage) - setStateModel(() => ({ - state: 'start', - loading: false, - model: res.modelId, - })) + const errorMessage = `${res.error}` + alert(errorMessage) + setStateModel(() => ({ + state: 'start', + loading: false, + model: res.modelId, + })) } async function handleMessageResponseUpdate(message: ThreadMessage) { diff --git a/web/hooks/useActiveModel.ts b/web/hooks/useActiveModel.ts index 4f1565e15..3a1343489 100644 --- a/web/hooks/useActiveModel.ts +++ b/web/hooks/useActiveModel.ts @@ -1,5 +1,10 @@ /* eslint-disable @typescript-eslint/no-explicit-any */ -import { EventName, ExtensionType, InferenceExtension, events } from '@janhq/core' +import { + EventName, + ExtensionType, + InferenceExtension, + events, +} from '@janhq/core' import { Model, ModelSettingParams } from '@janhq/core' import { atom, useAtom } from 'jotai' @@ -11,7 +16,11 @@ import { extensionManager } from '@/extension' export const activeModelAtom = atom(undefined) -export const stateModelAtom = atom({ state: 'start', loading: false, model: '' }) +export const stateModelAtom = atom({ + state: 
'start', + loading: false, + model: '', +}) export function useActiveModel() { const [activeModel, setActiveModel] = useAtom(activeModelAtom) From 750f09cc81bd0e55338faa97ce81d30b14d5b619 Mon Sep 17 00:00:00 2001 From: hiro Date: Mon, 4 Dec 2023 23:07:59 +0700 Subject: [PATCH 19/58] fix: update engine field in tiny llama 1.1b --- models/tinyllama-1.1b/model.json | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/models/tinyllama-1.1b/model.json b/models/tinyllama-1.1b/model.json index bc5dcb0c6..bb6aeaf5c 100644 --- a/models/tinyllama-1.1b/model.json +++ b/models/tinyllama-1.1b/model.json @@ -10,8 +10,7 @@ "ctx_len": 2048, "system_prompt": "<|system|>\n", "user_prompt": "<|user|>\n", - "ai_prompt": "<|assistant|>\n", - "engine": "nitro" + "ai_prompt": "<|assistant|>\n" }, "parameters": { "max_tokens": 2048 @@ -20,5 +19,6 @@ "author": "TinyLlama", "tags": ["Tiny", "Foundation Model"], "size": 637000000 - } + }, + "engine": "nitro" } \ No newline at end of file From 0c838cecb10998a89112fc11064c3115ab84c194 Mon Sep 17 00:00:00 2001 From: hiro Date: Mon, 4 Dec 2023 23:10:31 +0700 Subject: [PATCH 20/58] fix: Update event types --- core/src/events.ts | 4 +++- core/src/types/index.ts | 8 ++++++-- 2 files changed, 9 insertions(+), 3 deletions(-) diff --git a/core/src/events.ts b/core/src/events.ts index 81451c1f0..bfaf3ea58 100644 --- a/core/src/events.ts +++ b/core/src/events.ts @@ -14,8 +14,10 @@ export enum EventName { OnModelReady = "OnModelReady", /** The `OnModelFail` event is emitted when a model fails loading. */ OnModelFail = "OnModelFail", - /** The `OnModelStop` event is emitted when a model fails loading. */ + /** The `OnModelStop` event is emitted when a model start to stop. */ OnModelStop = "OnModelStop", + /** The `OnModelStopped` event is emitted when a model stopped ok. */ + OnModelStopped = "OnModelStopped", } /** diff --git a/core/src/types/index.ts b/core/src/types/index.ts index 5b45d4cc8..b80012dd7 100644 --- a/core/src/types/index.ts +++ b/core/src/types/index.ts @@ -43,6 +43,9 @@ export type MessageRequest = { /** Runtime parameters for constructing a chat completion request **/ parameters?: ModelRuntimeParam; + + /** Settings for constructing a chat completion request **/ + model?: ModelInfo }; /** @@ -154,6 +157,7 @@ export type ModelInfo = { id: string; settings: ModelSettingParams; parameters: ModelRuntimeParam; + engine: InferenceEngine; }; /** @@ -172,7 +176,7 @@ export type ThreadState = { */ enum InferenceEngine { - llama_cpp = "llama_cpp", + nitro = "nitro", openai = "openai", nvidia_triton = "nvidia_triton", hf_endpoint = "hf_endpoint", @@ -246,7 +250,7 @@ export interface Model { */ metadata: ModelMetadata; /** - * The model engine. Enum: "llamacpp" "openai" + * The model engine. */ engine: InferenceEngine; } From a8e33c2a97ad88b5b6537420a97f5892c17b57ce Mon Sep 17 00:00:00 2001 From: hiro Date: Mon, 4 Dec 2023 23:18:26 +0700 Subject: [PATCH 21/58] fix: Update engine as enum --- docs/openapi/specs/models.yaml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/docs/openapi/specs/models.yaml b/docs/openapi/specs/models.yaml index 1bd7e65d7..1322b90ee 100644 --- a/docs/openapi/specs/models.yaml +++ b/docs/openapi/specs/models.yaml @@ -289,7 +289,7 @@ components: engine: type: string description: "The engine used by the model." - example: "llamacpp" + enum: [nitro, openai, hf_inference] quantization: type: string description: "Quantization parameter of the model." 
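Taken together, the patches above define an event contract for the model lifecycle: the web client emits `OnModelInit` with the full `Model` object, every inference extension filters on `model.engine`, and the owning extension answers with `OnModelReady` or `OnModelFail`; stopping follows the same pattern through `OnModelStop` and `OnModelStopped`. Below is a minimal sketch of that contract, assembled from the handlers shown in these patches; the `ENGINE` constant and the try/catch placement are illustrative, not part of any commit.

```typescript
import { EventName, Model, events } from "@janhq/core";

// Each extension filters on its own InferenceEngine value; "openai" is just an example here.
const ENGINE = "openai";

events.on(EventName.OnModelInit, async (model: Model) => {
  if (model.engine !== ENGINE) return; // another extension owns this model
  try {
    // ... engine-specific loading would happen here ...
    events.emit(EventName.OnModelReady, { modelId: model.id }); // the web handler reads res.modelId
  } catch (error) {
    events.emit(EventName.OnModelFail, { modelId: model.id, error }); // surfaced via alert() in EventHandler.tsx
  }
});

events.on(EventName.OnModelStop, (model: Model) => {
  if (model.engine !== ENGINE) return;
  events.emit(EventName.OnModelStopped, model); // the web side reads the stopped model's id
});
```

The payload shapes mirror the web-side handlers: `handleModelReady` and `handleModelFail` expect an object carrying `modelId` (and optionally `error`), while the stopped confirmation carries the `Model` itself, as wired up in the patches that follow.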
From a51b206cb8e769f2839f9279c071fcd68e7fd3f0 Mon Sep 17 00:00:00 2001 From: hiro Date: Tue, 5 Dec 2023 00:25:50 +0700 Subject: [PATCH 22/58] fix: Add OnModelStopped Event --- web/containers/Providers/EventHandler.tsx | 7 ++++--- 1 file changed, 4 insertions(+), 3 deletions(-) diff --git a/web/containers/Providers/EventHandler.tsx b/web/containers/Providers/EventHandler.tsx index d24caf34d..533e8cd4a 100644 --- a/web/containers/Providers/EventHandler.tsx +++ b/web/containers/Providers/EventHandler.tsx @@ -7,6 +7,7 @@ import { ThreadMessage, ExtensionType, MessageStatus, + Model, } from '@janhq/core' import { ConversationalExtension } from '@janhq/core' import { useAtomValue, useSetAtom } from 'jotai' @@ -64,13 +65,13 @@ export default function EventHandler({ children }: { children: ReactNode }) { })) } - async function handleModelStop(res: any) { + async function handleModelStopped(model: Model) { setTimeout(async () => { setActiveModel(undefined) setStateModel({ state: 'start', loading: false, model: '' }) toaster({ title: 'Success!', - description: `Model ${res.modelId} has been stopped.`, + description: `Model ${model.id} has been stopped.`, }) }, 500) } @@ -123,7 +124,7 @@ export default function EventHandler({ children }: { children: ReactNode }) { events.on(EventName.OnMessageUpdate, handleMessageResponseUpdate) events.on(EventName.OnModelReady, handleModelReady) events.on(EventName.OnModelFail, handleModelFail) - events.on(EventName.OnModelStop, handleModelStop) + events.on(EventName.OnModelStopped, handleModelStopped) } // eslint-disable-next-line react-hooks/exhaustive-deps }, []) From ffbfaf1fd14e4e8d93488d08fbfd5101a05d6769 Mon Sep 17 00:00:00 2001 From: hiro Date: Tue, 5 Dec 2023 00:26:19 +0700 Subject: [PATCH 23/58] feat: Add Event OnModelStop emission to web --- web/hooks/useActiveModel.ts | 5 ++--- 1 file changed, 2 insertions(+), 3 deletions(-) diff --git a/web/hooks/useActiveModel.ts b/web/hooks/useActiveModel.ts index 3a1343489..699b16279 100644 --- a/web/hooks/useActiveModel.ts +++ b/web/hooks/useActiveModel.ts @@ -1,8 +1,6 @@ /* eslint-disable @typescript-eslint/no-explicit-any */ import { EventName, - ExtensionType, - InferenceExtension, events, } from '@janhq/core' import { Model, ModelSettingParams } from '@janhq/core' @@ -60,8 +58,9 @@ export function useActiveModel() { } const stopModel = async (modelId: string) => { + const model = downloadedModels.find((e) => e.id === modelId) setStateModel({ state: 'stop', loading: true, model: modelId }) - events.emit(EventName.OnModelStop, modelId) + events.emit(EventName.OnModelStop, model) } return { activeModel, startModel, stopModel, stateModel } From 05b9a7bfc93bfc4064075effd9a931fff8a95cce Mon Sep 17 00:00:00 2001 From: hiro Date: Tue, 5 Dec 2023 00:26:41 +0700 Subject: [PATCH 24/58] fix: Delete default oai gpt 3.5 settings --- models/openai-gpt-3.5/model.json | 4 +--- 1 file changed, 1 insertion(+), 3 deletions(-) diff --git a/models/openai-gpt-3.5/model.json b/models/openai-gpt-3.5/model.json index c794349aa..0950981af 100644 --- a/models/openai-gpt-3.5/model.json +++ b/models/openai-gpt-3.5/model.json @@ -6,9 +6,7 @@ "version": 1.0, "description": "OpenAI GPT 3.5 model is extremely good", "format": "api", - "settings": { - "base_url": "https://api.openai.com/v1" - }, + "settings": {}, "parameters": { "max_tokens": 4096 }, From cb60a7cf92dd767a48b7b93271a97040a3b859fc Mon Sep 17 00:00:00 2001 From: hiro Date: Tue, 5 Dec 2023 00:27:09 +0700 Subject: [PATCH 25/58] chore: Change type ModelRuntimeParam to 
ModelRuntimeParams --- core/src/types/index.ts | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/core/src/types/index.ts b/core/src/types/index.ts index b80012dd7..99f7b1d0f 100644 --- a/core/src/types/index.ts +++ b/core/src/types/index.ts @@ -42,7 +42,7 @@ export type MessageRequest = { messages?: ChatCompletionMessage[]; /** Runtime parameters for constructing a chat completion request **/ - parameters?: ModelRuntimeParam; + parameters?: ModelRuntimeParams; /** Settings for constructing a chat completion request **/ model?: ModelInfo @@ -156,7 +156,7 @@ export type ThreadAssistantInfo = { export type ModelInfo = { id: string; settings: ModelSettingParams; - parameters: ModelRuntimeParam; + parameters: ModelRuntimeParams; engine: InferenceEngine; }; @@ -243,7 +243,7 @@ export interface Model { /** * The model runtime parameters. */ - parameters: ModelRuntimeParam; + parameters: ModelRuntimeParams; /** * Metadata of the model. @@ -287,7 +287,7 @@ export type ModelSettingParams = { /** * The available model runtime parameters. */ -export type ModelRuntimeParam = { +export type ModelRuntimeParams = { temperature?: number; token_limit?: number; top_k?: number; From fb8729bff3f80bf7441190b349124c38f9e99c4f Mon Sep 17 00:00:00 2001 From: hiro Date: Tue, 5 Dec 2023 00:27:33 +0700 Subject: [PATCH 26/58] fix: Check model engine openai for `Use` --- web/screens/ExploreModels/ExploreModelItemHeader/index.tsx | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/web/screens/ExploreModels/ExploreModelItemHeader/index.tsx b/web/screens/ExploreModels/ExploreModelItemHeader/index.tsx index f5d54f0be..69cd8af3e 100644 --- a/web/screens/ExploreModels/ExploreModelItemHeader/index.tsx +++ b/web/screens/ExploreModels/ExploreModelItemHeader/index.tsx @@ -57,7 +57,7 @@ const ExploreModelItemHeader: React.FC = ({ model, onClick, open }) => { let downloadButton; - if (model.engine !== 'nitro') { + if (model.engine === 'openai') { downloadButton = ( - ); - } else if (model.engine === 'nitro') { - downloadButton = ( - - ); - } + let downloadButton = ( + + ) const onUseModelClick = () => { startModel(model.id) From f9e73b0bbdf7949c1b740323671e3f2a4c71a834 Mon Sep 17 00:00:00 2001 From: hiro Date: Fri, 8 Dec 2023 22:42:07 +0700 Subject: [PATCH 54/58] fix: Change base_url to full_url --- extensions/inference-openai-extension/src/@types/global.d.ts | 2 +- extensions/inference-openai-extension/src/helpers/sse.ts | 2 +- extensions/inference-openai-extension/src/index.ts | 2 +- 3 files changed, 3 insertions(+), 3 deletions(-) diff --git a/extensions/inference-openai-extension/src/@types/global.d.ts b/extensions/inference-openai-extension/src/@types/global.d.ts index 5e9fd4d8a..bb0c6e9bf 100644 --- a/extensions/inference-openai-extension/src/@types/global.d.ts +++ b/extensions/inference-openai-extension/src/@types/global.d.ts @@ -3,7 +3,7 @@ import { Model } from "@janhq/core"; declare const MODULE: string; declare interface EngineSettings { - base_url?: string; + full_url?: string; api_key?: string; } diff --git a/extensions/inference-openai-extension/src/helpers/sse.ts b/extensions/inference-openai-extension/src/helpers/sse.ts index bc170a817..d13eb7c33 100644 --- a/extensions/inference-openai-extension/src/helpers/sse.ts +++ b/extensions/inference-openai-extension/src/helpers/sse.ts @@ -22,7 +22,7 @@ export function requestInference( // Model parameters spreading ...model.parameters, }); - fetch(`${engine.base_url}/chat/completions`, { + fetch(`${engine.full_url}`, { method: "POST", 
headers: { "Content-Type": "application/json", diff --git a/extensions/inference-openai-extension/src/index.ts b/extensions/inference-openai-extension/src/index.ts index 06e0f5e04..8a7955746 100644 --- a/extensions/inference-openai-extension/src/index.ts +++ b/extensions/inference-openai-extension/src/index.ts @@ -37,7 +37,7 @@ export default class JanInferenceOpenAIExtension implements InferenceExtension { private static _currentModel: OpenAIModel; private static _engineSettings: EngineSettings = { - base_url: "https://api.openai.com/v1", + full_url: "https://api.openai.com/v1/chat/completion", api_key: "sk-", }; From 0ef9a581d39c8a361b498f91f8bbde2f826777f3 Mon Sep 17 00:00:00 2001 From: hiro Date: Sat, 9 Dec 2023 00:36:55 +0700 Subject: [PATCH 55/58] fix: BAT for nitro --- extensions/inference-nitro-extension/download.bat | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/extensions/inference-nitro-extension/download.bat b/extensions/inference-nitro-extension/download.bat index 723268919..7df449040 100644 --- a/extensions/inference-nitro-extension/download.bat +++ b/extensions/inference-nitro-extension/download.bat @@ -1,3 +1,3 @@ @echo off -set /p NITRO_VERSION=<./nitro/version.txt +set /p NITRO_VERSION=<./bin/version.txt .\node_modules\.bin\download https://github.com/janhq/nitro/releases/download/v%NITRO_VERSION%/nitro-%NITRO_VERSION%-win-amd64-cuda.tar.gz -e --strip 1 -o ./nitro/win-cuda && .\node_modules\.bin\download https://github.com/janhq/nitro/releases/download/v%NITRO_VERSION%/nitro-%NITRO_VERSION%-win-amd64.tar.gz -e --strip 1 -o ./nitro/win-cpu From c32ad0aff7a7de9a0a7c8c73b2326a33cde05945 Mon Sep 17 00:00:00 2001 From: hiro Date: Sat, 9 Dec 2023 00:42:48 +0700 Subject: [PATCH 56/58] fix: small change in nitro bin location --- extensions/inference-nitro-extension/download.bat | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/extensions/inference-nitro-extension/download.bat b/extensions/inference-nitro-extension/download.bat index 7df449040..1776b7dfe 100644 --- a/extensions/inference-nitro-extension/download.bat +++ b/extensions/inference-nitro-extension/download.bat @@ -1,3 +1,3 @@ @echo off set /p NITRO_VERSION=<./bin/version.txt -.\node_modules\.bin\download https://github.com/janhq/nitro/releases/download/v%NITRO_VERSION%/nitro-%NITRO_VERSION%-win-amd64-cuda.tar.gz -e --strip 1 -o ./nitro/win-cuda && .\node_modules\.bin\download https://github.com/janhq/nitro/releases/download/v%NITRO_VERSION%/nitro-%NITRO_VERSION%-win-amd64.tar.gz -e --strip 1 -o ./nitro/win-cpu +.\node_modules\.bin\download https://github.com/janhq/nitro/releases/download/v%NITRO_VERSION%/nitro-%NITRO_VERSION%-win-amd64-cuda.tar.gz -e --strip 1 -o ./bin/win-cuda && .\node_modules\.bin\download https://github.com/janhq/nitro/releases/download/v%NITRO_VERSION%/nitro-%NITRO_VERSION%-win-amd64.tar.gz -e --strip 1 -o ./bin/win-cpu From 7e3e648e2acf2fac868507567bd0767238f17cf5 Mon Sep 17 00:00:00 2001 From: hiro Date: Sat, 9 Dec 2023 00:56:24 +0700 Subject: [PATCH 57/58] fix: inference extensions small syntax fix --- extensions/inference-nitro-extension/src/helpers/sse.ts | 1 - extensions/inference-openai-extension/src/index.ts | 2 +- 2 files changed, 1 insertion(+), 2 deletions(-) diff --git a/extensions/inference-nitro-extension/src/helpers/sse.ts b/extensions/inference-nitro-extension/src/helpers/sse.ts index 6edad302c..d9d8712dd 100644 --- a/extensions/inference-nitro-extension/src/helpers/sse.ts +++ b/extensions/inference-nitro-extension/src/helpers/sse.ts @@ -16,7 +16,6 
@@ export function requestInference(
     messages: recentMessages,
     model: model.id,
     stream: true,
-    // TODO: Model parameters spreading
     // ...model.parameters,
   });
   fetch(INFERENCE_URL, {
diff --git a/extensions/inference-openai-extension/src/index.ts b/extensions/inference-openai-extension/src/index.ts
index 8a7955746..7e3e6e71e 100644
--- a/extensions/inference-openai-extension/src/index.ts
+++ b/extensions/inference-openai-extension/src/index.ts
@@ -37,7 +37,7 @@ export default class JanInferenceOpenAIExtension implements InferenceExtension {
   private static _currentModel: OpenAIModel;
   private static _engineSettings: EngineSettings = {
-    full_url: "https://api.openai.com/v1/chat/completion",
+    full_url: "https://api.openai.com/v1/chat/completions",
     api_key: "sk-",
   };
 
From bbffaafcdcb32cbb5ab06dd5a001ccfb9fc61a0c Mon Sep 17 00:00:00 2001
From: hiro 
Date: Sat, 9 Dec 2023 00:56:39 +0700
Subject: [PATCH 58/58] feat: Added support for Azure OpenAI API

---
 .../inference-openai-extension/src/helpers/sse.ts  | 10 +++++++---
 1 file changed, 7 insertions(+), 3 deletions(-)

diff --git a/extensions/inference-openai-extension/src/helpers/sse.ts b/extensions/inference-openai-extension/src/helpers/sse.ts
index d13eb7c33..c8ddefca6 100644
--- a/extensions/inference-openai-extension/src/helpers/sse.ts
+++ b/extensions/inference-openai-extension/src/helpers/sse.ts
@@ -15,12 +15,15 @@ export function requestInference(
   controller?: AbortController
 ): Observable<string> {
   return new Observable<string>((subscriber) => {
+    let model_id: string = model.id
+    if (engine.full_url.includes("openai.azure.com")){
+      model_id = engine.full_url.split("/")[5]
+    }
     const requestBody = JSON.stringify({
       messages: recentMessages,
       stream: true,
-      model: model.id,
-      // Model parameters spreading
-      ...model.parameters,
+      model: model_id
+      // ...model.parameters,
     });
     fetch(`${engine.full_url}`, {
       method: "POST",
@@ -29,6 +32,7 @@
       Accept: "text/event-stream",
       "Access-Control-Allow-Origin": "*",
       Authorization: `Bearer ${engine.api_key}`,
+      "api-key": `${engine.api_key}`,
     },
     body: requestBody,
     signal: controller?.signal,
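For reference, the Azure branch in the final patch derives the deployment name from the URL path rather than from `model.id`. A short worked example of the `split("/")[5]` lookup, assuming the standard Azure OpenAI endpoint shape (the resource and deployment names below are made up):

```typescript
// Assumed shape:
//   https://{resource}.openai.azure.com/openai/deployments/{deployment}/chat/completions?api-version=...
const full_url =
  "https://my-resource.openai.azure.com/openai/deployments/my-gpt35-deployment/chat/completions?api-version=2023-05-15";

// Index:    0         1   2                               3         4              5
// Segments: "https:", "", "my-resource.openai.azure.com", "openai", "deployments", "my-gpt35-deployment", ...
const model_id = full_url.split("/")[5];
console.log(model_id); // "my-gpt35-deployment", sent as the request's `model` field
```

Azure authenticates with an `api-key` header rather than `Authorization: Bearer`, which is presumably why the patch sends the key in both headers: each endpoint picks up the header it understands and ignores the other.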