From d133594db70a11b0a0873396925a7c1c9d05866f Mon Sep 17 00:00:00 2001
From: Louis
Date: Fri, 20 Oct 2023 10:48:53 +0700
Subject: [PATCH] chore: clean

---
 plugins/inference-plugin/@types/global.d.ts |  1 +
 plugins/inference-plugin/index.ts           | 24 ++++----
 plugins/inference-plugin/module.ts          | 54 ++++++++----------
 .../{nitro_mac_amd64 => nitro_mac_intel}    | Bin
 plugins/inference-plugin/webpack.config.js  |  1 +
 5 files changed, 39 insertions(+), 41 deletions(-)
 rename plugins/inference-plugin/nitro/{nitro_mac_amd64 => nitro_mac_intel} (100%)

diff --git a/plugins/inference-plugin/@types/global.d.ts b/plugins/inference-plugin/@types/global.d.ts
index 87105f099..a0c04db1b 100644
--- a/plugins/inference-plugin/@types/global.d.ts
+++ b/plugins/inference-plugin/@types/global.d.ts
@@ -1,2 +1,3 @@
 declare const PLUGIN_NAME: string;
 declare const MODULE_PATH: string;
+declare const INFERENCE_URL: string;
diff --git a/plugins/inference-plugin/index.ts b/plugins/inference-plugin/index.ts
index aa2cbe935..e86511404 100644
--- a/plugins/inference-plugin/index.ts
+++ b/plugins/inference-plugin/index.ts
@@ -1,17 +1,23 @@
-import { EventName, InferenceService, NewMessageRequest, PluginService, core, events, store } from "@janhq/core";
+import {
+  EventName,
+  InferenceService,
+  NewMessageRequest,
+  PluginService,
+  events,
+  store,
+  invokePluginFunc,
+} from "@janhq/core";
 import { Observable } from "rxjs";
 
-const inferenceUrl = "http://127.0.0.1:3928/llama/chat_completion";
-
-const initModel = async (product) => core.invokePluginFunc(MODULE_PATH, "initModel", product);
+const initModel = async (product) => invokePluginFunc(MODULE_PATH, "initModel", product);
 
 const stopModel = () => {
-  core.invokePluginFunc(MODULE_PATH, "killSubprocess");
+  invokePluginFunc(MODULE_PATH, "killSubprocess");
 };
 
 function requestInference(recentMessages: any[]): Observable {
   return new Observable((subscriber) => {
-    fetch(inferenceUrl, {
+    fetch(INFERENCE_URL, {
       method: "POST",
       headers: {
         "Content-Type": "application/json",
@@ -22,11 +28,7 @@ function requestInference(recentMessages: any[]): Observable {
         messages: recentMessages,
         stream: true,
         model: "gpt-3.5-turbo",
-        max_tokens: 2048,
-        stop: ["hello"],
-        frequency_penalty: 0,
-        presence_penalty: 0,
-        temperature: 0
+        max_tokens: 500,
       }),
     })
       .then(async (response) => {
diff --git a/plugins/inference-plugin/module.ts b/plugins/inference-plugin/module.ts
index 407de81bb..e3bad81d8 100644
--- a/plugins/inference-plugin/module.ts
+++ b/plugins/inference-plugin/module.ts
@@ -5,8 +5,8 @@ const fs = require("fs");
 const tcpPortUsed = require("tcp-port-used");
 const { killPortProcess } = require("kill-port-process");
 
-let subprocess = null;
 const PORT = 3928;
+let subprocess = null;
 
 const initModel = (fileName) => {
   return (
@@ -15,9 +15,7 @@ const initModel = (fileName) => {
         reject("Model not found, please download again.");
       }
       if (subprocess) {
-        console.error(
-          "A subprocess is already running. Attempt to kill then reinit."
-        );
+        console.error("A subprocess is already running. Attempt to kill then reinit.");
         killSubprocess();
       }
       resolve(fileName);
@@ -32,26 +30,13 @@ const initModel = (fileName) => {
       // Spawn Nitro subprocess to load model
       .then(() => {
         let binaryFolder = path.join(__dirname, "nitro"); // Current directory by default
-
-        const config = {
-          llama_model_path: "",
-          ctx_len: 2048,
-          ngl: 100,
-          embedding: true // Always enable embedding mode on
-        }
-
-        const modelPath = path.join(app.getPath("userData"), fileName);
-
-        config.llama_model_path = modelPath;
-
         let binaryName;
 
         if (process.platform === "win32") {
           binaryName = "nitro_windows_amd64_cuda.exe";
         } else if (process.platform === "darwin") {
           // Mac OS platform
-          binaryName =
-            process.arch === "arm64" ? "nitro_mac_arm64" : "nitro_mac_amd64";
+          binaryName = process.arch === "arm64" ? "nitro_mac_arm64" : "nitro_mac_intel";
         } else {
           // Linux
           binaryName = "nitro_linux_amd64_cuda"; // For other platforms
@@ -60,7 +45,6 @@ const initModel = (fileName) => {
         const binaryPath = path.join(binaryFolder, binaryName);
 
         // Execute the binary
-
         subprocess = spawn(binaryPath, { cwd: binaryFolder });
 
         // Handle subprocess output
@@ -78,20 +62,30 @@ const initModel = (fileName) => {
         });
       })
       .then(() => tcpPortUsed.waitUntilUsed(PORT, 300, 30000))
-      .then((config) => {
-        const initModel = fetch(`http://127.0.0.1:${PORT}/inferences/llamacpp/loadmodel`, {
+      .then(() => {
+        const llama_model_path = path.join(app.getPath("userData"), fileName);
+
+        const config = {
+          llama_model_path,
+          ctx_len: 2048,
+          ngl: 100,
+          embedding: true, // Always enable embedding mode on
+        };
+
+        // Load model config
+        return fetch(`http://127.0.0.1:${PORT}/inferences/llamacpp/loadmodel`, {
           method: "POST",
           headers: {
             "Content-Type": "application/json",
-          },
-          body: JSON.stringify(config),
-        })
-          .then((res) => {
-            if (res.ok) {
-              return {};
-            }
-            throw new Error("Nitro: Model failed to load.");
-          })
+          },
+          body: JSON.stringify(config),
+        });
+      })
+      .then((res) => {
+        if (res.ok) {
+          return {};
+        }
+        throw new Error("Nitro: Model failed to load.");
       })
       .catch((err) => {
         return { error: err };
diff --git a/plugins/inference-plugin/nitro/nitro_mac_amd64 b/plugins/inference-plugin/nitro/nitro_mac_intel
similarity index 100%
rename from plugins/inference-plugin/nitro/nitro_mac_amd64
rename to plugins/inference-plugin/nitro/nitro_mac_intel
diff --git a/plugins/inference-plugin/webpack.config.js b/plugins/inference-plugin/webpack.config.js
index 9b72f3120..2d9e18ab2 100644
--- a/plugins/inference-plugin/webpack.config.js
+++ b/plugins/inference-plugin/webpack.config.js
@@ -19,6 +19,7 @@ module.exports = {
     new webpack.DefinePlugin({
       PLUGIN_NAME: JSON.stringify(packageJson.name),
      MODULE_PATH: JSON.stringify(`${packageJson.name}/${packageJson.module}`),
+      INFERENCE_URL: JSON.stringify("http://127.0.0.1:3928/inferences/llamacpp/chat_completion"),
     }),
   ],
   output: {
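
Note (editorial sketch, not part of the patch): the index.ts hunks above point requestInference() at the build-time INFERENCE_URL constant and stream chat completions through an rxjs Observable. A minimal consumer could look like the sketch below. It is illustrative only: it assumes requestInference were exported from index.ts (it is module-internal in the diff), that the Observable emits incremental text chunks (the subscriber/parsing logic sits outside the hunks shown), and that messages follow the OpenAI-style { role, content } shape used in the request body.

  import { requestInference } from "./index"; // hypothetical export, for illustration only

  let reply = "";
  requestInference([{ role: "user", content: "Hello" }]).subscribe({
    next: (chunk) => {
      reply += chunk; // append each streamed fragment as it arrives
    },
    error: (err) => console.error("Inference stream failed:", err),
    complete: () => console.log("Final reply:", reply),
  });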