refactor: introduce node module in nitro extension (#1630)

Louis 2024-01-17 11:28:54 +07:00 committed by GitHub
parent db987e88f9
commit f4f861d0e9
12 changed files with 782 additions and 651 deletions

View File

@@ -15,13 +15,6 @@
"dist"
],
"author": "Jan <service@jan.ai>",
"repository": {
"type": "git",
"url": ""
},
"engines": {
"node": ">=6.0.0"
},
"exports": {
".": "./dist/core.umd.js",
"./sdk": "./dist/core.umd.js",
@@ -49,53 +42,6 @@
"build": "tsc --module commonjs && rollup -c rollup.config.ts",
"start": "rollup -c rollup.config.ts -w"
},
"lint-staged": {
"{src,test}/**/*.ts": [
"prettier --write",
"git add"
]
},
"config": {
"commitizen": {
"path": "node_modules/cz-conventional-changelog"
}
},
"jest": {
"transform": {
".(ts|tsx)": "ts-jest"
},
"testEnvironment": "node",
"testRegex": "(/__tests__/.*|\\.(test|spec))\\.(ts|tsx|js)$",
"moduleFileExtensions": [
"ts",
"tsx",
"js"
],
"coveragePathIgnorePatterns": [
"/node_modules/",
"/test/"
],
"coverageThreshold": {
"global": {
"branches": 90,
"functions": 95,
"lines": 95,
"statements": 95
}
},
"collectCoverageFrom": [
"src/*.{js,ts}"
]
},
"prettier": {
"semi": false,
"singleQuote": true
},
"commitlint": {
"extends": [
"@commitlint/config-conventional"
]
},
"devDependencies": {
"@types/node": "^12.0.2",
"rollup": "^2.38.5",
@@ -104,7 +50,6 @@
"rollup-plugin-node-resolve": "^5.2.0",
"rollup-plugin-sourcemaps": "^0.6.3",
"rollup-plugin-typescript2": "^0.36.0",
"ts-node": "^7.0.1",
"tslib": "^2.6.2",
"typescript": "^5.2.2"
}

View File

@@ -104,6 +104,9 @@ export type ModelSettingParams = {
n_parallel?: number
cpu_threads?: number
prompt_template?: string
system_prompt?: string
ai_prompt?: string
user_prompt?: string
}
/**

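For illustration, a minimal sketch of how the three new optional fields relate to prompt_template — the field names come from this hunk, while the sample template string and the settings object are assumptions:

import { ModelSettingParams } from "@janhq/core";

// Hypothetical chat template; real templates ship with each model's metadata.
const template = "<|system|>{system_message}<|user|>{prompt}<|assistant|>";

const settings: ModelSettingParams = {
  prompt_template: template,
  // Derived by splitting the template on its {system_message} and {prompt}
  // markers (see promptTemplateConverter in the node module below):
  system_prompt: "<|system|>",
  user_prompt: "<|user|>",
  ai_prompt: "<|assistant|>",
};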
View File

@@ -3,11 +3,11 @@
"version": "1.0.0",
"description": "This extension embeds Nitro, a lightweight (3mb) inference engine written in C++. See nitro.jan.ai",
"main": "dist/index.js",
"module": "dist/module.js",
"node": "dist/node/index.cjs.js",
"author": "Jan <service@jan.ai>",
"license": "AGPL-3.0",
"scripts": {
"build": "tsc -b . && webpack --config webpack.config.js",
"build": "tsc --module commonjs && rollup -c rollup.config.ts",
"downloadnitro:linux": "NITRO_VERSION=$(cat ./bin/version.txt) && download https://github.com/janhq/nitro/releases/download/v${NITRO_VERSION}/nitro-${NITRO_VERSION}-linux-amd64.tar.gz -e --strip 1 -o ./bin/linux-cpu && chmod +x ./bin/linux-cpu/nitro && download https://github.com/janhq/nitro/releases/download/v${NITRO_VERSION}/nitro-${NITRO_VERSION}-linux-amd64-cuda-12-0.tar.gz -e --strip 1 -o ./bin/linux-cuda-12-0 && chmod +x ./bin/linux-cuda-12-0/nitro && download https://github.com/janhq/nitro/releases/download/v${NITRO_VERSION}/nitro-${NITRO_VERSION}-linux-amd64-cuda-11-7.tar.gz -e --strip 1 -o ./bin/linux-cuda-11-7 && chmod +x ./bin/linux-cuda-11-7/nitro",
"downloadnitro:darwin": "NITRO_VERSION=$(cat ./bin/version.txt) && download https://github.com/janhq/nitro/releases/download/v${NITRO_VERSION}/nitro-${NITRO_VERSION}-mac-arm64.tar.gz -e --strip 1 -o ./bin/mac-arm64 && chmod +x ./bin/mac-arm64/nitro && download https://github.com/janhq/nitro/releases/download/v${NITRO_VERSION}/nitro-${NITRO_VERSION}-mac-amd64.tar.gz -e --strip 1 -o ./bin/mac-x64 && chmod +x ./bin/mac-x64/nitro",
"downloadnitro:win32": "download.bat",
@@ -19,24 +19,33 @@
},
"exports": {
".": "./dist/index.js",
"./main": "./dist/module.js"
"./main": "./dist/node/index.cjs.js"
},
"devDependencies": {
"@rollup/plugin-commonjs": "^25.0.7",
"@rollup/plugin-json": "^6.1.0",
"@rollup/plugin-node-resolve": "^15.2.3",
"@types/node": "^20.11.4",
"@types/tcp-port-used": "^1.0.4",
"cpx": "^1.5.0",
"download-cli": "^1.1.1",
"rimraf": "^3.0.2",
"rollup": "^2.38.5",
"rollup-plugin-define": "^1.0.1",
"rollup-plugin-sourcemaps": "^0.6.3",
"rollup-plugin-typescript2": "^0.36.0",
"run-script-os": "^1.1.6",
"webpack": "^5.88.2",
"webpack-cli": "^5.1.4"
"typescript": "^5.3.3"
},
"dependencies": {
"@janhq/core": "file:../../core",
"download-cli": "^1.1.1",
"@rollup/plugin-replace": "^5.0.5",
"@types/os-utils": "^0.0.4",
"fetch-retry": "^5.0.6",
"os-utils": "^0.0.14",
"path-browserify": "^1.0.1",
"rxjs": "^7.8.1",
"tcp-port-used": "^1.0.2",
"ts-loader": "^9.5.0",
"ulid": "^2.3.0"
},
"engines": {

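The package now ships two bundles: an ES module for the renderer ("main", exported as ".") and a CommonJS bundle for the Node side ("node", re-exported as "./main"). A sketch of how each process would resolve them, assuming the package is published as @janhq/inference-nitro-extension:

// Renderer process resolves the ESM entry (dist/index.js):
import inferenceExtension from "@janhq/inference-nitro-extension";

// Main (Node) process resolves the CJS entry (dist/node/index.cjs.js):
const nodeModule = require("@janhq/inference-nitro-extension/main");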
View File

@@ -0,0 +1,77 @@
import resolve from "@rollup/plugin-node-resolve";
import commonjs from "@rollup/plugin-commonjs";
import sourceMaps from "rollup-plugin-sourcemaps";
import typescript from "rollup-plugin-typescript2";
import json from "@rollup/plugin-json";
import replace from "@rollup/plugin-replace";
const packageJson = require("./package.json");
export default [
{
input: `src/index.ts`,
output: [{ file: packageJson.main, format: "es", sourcemap: true }],
// List here external modules you don't want to include in your bundle (e.g. 'lodash')
external: [],
watch: {
include: "src/**",
},
plugins: [
replace({
NODE: JSON.stringify(`${packageJson.name}/${packageJson.node}`),
INFERENCE_URL: JSON.stringify(
process.env.INFERENCE_URL ||
"http://127.0.0.1:3928/inferences/llamacpp/chat_completion"
),
TROUBLESHOOTING_URL: JSON.stringify(
"https://jan.ai/guides/troubleshooting"
),
}),
// Allow json resolution
json(),
// Compile TypeScript files
typescript({ useTsconfigDeclarationDir: true }),
// Allow bundling cjs modules (unlike webpack, rollup doesn't understand cjs)
commonjs(),
// Allow node_modules resolution, so you can use 'external' to control
// which external modules to include in the bundle
// https://github.com/rollup/rollup-plugin-node-resolve#usage
resolve({
extensions: [".js", ".ts", ".svelte"],
}),
// Resolve source maps to the original source
sourceMaps(),
],
},
{
input: `src/node/index.ts`,
output: [
{ file: "dist/node/index.cjs.js", format: "cjs", sourcemap: true },
],
// List here external modules you don't want to include in your bundle (e.g. 'lodash')
external: ["@janhq/core/node"],
watch: {
include: "src/node/**",
},
plugins: [
// Allow json resolution
json(),
// Compile TypeScript files
typescript({ useTsconfigDeclarationDir: true }),
// Allow bundling cjs modules (unlike webpack, rollup doesn't understand cjs)
commonjs(),
// Allow node_modules resolution, so you can use 'external' to control
// which external modules to include in the bundle
// https://github.com/rollup/rollup-plugin-node-resolve#usage
resolve({
extensions: [".ts", ".js", ".json"],
}),
// Resolve source maps to the original source
sourceMaps(),
],
},
];

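The replace() pass inlines NODE, INFERENCE_URL, and TROUBLESHOOTING_URL as string literals at build time, which is how the renderer bundle learns where its Node counterpart lives. Roughly, assuming the package name and the "node" field shown in package.json above:

// Source (src/index.ts):
await executeOnMain(NODE, "updateNvidiaInfo", {});

// After the replace pass, approximately:
await executeOnMain(
  "@janhq/inference-nitro-extension/dist/node/index.cjs.js",
  "updateNvidiaInfo",
  {}
);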
View File

@@ -1,4 +1,4 @@
declare const MODULE: string;
declare const NODE: string;
declare const INFERENCE_URL: string;
declare const TROUBLESHOOTING_URL: string;

View File

@@ -26,7 +26,6 @@ import {
} from "@janhq/core";
import { requestInference } from "./helpers/sse";
import { ulid } from "ulid";
import { join } from "path";
/**
* A class that implements the InferenceExtension interface from the @janhq/core package.
@@ -43,7 +42,7 @@ export default class JanInferenceNitroExtension implements InferenceExtension {
*/
private static readonly _intervalHealthCheck = 5 * 1000;
private _currentModel: Model;
private _currentModel: Model | undefined;
private _engineSettings: EngineSettings = {
ctx_len: 2048,
@@ -82,7 +81,7 @@ export default class JanInferenceNitroExtension implements InferenceExtension {
if (!(await fs.existsSync(JanInferenceNitroExtension._homeDir))) {
await fs
.mkdirSync(JanInferenceNitroExtension._homeDir)
.catch((err) => console.debug(err));
.catch((err: Error) => console.debug(err));
}
if (!(await fs.existsSync(JanInferenceNitroExtension._settingsDir)))
@@ -90,7 +89,9 @@ export default class JanInferenceNitroExtension implements InferenceExtension {
this.writeDefaultEngineSettings();
// Events subscription
events.on(EventName.OnMessageSent, (data) => this.onMessageRequest(data));
events.on(EventName.OnMessageSent, (data: MessageRequest) =>
this.onMessageRequest(data)
);
events.on(EventName.OnModelInit, (model: Model) => this.onModelInit(model));
@@ -99,7 +100,7 @@ export default class JanInferenceNitroExtension implements InferenceExtension {
events.on(EventName.OnInferenceStopped, () => this.onInferenceStopped());
// Attempt to fetch nvidia info
await executeOnMain(MODULE, "updateNvidiaInfo", {});
await executeOnMain(NODE, "updateNvidiaInfo", {});
}
/**
@@ -109,10 +110,10 @@ export default class JanInferenceNitroExtension implements InferenceExtension {
private async writeDefaultEngineSettings() {
try {
const engineFile = join(
const engineFile = await joinPath([
JanInferenceNitroExtension._homeDir,
JanInferenceNitroExtension._engineMetadataFileName
);
JanInferenceNitroExtension._engineMetadataFileName,
]);
if (await fs.existsSync(engineFile)) {
const engine = await fs.readFileSync(engineFile, "utf-8");
this._engineSettings =
@@ -133,12 +134,12 @@ export default class JanInferenceNitroExtension implements InferenceExtension {
const modelFullPath = await joinPath(["models", model.id]);
const nitroInitResult = await executeOnMain(MODULE, "initModel", {
modelFullPath: modelFullPath,
model: model,
const nitroInitResult = await executeOnMain(NODE, "runModel", {
modelFullPath,
model,
});
if (nitroInitResult.error === null) {
if (nitroInitResult?.error) {
events.emit(EventName.OnModelFail, model);
return;
}
@@ -155,12 +156,11 @@ export default class JanInferenceNitroExtension implements InferenceExtension {
private async onModelStop(model: Model) {
if (model.engine !== "nitro") return;
await executeOnMain(MODULE, "stopModel");
await executeOnMain(NODE, "stopModel");
events.emit(EventName.OnModelStopped, {});
// stop the periodic health check
if (this.getNitroProcesHealthIntervalId) {
console.debug("Stop calling Nitro process health check");
clearInterval(this.getNitroProcesHealthIntervalId);
this.getNitroProcesHealthIntervalId = undefined;
}
@@ -170,7 +170,7 @@ export default class JanInferenceNitroExtension implements InferenceExtension {
* Periodically check for nitro process's health.
*/
private async periodicallyGetNitroHealth(): Promise<void> {
const health = await executeOnMain(MODULE, "getCurrentNitroProcessInfo");
const health = await executeOnMain(NODE, "getCurrentNitroProcessInfo");
const isRunning = this.nitroProcessInfo?.isRunning ?? false;
if (isRunning && health.isRunning === false) {
@@ -204,6 +204,8 @@ export default class JanInferenceNitroExtension implements InferenceExtension {
};
return new Promise(async (resolve, reject) => {
if (!this._currentModel) return Promise.reject("No model loaded");
requestInference(data.messages ?? [], this._currentModel).subscribe({
next: (_content) => {},
complete: async () => {
@@ -223,7 +225,9 @@ export default class JanInferenceNitroExtension implements InferenceExtension {
* @param {MessageRequest} data - The data for the new message request.
*/
private async onMessageRequest(data: MessageRequest) {
if (data.model.engine !== "nitro") return;
if (data.model?.engine !== InferenceEngine.nitro || !this._currentModel) {
return;
}
const timestamp = Date.now();
const message: ThreadMessage = {
@@ -242,11 +246,12 @@ export default class JanInferenceNitroExtension implements InferenceExtension {
this.isCancelled = false;
this.controller = new AbortController();
requestInference(
data.messages ?? [],
{ ...this._currentModel, ...data.model },
this.controller
).subscribe({
// @ts-ignore
const model: Model = {
...(this._currentModel || {}),
...(data.model || {}),
};
requestInference(data.messages ?? [], model, this.controller).subscribe({
next: (content) => {
const messageContent: ThreadContent = {
type: ContentType.Text,

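As context for the health-check hunk above, a sketch of the polling loop the class presumably starts once a model loads — the constant and method names appear in this file, but the setInterval wiring itself is inferred:

// Inferred wiring for the periodic Nitro health check (every 5 seconds):
this.getNitroProcesHealthIntervalId = setInterval(
  () => this.periodicallyGetNitroHealth(),
  JanInferenceNitroExtension._intervalHealthCheck
);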
View File

@@ -1,514 +0,0 @@
const fs = require("fs");
const path = require("path");
const { exec, spawn } = require("child_process");
const tcpPortUsed = require("tcp-port-used");
const fetchRetry = require("fetch-retry")(global.fetch);
const osUtils = require("os-utils");
const { readFileSync, writeFileSync, existsSync } = require("fs");
const { log } = require("@janhq/core/node");
// The PORT to use for the Nitro subprocess
const PORT = 3928;
const LOCAL_HOST = "127.0.0.1";
const NITRO_HTTP_SERVER_URL = `http://${LOCAL_HOST}:${PORT}`;
const NITRO_HTTP_LOAD_MODEL_URL = `${NITRO_HTTP_SERVER_URL}/inferences/llamacpp/loadmodel`;
const NITRO_HTTP_VALIDATE_MODEL_URL = `${NITRO_HTTP_SERVER_URL}/inferences/llamacpp/modelstatus`;
const NITRO_HTTP_KILL_URL = `${NITRO_HTTP_SERVER_URL}/processmanager/destroy`;
const SUPPORTED_MODEL_FORMAT = ".gguf";
const NVIDIA_INFO_FILE = path.join(
require("os").homedir(),
"jan",
"settings",
"settings.json"
);
// The subprocess instance for Nitro
let subprocess = undefined;
let currentModelFile: string = undefined;
let currentSettings = undefined;
let nitroProcessInfo = undefined;
/**
* Default GPU settings
**/
const DEFALT_SETTINGS = {
notify: true,
run_mode: "cpu",
nvidia_driver: {
exist: false,
version: "",
},
cuda: {
exist: false,
version: "",
},
gpus: [],
gpu_highest_vram: "",
};
/**
* Stops a Nitro subprocess.
* @returns A Promise that resolves when the subprocess is terminated successfully, or rejects with an error message if the subprocess fails to terminate.
*/
function stopModel(): Promise<void> {
return killSubprocess();
}
/**
* Initializes a Nitro subprocess to load a machine learning model.
* @param wrapper - The model wrapper.
* @returns A Promise that resolves when the model is loaded successfully, or rejects with an error message if the model is not found or fails to load.
* TODO: Should pass the absolute path of the model file instead of just the name - So we can modularize module.ts into an npm package
* TODO: Should it be startModel instead?
*/
async function initModel(wrapper: any): Promise<ModelOperationResponse> {
currentModelFile = wrapper.modelFullPath;
const janRoot = path.join(require("os").homedir(), "jan");
if (!currentModelFile.includes(janRoot)) {
currentModelFile = path.join(janRoot, currentModelFile);
}
const files: string[] = fs.readdirSync(currentModelFile);
// Look for GGUF model file
const ggufBinFile = files.find(
(file) =>
file === path.basename(currentModelFile) ||
file.toLowerCase().includes(SUPPORTED_MODEL_FORMAT)
);
currentModelFile = path.join(currentModelFile, ggufBinFile);
if (wrapper.model.engine !== "nitro") {
return Promise.resolve({ error: "Not a nitro model" });
} else {
const nitroResourceProbe = await getResourcesInfo();
// Convert settings.prompt_template to system_prompt, user_prompt, ai_prompt
if (wrapper.model.settings.prompt_template) {
const promptTemplate = wrapper.model.settings.prompt_template;
const prompt = promptTemplateConverter(promptTemplate);
if (prompt.error) {
return Promise.resolve({ error: prompt.error });
}
wrapper.model.settings.system_prompt = prompt.system_prompt;
wrapper.model.settings.user_prompt = prompt.user_prompt;
wrapper.model.settings.ai_prompt = prompt.ai_prompt;
}
currentSettings = {
llama_model_path: currentModelFile,
...wrapper.model.settings,
// This is critical and requires real system information
cpu_threads: nitroResourceProbe.numCpuPhysicalCore,
};
return loadModel(nitroResourceProbe);
}
}
async function loadModel(nitroResourceProbe: any | undefined) {
// Gather system information for CPU physical cores and memory
if (!nitroResourceProbe) nitroResourceProbe = await getResourcesInfo();
return killSubprocess()
.then(() => tcpPortUsed.waitUntilFree(PORT, 300, 5000))
.then(() => {
/**
* There is a problem with Windows process manager
* Should wait for a while to make sure the port is free and the subprocess is killed
* The tested threshold is 500ms
**/
if (process.platform === "win32") {
return new Promise((resolve) => setTimeout(resolve, 500));
} else {
return Promise.resolve();
}
})
.then(() => spawnNitroProcess(nitroResourceProbe))
.then(() => loadLLMModel(currentSettings))
.then(validateModelStatus)
.catch((err) => {
log(`[NITRO]::Error: ${err}`);
// TODO: Broadcast error so app could display proper error message
return { error: err, currentModelFile };
});
}
function promptTemplateConverter(promptTemplate) {
// Split the string using the markers
const systemMarker = "{system_message}";
const promptMarker = "{prompt}";
if (
promptTemplate.includes(systemMarker) &&
promptTemplate.includes(promptMarker)
) {
// Find the indices of the markers
const systemIndex = promptTemplate.indexOf(systemMarker);
const promptIndex = promptTemplate.indexOf(promptMarker);
// Extract the parts of the string
const system_prompt = promptTemplate.substring(0, systemIndex);
const user_prompt = promptTemplate.substring(
systemIndex + systemMarker.length,
promptIndex
);
const ai_prompt = promptTemplate.substring(
promptIndex + promptMarker.length
);
// Return the split parts
return { system_prompt, user_prompt, ai_prompt };
} else if (promptTemplate.includes(promptMarker)) {
// Extract the parts of the string for the case where only promptMarker is present
const promptIndex = promptTemplate.indexOf(promptMarker);
const user_prompt = promptTemplate.substring(0, promptIndex);
const ai_prompt = promptTemplate.substring(
promptIndex + promptMarker.length
);
const system_prompt = "";
// Return the split parts
return { system_prompt, user_prompt, ai_prompt };
}
// Return an error if none of the conditions are met
return { error: "Cannot split prompt template" };
}
/**
* Loads an LLM model into the Nitro subprocess by sending an HTTP POST request.
* @returns A Promise that resolves when the model is loaded successfully, or rejects with an error message if the model is not found or fails to load.
*/
function loadLLMModel(settings): Promise<Response> {
log(`[NITRO]::Debug: Loading model with params ${JSON.stringify(settings)}`);
return fetchRetry(NITRO_HTTP_LOAD_MODEL_URL, {
method: "POST",
headers: {
"Content-Type": "application/json",
},
body: JSON.stringify(settings),
retries: 3,
retryDelay: 500,
}).catch((err) => {
log(`[NITRO]::Error: Load model failed with error ${err}`);
});
}
/**
* Validates the status of a model.
* @returns {Promise<ModelOperationResponse>} A promise that resolves to an object.
* If the model is loaded successfully, the object is empty.
* If the model is not loaded successfully, the object contains an error message.
*/
async function validateModelStatus(): Promise<ModelOperationResponse> {
// Send a GET request to the validation URL.
// Retry the request up to 5 times if it fails, with a delay of 500 milliseconds between retries.
return fetchRetry(NITRO_HTTP_VALIDATE_MODEL_URL, {
method: "GET",
headers: {
"Content-Type": "application/json",
},
retries: 5,
retryDelay: 500,
}).then(async (res: Response) => {
// If the response is OK, check model_loaded status.
if (res.ok) {
const body = await res.json();
// If the model is loaded, return an empty object.
// Otherwise, return an object with an error message.
if (body.model_loaded) {
return { error: undefined };
}
}
return { error: "Model loading failed" };
});
}
/**
* Terminates the Nitro subprocess.
* @returns A Promise that resolves when the subprocess is terminated successfully, or rejects with an error message if the subprocess fails to terminate.
*/
async function killSubprocess(): Promise<void> {
const controller = new AbortController();
setTimeout(() => controller.abort(), 5000);
log(`[NITRO]::Debug: Request to kill Nitro`);
return fetch(NITRO_HTTP_KILL_URL, {
method: "DELETE",
signal: controller.signal,
})
.then(() => {
subprocess?.kill();
subprocess = undefined;
})
.catch(() => {})
.then(() => tcpPortUsed.waitUntilFree(PORT, 300, 5000))
.then(() => log(`[NITRO]::Debug: Nitro process is terminated`));
}
/**
* Spawns a Nitro subprocess.
* @param nitroResourceProbe - The Nitro resource probe.
* @returns A promise that resolves when the Nitro subprocess is started.
*/
function spawnNitroProcess(nitroResourceProbe: any): Promise<any> {
log(`[NITRO]::Debug: Spawning Nitro subprocess...`);
return new Promise(async (resolve, reject) => {
let binaryFolder = path.join(__dirname, "bin"); // Current directory by default
let cudaVisibleDevices = "";
let binaryName;
if (process.platform === "win32") {
let nvidiaInfo = JSON.parse(readFileSync(NVIDIA_INFO_FILE, "utf-8"));
if (nvidiaInfo["run_mode"] === "cpu") {
binaryFolder = path.join(binaryFolder, "win-cpu");
} else {
if (nvidiaInfo["cuda"].version === "12") {
binaryFolder = path.join(binaryFolder, "win-cuda-12-0");
} else {
binaryFolder = path.join(binaryFolder, "win-cuda-11-7");
}
cudaVisibleDevices = nvidiaInfo["gpu_highest_vram"];
}
binaryName = "nitro.exe";
} else if (process.platform === "darwin") {
if (process.arch === "arm64") {
binaryFolder = path.join(binaryFolder, "mac-arm64");
} else {
binaryFolder = path.join(binaryFolder, "mac-x64");
}
binaryName = "nitro";
} else {
let nvidiaInfo = JSON.parse(readFileSync(NVIDIA_INFO_FILE, "utf-8"));
if (nvidiaInfo["run_mode"] === "cpu") {
binaryFolder = path.join(binaryFolder, "linux-cpu");
} else {
if (nvidiaInfo["cuda"].version === "12") {
binaryFolder = path.join(binaryFolder, "linux-cuda-12-0");
} else {
binaryFolder = path.join(binaryFolder, "linux-cuda-11-7");
}
cudaVisibleDevices = nvidiaInfo["gpu_highest_vram"];
}
binaryName = "nitro";
}
const binaryPath = path.join(binaryFolder, binaryName);
// Execute the binary
subprocess = spawn(binaryPath, ["1", LOCAL_HOST, PORT.toString()], {
cwd: binaryFolder,
env: {
...process.env,
CUDA_VISIBLE_DEVICES: cudaVisibleDevices,
},
});
// Handle subprocess output
subprocess.stdout.on("data", (data) => {
log(`[NITRO]::Debug: ${data}`);
});
subprocess.stderr.on("data", (data) => {
log(`[NITRO]::Error: ${data}`);
});
subprocess.on("close", (code) => {
log(`[NITRO]::Debug: Nitro exited with code: ${code}`);
subprocess = null;
reject(`child process exited with code ${code}`);
});
tcpPortUsed.waitUntilUsed(PORT, 300, 30000).then(() => {
resolve(nitroResourceProbe);
});
});
}
/**
* Get the system resources information
* TODO: Move to Core so that it can be reused
*/
function getResourcesInfo(): Promise<ResourcesInfo> {
return new Promise(async (resolve) => {
const cpu = await osUtils.cpuCount();
log(`[NITRO]::CPU informations - ${cpu}`);
const response: ResourcesInfo = {
numCpuPhysicalCore: cpu,
memAvailable: 0,
};
resolve(response);
});
}
/**
* This will retrieve GPU information and persist it to settings.json
* Will be called when the extension is loaded to turn on GPU acceleration if supported
*/
async function updateNvidiaInfo() {
if (process.platform !== "darwin") {
await Promise.all([
updateNvidiaDriverInfo(),
updateCudaExistence(),
updateGpuInfo(),
]);
}
}
/**
* Retrieve current nitro process
*/
const getCurrentNitroProcessInfo = (): Promise<any> => {
nitroProcessInfo = {
isRunning: subprocess != null,
};
return nitroProcessInfo;
};
/**
* Every module should have a dispose function
* This will be called when the extension is unloaded and should clean up any resources
* Also called when app is closed
*/
function dispose() {
// clean other registered resources here
killSubprocess();
}
/**
* Validate nvidia and cuda for linux and windows
*/
async function updateNvidiaDriverInfo(): Promise<void> {
exec(
"nvidia-smi --query-gpu=driver_version --format=csv,noheader",
(error, stdout) => {
let data;
try {
data = JSON.parse(readFileSync(NVIDIA_INFO_FILE, "utf-8"));
} catch (error) {
data = DEFALT_SETTINGS;
}
if (!error) {
const firstLine = stdout.split("\n")[0].trim();
data["nvidia_driver"].exist = true;
data["nvidia_driver"].version = firstLine;
} else {
data["nvidia_driver"].exist = false;
}
writeFileSync(NVIDIA_INFO_FILE, JSON.stringify(data, null, 2));
Promise.resolve();
}
);
}
/**
* Check if file exists in paths
*/
function checkFileExistenceInPaths(file: string, paths: string[]): boolean {
return paths.some((p) => existsSync(path.join(p, file)));
}
/**
* Validate cuda for linux and windows
*/
function updateCudaExistence() {
let filesCuda12: string[];
let filesCuda11: string[];
let paths: string[];
let cudaVersion: string = "";
if (process.platform === "win32") {
filesCuda12 = ["cublas64_12.dll", "cudart64_12.dll", "cublasLt64_12.dll"];
filesCuda11 = ["cublas64_11.dll", "cudart64_11.dll", "cublasLt64_11.dll"];
paths = process.env.PATH ? process.env.PATH.split(path.delimiter) : [];
} else {
filesCuda12 = ["libcudart.so.12", "libcublas.so.12", "libcublasLt.so.12"];
filesCuda11 = ["libcudart.so.11.0", "libcublas.so.11", "libcublasLt.so.11"];
paths = process.env.LD_LIBRARY_PATH
? process.env.LD_LIBRARY_PATH.split(path.delimiter)
: [];
paths.push("/usr/lib/x86_64-linux-gnu/");
}
let cudaExists = filesCuda12.every(
(file) => existsSync(file) || checkFileExistenceInPaths(file, paths)
);
if (!cudaExists) {
cudaExists = filesCuda11.every(
(file) => existsSync(file) || checkFileExistenceInPaths(file, paths)
);
if (cudaExists) {
cudaVersion = "11";
}
} else {
cudaVersion = "12";
}
let data;
try {
data = JSON.parse(readFileSync(NVIDIA_INFO_FILE, "utf-8"));
} catch (error) {
data = DEFALT_SETTINGS;
}
data["cuda"].exist = cudaExists;
data["cuda"].version = cudaVersion;
if (cudaExists) {
data.run_mode = "gpu";
}
writeFileSync(NVIDIA_INFO_FILE, JSON.stringify(data, null, 2));
}
/**
* Get GPU information
*/
async function updateGpuInfo(): Promise<void> {
exec(
"nvidia-smi --query-gpu=index,memory.total --format=csv,noheader,nounits",
(error, stdout) => {
let data;
try {
data = JSON.parse(readFileSync(NVIDIA_INFO_FILE, "utf-8"));
} catch (error) {
data = DEFALT_SETTINGS;
}
if (!error) {
// Get GPU info and gpu has higher memory first
let highestVram = 0;
let highestVramId = "0";
let gpus = stdout
.trim()
.split("\n")
.map((line) => {
let [id, vram] = line.split(", ");
vram = vram.replace(/\r/g, "");
if (parseFloat(vram) > highestVram) {
highestVram = parseFloat(vram);
highestVramId = id;
}
return { id, vram };
});
data["gpus"] = gpus;
data["gpu_highest_vram"] = highestVramId;
} else {
data["gpus"] = [];
}
writeFileSync(NVIDIA_INFO_FILE, JSON.stringify(data, null, 2));
Promise.resolve();
}
);
}
module.exports = {
initModel,
stopModel,
killSubprocess,
dispose,
updateNvidiaInfo,
getCurrentNitroProcessInfo,
};

View File

@@ -0,0 +1,65 @@
import { readFileSync } from "fs";
import * as path from "path";
import { NVIDIA_INFO_FILE } from "./nvidia";
export interface NitroExecutableOptions {
executablePath: string;
cudaVisibleDevices: string;
}
/**
* Find which executable file to run based on the current platform.
* @returns The name of the executable file to run.
*/
export const executableNitroFile = (): NitroExecutableOptions => {
let binaryFolder = path.join(__dirname, "..", "bin"); // The extension's bin folder by default
let cudaVisibleDevices = "";
let binaryName = "nitro";
/**
* The binary folder is different for each platform.
*/
if (process.platform === "win32") {
/**
* For Windows: win-cpu, win-cuda-11-7, win-cuda-12-0
*/
let nvidiaInfo = JSON.parse(readFileSync(NVIDIA_INFO_FILE, "utf-8"));
if (nvidiaInfo["run_mode"] === "cpu") {
binaryFolder = path.join(binaryFolder, "win-cpu");
} else {
if (nvidiaInfo["cuda"].version === "12") {
binaryFolder = path.join(binaryFolder, "win-cuda-12-0");
} else {
binaryFolder = path.join(binaryFolder, "win-cuda-11-7");
}
cudaVisibleDevices = nvidiaInfo["gpu_highest_vram"];
}
binaryName = "nitro.exe";
} else if (process.platform === "darwin") {
/**
* For macOS: mac-arm64 (Apple Silicon), mac-x64 (Intel)
*/
if (process.arch === "arm64") {
binaryFolder = path.join(binaryFolder, "mac-arm64");
} else {
binaryFolder = path.join(binaryFolder, "mac-x64");
}
} else {
/**
* For Linux: linux-cpu, linux-cuda-11-7, linux-cuda-12-0
*/
let nvidiaInfo = JSON.parse(readFileSync(NVIDIA_INFO_FILE, "utf-8"));
if (nvidiaInfo["run_mode"] === "cpu") {
binaryFolder = path.join(binaryFolder, "linux-cpu");
} else {
if (nvidiaInfo["cuda"].version === "12") {
binaryFolder = path.join(binaryFolder, "linux-cuda-12-0");
} else {
binaryFolder = path.join(binaryFolder, "linux-cuda-11-7");
}
cudaVisibleDevices = nvidiaInfo["gpu_highest_vram"];
}
}
return {
executablePath: path.join(binaryFolder, binaryName),
cudaVisibleDevices,
};
};

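A short usage sketch for executableNitroFile — the spawn call mirrors how src/node/index.ts consumes it below, and the host/port literals are the constants defined there:

import { spawn } from "child_process";
import { executableNitroFile } from "./execute";

// Pick the platform/GPU-appropriate binary, then launch it.
const { executablePath, cudaVisibleDevices } = executableNitroFile();
const nitro = spawn(executablePath, ["1", "127.0.0.1", "3928"], {
  env: { ...process.env, CUDA_VISIBLE_DEVICES: cudaVisibleDevices },
});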
View File

@@ -0,0 +1,379 @@
import fs from "fs";
import path from "path";
import { ChildProcessWithoutNullStreams, spawn } from "child_process";
import tcpPortUsed from "tcp-port-used";
import fetchRT from "fetch-retry";
import osUtils from "os-utils";
import { log } from "@janhq/core/node";
import { getNitroProcessInfo, updateNvidiaInfo } from "./nvidia";
import { Model, InferenceEngine, ModelSettingParams } from "@janhq/core";
import { executableNitroFile } from "./execute";
import { homedir } from "os";
// Polyfill fetch with retry
const fetchRetry = fetchRT(fetch);
/**
* The response object for model init operation.
*/
interface ModelInitOptions {
modelFullPath: string;
model: Model;
}
/**
* The response object of Prompt Template parsing.
*/
interface PromptTemplate {
system_prompt?: string;
ai_prompt?: string;
user_prompt?: string;
error?: string;
}
/**
* Model setting args for Nitro model load.
*/
interface ModelSettingArgs extends ModelSettingParams {
llama_model_path: string;
cpu_threads: number;
}
// The PORT to use for the Nitro subprocess
const PORT = 3928;
// The HOST address to use for the Nitro subprocess
const LOCAL_HOST = "127.0.0.1";
// The URL for the Nitro subprocess
const NITRO_HTTP_SERVER_URL = `http://${LOCAL_HOST}:${PORT}`;
// The URL for the Nitro subprocess to load a model
const NITRO_HTTP_LOAD_MODEL_URL = `${NITRO_HTTP_SERVER_URL}/inferences/llamacpp/loadmodel`;
// The URL for the Nitro subprocess to validate a model
const NITRO_HTTP_VALIDATE_MODEL_URL = `${NITRO_HTTP_SERVER_URL}/inferences/llamacpp/modelstatus`;
// The URL for the Nitro subprocess to kill itself
const NITRO_HTTP_KILL_URL = `${NITRO_HTTP_SERVER_URL}/processmanager/destroy`;
// The supported model format
// TODO: Should be an array to support more models
const SUPPORTED_MODEL_FORMAT = ".gguf";
// The subprocess instance for Nitro
let subprocess: ChildProcessWithoutNullStreams | undefined = undefined;
// The current model file url
let currentModelFile: string = "";
// The current model settings
let currentSettings: ModelSettingArgs | undefined = undefined;
/**
* Stops a Nitro subprocess.
* @returns A Promise that resolves when the subprocess is terminated successfully, or rejects with an error message if the subprocess fails to terminate.
*/
function stopModel(): Promise<void> {
return killSubprocess();
}
/**
* Initializes a Nitro subprocess to load a machine learning model.
* @param wrapper - The model wrapper.
* @returns A Promise that resolves when the model is loaded successfully, or rejects with an error message if the model is not found or fails to load.
* TODO: Should pass the absolute path of the model file instead of just the name - So we can modularize module.ts into an npm package
*/
async function runModel(
wrapper: ModelInitOptions
): Promise<ModelOperationResponse | void> {
if (wrapper.model.engine !== InferenceEngine.nitro) {
// Not a nitro model
return Promise.resolve();
}
currentModelFile = wrapper.modelFullPath;
const janRoot = path.join(homedir(), "jan");
if (!currentModelFile.includes(janRoot)) {
currentModelFile = path.join(janRoot, currentModelFile);
}
const files: string[] = fs.readdirSync(currentModelFile);
// Look for GGUF model file
const ggufBinFile = files.find(
(file) =>
file === path.basename(currentModelFile) ||
file.toLowerCase().includes(SUPPORTED_MODEL_FORMAT)
);
if (!ggufBinFile) return Promise.reject("No GGUF model file found");
currentModelFile = path.join(currentModelFile, ggufBinFile);
if (wrapper.model.engine !== InferenceEngine.nitro) {
return Promise.reject("Not a nitro model");
} else {
const nitroResourceProbe = await getResourcesInfo();
// Convert settings.prompt_template to system_prompt, user_prompt, ai_prompt
if (wrapper.model.settings.prompt_template) {
const promptTemplate = wrapper.model.settings.prompt_template;
const prompt = promptTemplateConverter(promptTemplate);
if (prompt?.error) {
return Promise.reject(prompt.error);
}
wrapper.model.settings.system_prompt = prompt.system_prompt;
wrapper.model.settings.user_prompt = prompt.user_prompt;
wrapper.model.settings.ai_prompt = prompt.ai_prompt;
}
currentSettings = {
llama_model_path: currentModelFile,
...wrapper.model.settings,
// This is critical and requires real system information
cpu_threads: nitroResourceProbe.numCpuPhysicalCore,
};
return runNitroAndLoadModel();
}
}
/**
* 1. Spawn Nitro process
* 2. Load model into Nitro subprocess
* 3. Validate model status
* @returns
*/
async function runNitroAndLoadModel() {
// Gather system information for CPU physical cores and memory
return killSubprocess()
.then(() => tcpPortUsed.waitUntilFree(PORT, 300, 5000))
.then(() => {
/**
* There is a problem with Windows process manager
* Should wait for a while to make sure the port is free and the subprocess is killed
* The tested threshold is 500ms
**/
if (process.platform === "win32") {
return new Promise((resolve) => setTimeout(resolve, 500));
} else {
return Promise.resolve();
}
})
.then(spawnNitroProcess)
.then(() => loadLLMModel(currentSettings))
.then(validateModelStatus)
.catch((err) => {
// TODO: Broadcast error so app could display proper error message
log(`[NITRO]::Error: ${err}`);
return { error: err };
});
}
/**
* Parse prompt template into args settings
* @param promptTemplate Template as string
* @returns
*/
function promptTemplateConverter(promptTemplate: string): PromptTemplate {
// Split the string using the markers
const systemMarker = "{system_message}";
const promptMarker = "{prompt}";
if (
promptTemplate.includes(systemMarker) &&
promptTemplate.includes(promptMarker)
) {
// Find the indices of the markers
const systemIndex = promptTemplate.indexOf(systemMarker);
const promptIndex = promptTemplate.indexOf(promptMarker);
// Extract the parts of the string
const system_prompt = promptTemplate.substring(0, systemIndex);
const user_prompt = promptTemplate.substring(
systemIndex + systemMarker.length,
promptIndex
);
const ai_prompt = promptTemplate.substring(
promptIndex + promptMarker.length
);
// Return the split parts
return { system_prompt, user_prompt, ai_prompt };
} else if (promptTemplate.includes(promptMarker)) {
// Extract the parts of the string for the case where only promptMarker is present
const promptIndex = promptTemplate.indexOf(promptMarker);
const user_prompt = promptTemplate.substring(0, promptIndex);
const ai_prompt = promptTemplate.substring(
promptIndex + promptMarker.length
);
// Return the split parts
return { user_prompt, ai_prompt };
}
// Return an error if none of the conditions are met
return { error: "Cannot split prompt template" };
}
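// Worked example (illustrative values):
//   promptTemplateConverter("<|system|>{system_message}<|user|>{prompt}<|assistant|>")
//   => { system_prompt: "<|system|>", user_prompt: "<|user|>", ai_prompt: "<|assistant|>" }
// A template containing only {prompt} yields user_prompt/ai_prompt without a
// system_prompt, and a template with neither marker yields
// { error: "Cannot split prompt template" }.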
/**
* Loads an LLM model into the Nitro subprocess by sending an HTTP POST request.
* @returns A Promise that resolves when the model is loaded successfully, or rejects with an error message if the model is not found or fails to load.
*/
function loadLLMModel(settings: any): Promise<Response> {
log(`[NITRO]::Debug: Loading model with params ${JSON.stringify(settings)}`);
return fetchRetry(NITRO_HTTP_LOAD_MODEL_URL, {
method: "POST",
headers: {
"Content-Type": "application/json",
},
body: JSON.stringify(settings),
retries: 3,
retryDelay: 500,
})
.then((res) => {
log(
`[NITRO]::Debug: Load model success with response ${JSON.stringify(
res
)}`
);
return Promise.resolve(res);
})
.catch((err) => {
log(`[NITRO]::Error: Load model failed with error ${err}`);
return Promise.reject();
});
}
/**
* Validates the status of a model.
* @returns {Promise<void>} A promise that resolves when the model reports model_loaded,
* or rejects with an error message if validation fails.
*/
async function validateModelStatus(): Promise<void> {
// Send a GET request to the validation URL.
// Retry the request up to 5 times if it fails, with a delay of 500 milliseconds between retries.
return fetchRetry(NITRO_HTTP_VALIDATE_MODEL_URL, {
method: "GET",
headers: {
"Content-Type": "application/json",
},
retries: 5,
retryDelay: 500,
}).then(async (res: Response) => {
log(
`[NITRO]::Debug: Validate model state success with response ${JSON.stringify(
res
)}`
);
// If the response is OK, check model_loaded status.
if (res.ok) {
const body = await res.json();
// If the model is loaded, return an empty object.
// Otherwise, return an object with an error message.
if (body.model_loaded) {
return Promise.resolve();
}
}
return Promise.reject("Validate model status failed");
});
}
/**
* Terminates the Nitro subprocess.
* @returns A Promise that resolves when the subprocess is terminated successfully, or rejects with an error message if the subprocess fails to terminate.
*/
async function killSubprocess(): Promise<void> {
const controller = new AbortController();
setTimeout(() => controller.abort(), 5000);
log(`[NITRO]::Debug: Request to kill Nitro`);
return fetch(NITRO_HTTP_KILL_URL, {
method: "DELETE",
signal: controller.signal,
})
.then(() => {
subprocess?.kill();
subprocess = undefined;
})
.catch(() => {})
.then(() => tcpPortUsed.waitUntilFree(PORT, 300, 5000))
.then(() => log(`[NITRO]::Debug: Nitro process is terminated`));
}
/**
* Spawns a Nitro subprocess.
* @returns A promise that resolves when the Nitro subprocess is started.
*/
function spawnNitroProcess(): Promise<any> {
log(`[NITRO]::Debug: Spawning Nitro subprocess...`);
return new Promise<void>(async (resolve, reject) => {
let binaryFolder = path.join(__dirname, "..", "bin"); // The extension's bin folder by default
let executableOptions = executableNitroFile();
const args: string[] = ["1", LOCAL_HOST, PORT.toString()];
// Execute the binary
log(
`[NITRO]::Debug: Spawn nitro at path: ${executableOptions.executablePath}, and args: ${args}`
);
subprocess = spawn(
executableOptions.executablePath,
["1", LOCAL_HOST, PORT.toString()],
{
cwd: binaryFolder,
env: {
...process.env,
CUDA_VISIBLE_DEVICES: executableOptions.cudaVisibleDevices,
},
}
);
// Handle subprocess output
subprocess.stdout.on("data", (data: any) => {
log(`[NITRO]::Debug: ${data}`);
});
subprocess.stderr.on("data", (data: any) => {
log(`[NITRO]::Error: ${data}`);
});
subprocess.on("close", (code: any) => {
log(`[NITRO]::Debug: Nitro exited with code: ${code}`);
subprocess = undefined;
reject(`child process exited with code ${code}`);
});
tcpPortUsed.waitUntilUsed(PORT, 300, 30000).then(() => {
log(`[NITRO]::Debug: Nitro is ready`);
resolve();
});
});
}
/**
* Get the system resources information
* TODO: Move to Core so that it can be reused
*/
function getResourcesInfo(): Promise<ResourcesInfo> {
return new Promise(async (resolve) => {
const cpu = await osUtils.cpuCount();
log(`[NITRO]::CPU informations - ${cpu}`);
const response: ResourcesInfo = {
numCpuPhysicalCore: cpu,
memAvailable: 0,
};
resolve(response);
});
}
/**
* Every module should have a dispose function
* This will be called when the extension is unloaded and should clean up any resources
* Also called when app is closed
*/
function dispose() {
// clean other registered resources here
killSubprocess();
}
export default {
runModel,
stopModel,
killSubprocess,
dispose,
updateNvidiaInfo,
getCurrentNitroProcessInfo: () => getNitroProcessInfo(subprocess),
};

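The default export is the surface executeOnMain dispatches into from the renderer. A sketch of the round trip, with the argument shape taken from the hunks above and the error handling mirroring onModelInit:

// Renderer side (src/index.ts):
const result = await executeOnMain(NODE, "runModel", { modelFullPath, model });

// runModel resolves with undefined on success or { error } on failure,
// so the caller only needs a truthiness check:
if (result?.error) {
  events.emit(EventName.OnModelFail, model);
}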
View File

@@ -0,0 +1,201 @@
import { writeFileSync, existsSync, readFileSync } from "fs";
import { exec } from "child_process";
import path from "path";
import { homedir } from "os";
/**
* Default GPU settings
**/
const DEFALT_SETTINGS = {
notify: true,
run_mode: "cpu",
nvidia_driver: {
exist: false,
version: "",
},
cuda: {
exist: false,
version: "",
},
gpus: [],
gpu_highest_vram: "",
};
/**
* Path to the settings file
**/
export const NVIDIA_INFO_FILE = path.join(
homedir(),
"jan",
"settings",
"settings.json"
);
/**
* Current nitro process
*/
let nitroProcessInfo: NitroProcessInfo | undefined = undefined;
/**
* Nitro process info
*/
export interface NitroProcessInfo {
isRunning: boolean
}
/**
* This will retrieve GPU information and persist it to settings.json
* Will be called when the extension is loaded to turn on GPU acceleration if supported
*/
export async function updateNvidiaInfo() {
if (process.platform !== "darwin") {
await Promise.all([
updateNvidiaDriverInfo(),
updateCudaExistence(),
updateGpuInfo(),
]);
}
}
/**
* Retrieve current nitro process
*/
export const getNitroProcessInfo = (subprocess: any): NitroProcessInfo => {
nitroProcessInfo = {
isRunning: subprocess != null,
};
return nitroProcessInfo;
};
/**
* Validate nvidia and cuda for linux and windows
*/
export async function updateNvidiaDriverInfo(): Promise<void> {
exec(
"nvidia-smi --query-gpu=driver_version --format=csv,noheader",
(error, stdout) => {
let data;
try {
data = JSON.parse(readFileSync(NVIDIA_INFO_FILE, "utf-8"));
} catch (error) {
data = DEFALT_SETTINGS;
}
if (!error) {
const firstLine = stdout.split("\n")[0].trim();
data["nvidia_driver"].exist = true;
data["nvidia_driver"].version = firstLine;
} else {
data["nvidia_driver"].exist = false;
}
writeFileSync(NVIDIA_INFO_FILE, JSON.stringify(data, null, 2));
Promise.resolve();
}
);
}
/**
* Check if file exists in paths
*/
export function checkFileExistenceInPaths(
file: string,
paths: string[]
): boolean {
return paths.some((p) => existsSync(path.join(p, file)));
}
/**
* Validate cuda for linux and windows
*/
export function updateCudaExistence() {
let filesCuda12: string[];
let filesCuda11: string[];
let paths: string[];
let cudaVersion: string = "";
if (process.platform === "win32") {
filesCuda12 = ["cublas64_12.dll", "cudart64_12.dll", "cublasLt64_12.dll"];
filesCuda11 = ["cublas64_11.dll", "cudart64_11.dll", "cublasLt64_11.dll"];
paths = process.env.PATH ? process.env.PATH.split(path.delimiter) : [];
} else {
filesCuda12 = ["libcudart.so.12", "libcublas.so.12", "libcublasLt.so.12"];
filesCuda11 = ["libcudart.so.11.0", "libcublas.so.11", "libcublasLt.so.11"];
paths = process.env.LD_LIBRARY_PATH
? process.env.LD_LIBRARY_PATH.split(path.delimiter)
: [];
paths.push("/usr/lib/x86_64-linux-gnu/");
}
let cudaExists = filesCuda12.every(
(file) => existsSync(file) || checkFileExistenceInPaths(file, paths)
);
if (!cudaExists) {
cudaExists = filesCuda11.every(
(file) => existsSync(file) || checkFileExistenceInPaths(file, paths)
);
if (cudaExists) {
cudaVersion = "11";
}
} else {
cudaVersion = "12";
}
let data;
try {
data = JSON.parse(readFileSync(NVIDIA_INFO_FILE, "utf-8"));
} catch (error) {
data = DEFALT_SETTINGS;
}
data["cuda"].exist = cudaExists;
data["cuda"].version = cudaVersion;
if (cudaExists) {
data.run_mode = "gpu";
}
writeFileSync(NVIDIA_INFO_FILE, JSON.stringify(data, null, 2));
}
/**
* Get GPU information
*/
export async function updateGpuInfo(): Promise<void> {
exec(
"nvidia-smi --query-gpu=index,memory.total --format=csv,noheader,nounits",
(error, stdout) => {
let data;
try {
data = JSON.parse(readFileSync(NVIDIA_INFO_FILE, "utf-8"));
} catch (error) {
data = DEFALT_SETTINGS;
}
if (!error) {
// Get GPU info and gpu has higher memory first
let highestVram = 0;
let highestVramId = "0";
let gpus = stdout
.trim()
.split("\n")
.map((line) => {
let [id, vram] = line.split(", ");
vram = vram.replace(/\r/g, "");
if (parseFloat(vram) > highestVram) {
highestVram = parseFloat(vram);
highestVramId = id;
}
return { id, vram };
});
data["gpus"] = gpus;
data["gpu_highest_vram"] = highestVramId;
} else {
data["gpus"] = [];
}
writeFileSync(NVIDIA_INFO_FILE, JSON.stringify(data, null, 2));
Promise.resolve();
}
);
}

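For reference, the shape updateNvidiaInfo persists to ~/jan/settings/settings.json, reconstructed from DEFALT_SETTINGS and the update functions above — all sample values here are hypothetical:

{
  "notify": true,
  "run_mode": "gpu",
  "nvidia_driver": { "exist": true, "version": "535.154.05" },
  "cuda": { "exist": true, "version": "12" },
  "gpus": [{ "id": "0", "vram": "24576" }],
  "gpu_highest_vram": "0"
}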
View File

@@ -1,15 +1,19 @@
{
"compilerOptions": {
"target": "es2016",
"module": "ES6",
"moduleResolution": "node",
"outDir": "./dist",
"esModuleInterop": true,
"forceConsistentCasingInFileNames": true,
"strict": false,
"skipLibCheck": true,
"rootDir": "./src"
"target": "es5",
"module": "ES2020",
"lib": ["es2015", "es2016", "es2017", "dom"],
"strict": true,
"sourceMap": true,
"declaration": true,
"allowSyntheticDefaultImports": true,
"experimentalDecorators": true,
"emitDecoratorMetadata": true,
"declarationDir": "dist/types",
"outDir": "dist",
"importHelpers": true,
"typeRoots": ["node_modules/@types"]
},
"include": ["./src"]
"include": ["src"]
}

View File

@@ -1,43 +0,0 @@
const path = require("path");
const webpack = require("webpack");
const packageJson = require("./package.json");
module.exports = {
experiments: { outputModule: true },
entry: "./src/index.ts", // Adjust the entry point to match your project's main file
mode: "production",
module: {
rules: [
{
test: /\.tsx?$/,
use: "ts-loader",
exclude: /node_modules/,
},
],
},
plugins: [
new webpack.DefinePlugin({
MODULE: JSON.stringify(`${packageJson.name}/${packageJson.module}`),
INFERENCE_URL: JSON.stringify(
process.env.INFERENCE_URL ||
"http://127.0.0.1:3928/inferences/llamacpp/chat_completion"
),
TROUBLESHOOTING_URL: JSON.stringify("https://jan.ai/guides/troubleshooting")
}),
],
output: {
filename: "index.js", // Adjust the output file name as needed
path: path.resolve(__dirname, "dist"),
library: { type: "module" }, // Specify ESM output format
},
resolve: {
extensions: [".ts", ".js"],
fallback: {
path: require.resolve("path-browserify"),
},
},
optimization: {
minimize: false,
},
// Add loaders and other configuration as needed for your project
};