refactor: introduce node module in nitro extension (#1630)

commit f4f861d0e9 (parent db987e88f9)
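Summary: this commit moves the nitro extension's main-process logic out of a single webpack-bundled module.ts into a dedicated src/node module (execute.ts, index.ts, nvidia.ts), built by rollup into dist/node/index.cjs.js and exposed through the package's "./main" export. Along the way the injected MODULE constant is renamed to NODE, initModel becomes runModel, and webpack is replaced by rollup. A minimal sketch of the renderer-side call pattern after this change, using only calls that appear in the hunks below (the helper name is hypothetical):

// NODE is a build-time constant inlined by @rollup/plugin-replace
// (see rollup.config.ts below); it names the extension's node bundle.
declare const NODE: string;

import { executeOnMain, joinPath, Model } from "@janhq/core";

// Illustrative only: load a model through the new node module.
async function loadNitroModel(model: Model) {
  const modelFullPath = await joinPath(["models", model.id]);
  // Dispatches to runModel() exported from src/node/index.ts.
  return executeOnMain(NODE, "runModel", { modelFullPath, model });
}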
@@ -15,13 +15,6 @@
     "dist"
   ],
   "author": "Jan <service@jan.ai>",
-  "repository": {
-    "type": "git",
-    "url": ""
-  },
-  "engines": {
-    "node": ">=6.0.0"
-  },
   "exports": {
     ".": "./dist/core.umd.js",
     "./sdk": "./dist/core.umd.js",
@@ -49,53 +42,6 @@
     "build": "tsc --module commonjs && rollup -c rollup.config.ts",
     "start": "rollup -c rollup.config.ts -w"
   },
-  "lint-staged": {
-    "{src,test}/**/*.ts": [
-      "prettier --write",
-      "git add"
-    ]
-  },
-  "config": {
-    "commitizen": {
-      "path": "node_modules/cz-conventional-changelog"
-    }
-  },
-  "jest": {
-    "transform": {
-      ".(ts|tsx)": "ts-jest"
-    },
-    "testEnvironment": "node",
-    "testRegex": "(/__tests__/.*|\\.(test|spec))\\.(ts|tsx|js)$",
-    "moduleFileExtensions": [
-      "ts",
-      "tsx",
-      "js"
-    ],
-    "coveragePathIgnorePatterns": [
-      "/node_modules/",
-      "/test/"
-    ],
-    "coverageThreshold": {
-      "global": {
-        "branches": 90,
-        "functions": 95,
-        "lines": 95,
-        "statements": 95
-      }
-    },
-    "collectCoverageFrom": [
-      "src/*.{js,ts}"
-    ]
-  },
-  "prettier": {
-    "semi": false,
-    "singleQuote": true
-  },
-  "commitlint": {
-    "extends": [
-      "@commitlint/config-conventional"
-    ]
-  },
   "devDependencies": {
     "@types/node": "^12.0.2",
     "rollup": "^2.38.5",
@@ -104,7 +50,6 @@
     "rollup-plugin-node-resolve": "^5.2.0",
     "rollup-plugin-sourcemaps": "^0.6.3",
     "rollup-plugin-typescript2": "^0.36.0",
-    "ts-node": "^7.0.1",
     "tslib": "^2.6.2",
     "typescript": "^5.2.2"
   }
@@ -104,6 +104,9 @@ export type ModelSettingParams = {
   n_parallel?: number
   cpu_threads?: number
   prompt_template?: string
+  system_prompt?: string
+  ai_prompt?: string
+  user_prompt?: string
 }
 
 /**
@@ -3,11 +3,11 @@
   "version": "1.0.0",
   "description": "This extension embeds Nitro, a lightweight (3mb) inference engine written in C++. See nitro.jan.ai",
   "main": "dist/index.js",
-  "module": "dist/module.js",
+  "node": "dist/node/index.cjs.js",
   "author": "Jan <service@jan.ai>",
   "license": "AGPL-3.0",
   "scripts": {
-    "build": "tsc -b . && webpack --config webpack.config.js",
+    "build": "tsc --module commonjs && rollup -c rollup.config.ts",
     "downloadnitro:linux": "NITRO_VERSION=$(cat ./bin/version.txt) && download https://github.com/janhq/nitro/releases/download/v${NITRO_VERSION}/nitro-${NITRO_VERSION}-linux-amd64.tar.gz -e --strip 1 -o ./bin/linux-cpu && chmod +x ./bin/linux-cpu/nitro && download https://github.com/janhq/nitro/releases/download/v${NITRO_VERSION}/nitro-${NITRO_VERSION}-linux-amd64-cuda-12-0.tar.gz -e --strip 1 -o ./bin/linux-cuda-12-0 && chmod +x ./bin/linux-cuda-12-0/nitro && download https://github.com/janhq/nitro/releases/download/v${NITRO_VERSION}/nitro-${NITRO_VERSION}-linux-amd64-cuda-11-7.tar.gz -e --strip 1 -o ./bin/linux-cuda-11-7 && chmod +x ./bin/linux-cuda-11-7/nitro",
     "downloadnitro:darwin": "NITRO_VERSION=$(cat ./bin/version.txt) && download https://github.com/janhq/nitro/releases/download/v${NITRO_VERSION}/nitro-${NITRO_VERSION}-mac-arm64.tar.gz -e --strip 1 -o ./bin/mac-arm64 && chmod +x ./bin/mac-arm64/nitro && download https://github.com/janhq/nitro/releases/download/v${NITRO_VERSION}/nitro-${NITRO_VERSION}-mac-amd64.tar.gz -e --strip 1 -o ./bin/mac-x64 && chmod +x ./bin/mac-x64/nitro",
     "downloadnitro:win32": "download.bat",
@@ -19,24 +19,33 @@
   },
   "exports": {
     ".": "./dist/index.js",
-    "./main": "./dist/module.js"
+    "./main": "./dist/node/index.cjs.js"
   },
   "devDependencies": {
+    "@rollup/plugin-commonjs": "^25.0.7",
+    "@rollup/plugin-json": "^6.1.0",
+    "@rollup/plugin-node-resolve": "^15.2.3",
+    "@types/node": "^20.11.4",
+    "@types/tcp-port-used": "^1.0.4",
     "cpx": "^1.5.0",
+    "download-cli": "^1.1.1",
     "rimraf": "^3.0.2",
+    "rollup": "^2.38.5",
+    "rollup-plugin-define": "^1.0.1",
+    "rollup-plugin-sourcemaps": "^0.6.3",
+    "rollup-plugin-typescript2": "^0.36.0",
     "run-script-os": "^1.1.6",
-    "webpack": "^5.88.2",
-    "webpack-cli": "^5.1.4"
+    "typescript": "^5.3.3"
   },
   "dependencies": {
     "@janhq/core": "file:../../core",
-    "download-cli": "^1.1.1",
+    "@rollup/plugin-replace": "^5.0.5",
+    "@types/os-utils": "^0.0.4",
     "fetch-retry": "^5.0.6",
     "os-utils": "^0.0.14",
     "path-browserify": "^1.0.1",
     "rxjs": "^7.8.1",
     "tcp-port-used": "^1.0.2",
-    "ts-loader": "^9.5.0",
     "ulid": "^2.3.0"
   },
   "engines": {
extensions/inference-nitro-extension/rollup.config.ts (new file, 77 lines)
@@ -0,0 +1,77 @@
+import resolve from "@rollup/plugin-node-resolve";
+import commonjs from "@rollup/plugin-commonjs";
+import sourceMaps from "rollup-plugin-sourcemaps";
+import typescript from "rollup-plugin-typescript2";
+import json from "@rollup/plugin-json";
+import replace from "@rollup/plugin-replace";
+const packageJson = require("./package.json");
+
+const pkg = require("./package.json");
+
+export default [
+  {
+    input: `src/index.ts`,
+    output: [{ file: pkg.main, format: "es", sourcemap: true }],
+    // Indicate here external modules you don't wanna include in your bundle (i.e.: 'lodash')
+    external: [],
+    watch: {
+      include: "src/**",
+    },
+    plugins: [
+      replace({
+        NODE: JSON.stringify(`${packageJson.name}/${packageJson.node}`),
+        INFERENCE_URL: JSON.stringify(
+          process.env.INFERENCE_URL ||
+            "http://127.0.0.1:3928/inferences/llamacpp/chat_completion"
+        ),
+        TROUBLESHOOTING_URL: JSON.stringify(
+          "https://jan.ai/guides/troubleshooting"
+        ),
+      }),
+      // Allow json resolution
+      json(),
+      // Compile TypeScript files
+      typescript({ useTsconfigDeclarationDir: true }),
+      // Compile TypeScript files
+      // Allow bundling cjs modules (unlike webpack, rollup doesn't understand cjs)
+      commonjs(),
+      // Allow node_modules resolution, so you can use 'external' to control
+      // which external modules to include in the bundle
+      // https://github.com/rollup/rollup-plugin-node-resolve#usage
+      resolve({
+        extensions: [".js", ".ts", ".svelte"],
+      }),
+
+      // Resolve source maps to the original source
+      sourceMaps(),
+    ],
+  },
+  {
+    input: `src/node/index.ts`,
+    output: [
+      { file: "dist/node/index.cjs.js", format: "cjs", sourcemap: true },
+    ],
+    // Indicate here external modules you don't wanna include in your bundle (i.e.: 'lodash')
+    external: ["@janhq/core/node"],
+    watch: {
+      include: "src/node/**",
+    },
+    plugins: [
+      // Allow json resolution
+      json(),
+      // Compile TypeScript files
+      typescript({ useTsconfigDeclarationDir: true }),
+      // Allow bundling cjs modules (unlike webpack, rollup doesn't understand cjs)
+      commonjs(),
+      // Allow node_modules resolution, so you can use 'external' to control
+      // which external modules to include in the bundle
+      // https://github.com/rollup/rollup-plugin-node-resolve#usage
+      resolve({
+        extensions: [".ts", ".js", ".json"],
+      }),
+
+      // Resolve source maps to the original source
+      sourceMaps(),
+    ],
+  },
+];
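Note: the replace() plugin above inlines NODE, INFERENCE_URL, and TROUBLESHOOTING_URL as string literals at bundle time, so the renderer bundle ships with these values baked in; source files only need ambient declarations, as the next hunk shows. A hedged sketch of consuming one such constant; the helper name is hypothetical and the real request logic lives in src/helpers/sse.ts:

// Ambient declaration; the literal value is substituted during the rollup build.
declare const INFERENCE_URL: string;

// Hypothetical reachability probe against the inlined endpoint.
async function inferenceEndpointReachable(): Promise<boolean> {
  const res = await fetch(INFERENCE_URL, { method: "POST", body: "{}" }).catch(
    () => undefined
  );
  return res?.ok ?? false;
}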
@@ -1,4 +1,4 @@
-declare const MODULE: string;
+declare const NODE: string;
 declare const INFERENCE_URL: string;
 declare const TROUBLESHOOTING_URL: string;
 
@@ -26,7 +26,6 @@ import {
 } from "@janhq/core";
 import { requestInference } from "./helpers/sse";
 import { ulid } from "ulid";
-import { join } from "path";
 
 /**
  * A class that implements the InferenceExtension interface from the @janhq/core package.
@@ -43,7 +42,7 @@ export default class JanInferenceNitroExtension implements InferenceExtension {
    */
   private static readonly _intervalHealthCheck = 5 * 1000;
 
-  private _currentModel: Model;
+  private _currentModel: Model | undefined;
 
   private _engineSettings: EngineSettings = {
     ctx_len: 2048,
@@ -82,7 +81,7 @@ export default class JanInferenceNitroExtension implements InferenceExtension {
     if (!(await fs.existsSync(JanInferenceNitroExtension._homeDir))) {
       await fs
         .mkdirSync(JanInferenceNitroExtension._homeDir)
-        .catch((err) => console.debug(err));
+        .catch((err: Error) => console.debug(err));
     }
 
     if (!(await fs.existsSync(JanInferenceNitroExtension._settingsDir)))
@@ -90,7 +89,9 @@ export default class JanInferenceNitroExtension implements InferenceExtension {
       this.writeDefaultEngineSettings();
 
     // Events subscription
-    events.on(EventName.OnMessageSent, (data) => this.onMessageRequest(data));
+    events.on(EventName.OnMessageSent, (data: MessageRequest) =>
+      this.onMessageRequest(data)
+    );
 
     events.on(EventName.OnModelInit, (model: Model) => this.onModelInit(model));
 
@@ -99,7 +100,7 @@ export default class JanInferenceNitroExtension implements InferenceExtension {
     events.on(EventName.OnInferenceStopped, () => this.onInferenceStopped());
 
     // Attempt to fetch nvidia info
-    await executeOnMain(MODULE, "updateNvidiaInfo", {});
+    await executeOnMain(NODE, "updateNvidiaInfo", {});
   }
 
   /**
@@ -109,10 +110,10 @@ export default class JanInferenceNitroExtension implements InferenceExtension {
 
   private async writeDefaultEngineSettings() {
     try {
-      const engineFile = join(
+      const engineFile = await joinPath([
         JanInferenceNitroExtension._homeDir,
-        JanInferenceNitroExtension._engineMetadataFileName
-      );
+        JanInferenceNitroExtension._engineMetadataFileName,
+      ]);
       if (await fs.existsSync(engineFile)) {
         const engine = await fs.readFileSync(engineFile, "utf-8");
         this._engineSettings =
@@ -133,12 +134,12 @@ export default class JanInferenceNitroExtension implements InferenceExtension {
 
     const modelFullPath = await joinPath(["models", model.id]);
 
-    const nitroInitResult = await executeOnMain(MODULE, "initModel", {
-      modelFullPath: modelFullPath,
-      model: model,
+    const nitroInitResult = await executeOnMain(NODE, "runModel", {
+      modelFullPath,
+      model,
     });
 
-    if (nitroInitResult.error === null) {
+    if (nitroInitResult?.error) {
       events.emit(EventName.OnModelFail, model);
       return;
     }
@@ -155,12 +156,11 @@ export default class JanInferenceNitroExtension implements InferenceExtension {
   private async onModelStop(model: Model) {
     if (model.engine !== "nitro") return;
 
-    await executeOnMain(MODULE, "stopModel");
+    await executeOnMain(NODE, "stopModel");
     events.emit(EventName.OnModelStopped, {});
 
     // stop the periocally health check
     if (this.getNitroProcesHealthIntervalId) {
-      console.debug("Stop calling Nitro process health check");
       clearInterval(this.getNitroProcesHealthIntervalId);
       this.getNitroProcesHealthIntervalId = undefined;
     }
@@ -170,7 +170,7 @@ export default class JanInferenceNitroExtension implements InferenceExtension {
    * Periodically check for nitro process's health.
    */
   private async periodicallyGetNitroHealth(): Promise<void> {
-    const health = await executeOnMain(MODULE, "getCurrentNitroProcessInfo");
+    const health = await executeOnMain(NODE, "getCurrentNitroProcessInfo");
 
     const isRunning = this.nitroProcessInfo?.isRunning ?? false;
     if (isRunning && health.isRunning === false) {
@@ -204,6 +204,8 @@ export default class JanInferenceNitroExtension implements InferenceExtension {
     };
 
     return new Promise(async (resolve, reject) => {
+      if (!this._currentModel) return Promise.reject("No model loaded");
+
       requestInference(data.messages ?? [], this._currentModel).subscribe({
         next: (_content) => {},
         complete: async () => {
@@ -223,7 +225,9 @@ export default class JanInferenceNitroExtension implements InferenceExtension {
    * @param {MessageRequest} data - The data for the new message request.
    */
   private async onMessageRequest(data: MessageRequest) {
-    if (data.model.engine !== "nitro") return;
+    if (data.model?.engine !== InferenceEngine.nitro || !this._currentModel) {
+      return;
+    }
 
     const timestamp = Date.now();
     const message: ThreadMessage = {
@@ -242,11 +246,12 @@ export default class JanInferenceNitroExtension implements InferenceExtension {
     this.isCancelled = false;
     this.controller = new AbortController();
 
-    requestInference(
-      data.messages ?? [],
-      { ...this._currentModel, ...data.model },
-      this.controller
-    ).subscribe({
+    // @ts-ignore
+    const model: Model = {
+      ...(this._currentModel || {}),
+      ...(data.model || {}),
+    };
+    requestInference(data.messages ?? [], model, this.controller).subscribe({
       next: (content) => {
         const messageContent: ThreadContent = {
           type: ContentType.Text,
@@ -1,514 +0,0 @@
-const fs = require("fs");
-const path = require("path");
-const { exec, spawn } = require("child_process");
-const tcpPortUsed = require("tcp-port-used");
-const fetchRetry = require("fetch-retry")(global.fetch);
-const osUtils = require("os-utils");
-const { readFileSync, writeFileSync, existsSync } = require("fs");
-const { log } = require("@janhq/core/node");
-
-// The PORT to use for the Nitro subprocess
-const PORT = 3928;
-const LOCAL_HOST = "127.0.0.1";
-const NITRO_HTTP_SERVER_URL = `http://${LOCAL_HOST}:${PORT}`;
-const NITRO_HTTP_LOAD_MODEL_URL = `${NITRO_HTTP_SERVER_URL}/inferences/llamacpp/loadmodel`;
-const NITRO_HTTP_VALIDATE_MODEL_URL = `${NITRO_HTTP_SERVER_URL}/inferences/llamacpp/modelstatus`;
-const NITRO_HTTP_KILL_URL = `${NITRO_HTTP_SERVER_URL}/processmanager/destroy`;
-const SUPPORTED_MODEL_FORMAT = ".gguf";
-const NVIDIA_INFO_FILE = path.join(
-  require("os").homedir(),
-  "jan",
-  "settings",
-  "settings.json"
-);
-
-// The subprocess instance for Nitro
-let subprocess = undefined;
-let currentModelFile: string = undefined;
-let currentSettings = undefined;
-
-let nitroProcessInfo = undefined;
-
-/**
- * Default GPU settings
- **/
-const DEFALT_SETTINGS = {
-  notify: true,
-  run_mode: "cpu",
-  nvidia_driver: {
-    exist: false,
-    version: "",
-  },
-  cuda: {
-    exist: false,
-    version: "",
-  },
-  gpus: [],
-  gpu_highest_vram: "",
-};
-
-/**
- * Stops a Nitro subprocess.
- * @param wrapper - The model wrapper.
- * @returns A Promise that resolves when the subprocess is terminated successfully, or rejects with an error message if the subprocess fails to terminate.
- */
-function stopModel(): Promise<void> {
-  return killSubprocess();
-}
-
-/**
- * Initializes a Nitro subprocess to load a machine learning model.
- * @param wrapper - The model wrapper.
- * @returns A Promise that resolves when the model is loaded successfully, or rejects with an error message if the model is not found or fails to load.
- * TODO: Should pass absolute of the model file instead of just the name - So we can modurize the module.ts to npm package
- * TODO: Should it be startModel instead?
- */
-async function initModel(wrapper: any): Promise<ModelOperationResponse> {
-  currentModelFile = wrapper.modelFullPath;
-  const janRoot = path.join(require("os").homedir(), "jan");
-  if (!currentModelFile.includes(janRoot)) {
-    currentModelFile = path.join(janRoot, currentModelFile);
-  }
-  const files: string[] = fs.readdirSync(currentModelFile);
-
-  // Look for GGUF model file
-  const ggufBinFile = files.find(
-    (file) =>
-      file === path.basename(currentModelFile) ||
-      file.toLowerCase().includes(SUPPORTED_MODEL_FORMAT)
-  );
-
-  currentModelFile = path.join(currentModelFile, ggufBinFile);
-
-  if (wrapper.model.engine !== "nitro") {
-    return Promise.resolve({ error: "Not a nitro model" });
-  } else {
-    const nitroResourceProbe = await getResourcesInfo();
-    // Convert settings.prompt_template to system_prompt, user_prompt, ai_prompt
-    if (wrapper.model.settings.prompt_template) {
-      const promptTemplate = wrapper.model.settings.prompt_template;
-      const prompt = promptTemplateConverter(promptTemplate);
-      if (prompt.error) {
-        return Promise.resolve({ error: prompt.error });
-      }
-      wrapper.model.settings.system_prompt = prompt.system_prompt;
-      wrapper.model.settings.user_prompt = prompt.user_prompt;
-      wrapper.model.settings.ai_prompt = prompt.ai_prompt;
-    }
-
-    currentSettings = {
-      llama_model_path: currentModelFile,
-      ...wrapper.model.settings,
-      // This is critical and requires real system information
-      cpu_threads: nitroResourceProbe.numCpuPhysicalCore,
-    };
-    return loadModel(nitroResourceProbe);
-  }
-}
-
-async function loadModel(nitroResourceProbe: any | undefined) {
-  // Gather system information for CPU physical cores and memory
-  if (!nitroResourceProbe) nitroResourceProbe = await getResourcesInfo();
-  return killSubprocess()
-    .then(() => tcpPortUsed.waitUntilFree(PORT, 300, 5000))
-    .then(() => {
-      /**
-       * There is a problem with Windows process manager
-       * Should wait for awhile to make sure the port is free and subprocess is killed
-       * The tested threshold is 500ms
-       **/
-      if (process.platform === "win32") {
-        return new Promise((resolve) => setTimeout(resolve, 500));
-      } else {
-        return Promise.resolve();
-      }
-    })
-    .then(() => spawnNitroProcess(nitroResourceProbe))
-    .then(() => loadLLMModel(currentSettings))
-    .then(validateModelStatus)
-    .catch((err) => {
-      log(`[NITRO]::Error: ${err}`);
-      // TODO: Broadcast error so app could display proper error message
-      return { error: err, currentModelFile };
-    });
-}
-
-function promptTemplateConverter(promptTemplate) {
-  // Split the string using the markers
-  const systemMarker = "{system_message}";
-  const promptMarker = "{prompt}";
-
-  if (
-    promptTemplate.includes(systemMarker) &&
-    promptTemplate.includes(promptMarker)
-  ) {
-    // Find the indices of the markers
-    const systemIndex = promptTemplate.indexOf(systemMarker);
-    const promptIndex = promptTemplate.indexOf(promptMarker);
-
-    // Extract the parts of the string
-    const system_prompt = promptTemplate.substring(0, systemIndex);
-    const user_prompt = promptTemplate.substring(
-      systemIndex + systemMarker.length,
-      promptIndex
-    );
-    const ai_prompt = promptTemplate.substring(
-      promptIndex + promptMarker.length
-    );
-
-    // Return the split parts
-    return { system_prompt, user_prompt, ai_prompt };
-  } else if (promptTemplate.includes(promptMarker)) {
-    // Extract the parts of the string for the case where only promptMarker is present
-    const promptIndex = promptTemplate.indexOf(promptMarker);
-    const user_prompt = promptTemplate.substring(0, promptIndex);
-    const ai_prompt = promptTemplate.substring(
-      promptIndex + promptMarker.length
-    );
-    const system_prompt = "";
-
-    // Return the split parts
-    return { system_prompt, user_prompt, ai_prompt };
-  }
-
-  // Return an error if none of the conditions are met
-  return { error: "Cannot split prompt template" };
-}
-
-/**
- * Loads a LLM model into the Nitro subprocess by sending a HTTP POST request.
- * @returns A Promise that resolves when the model is loaded successfully, or rejects with an error message if the model is not found or fails to load.
- */
-function loadLLMModel(settings): Promise<Response> {
-  log(`[NITRO]::Debug: Loading model with params ${JSON.stringify(settings)}`);
-  return fetchRetry(NITRO_HTTP_LOAD_MODEL_URL, {
-    method: "POST",
-    headers: {
-      "Content-Type": "application/json",
-    },
-    body: JSON.stringify(settings),
-    retries: 3,
-    retryDelay: 500,
-  }).catch((err) => {
-    log(`[NITRO]::Error: Load model failed with error ${err}`);
-  });
-}
-
-/**
- * Validates the status of a model.
- * @returns {Promise<ModelOperationResponse>} A promise that resolves to an object.
- * If the model is loaded successfully, the object is empty.
- * If the model is not loaded successfully, the object contains an error message.
- */
-async function validateModelStatus(): Promise<ModelOperationResponse> {
-  // Send a GET request to the validation URL.
-  // Retry the request up to 3 times if it fails, with a delay of 500 milliseconds between retries.
-  return fetchRetry(NITRO_HTTP_VALIDATE_MODEL_URL, {
-    method: "GET",
-    headers: {
-      "Content-Type": "application/json",
-    },
-    retries: 5,
-    retryDelay: 500,
-  }).then(async (res: Response) => {
-    // If the response is OK, check model_loaded status.
-    if (res.ok) {
-      const body = await res.json();
-      // If the model is loaded, return an empty object.
-      // Otherwise, return an object with an error message.
-      if (body.model_loaded) {
-        return { error: undefined };
-      }
-    }
-    return { error: "Model loading failed" };
-  });
-}
-
-/**
- * Terminates the Nitro subprocess.
- * @returns A Promise that resolves when the subprocess is terminated successfully, or rejects with an error message if the subprocess fails to terminate.
- */
-async function killSubprocess(): Promise<void> {
-  const controller = new AbortController();
-  setTimeout(() => controller.abort(), 5000);
-  log(`[NITRO]::Debug: Request to kill Nitro`);
-
-  return fetch(NITRO_HTTP_KILL_URL, {
-    method: "DELETE",
-    signal: controller.signal,
-  })
-    .then(() => {
-      subprocess?.kill();
-      subprocess = undefined;
-    })
-    .catch(() => {})
-    .then(() => tcpPortUsed.waitUntilFree(PORT, 300, 5000))
-    .then(() => log(`[NITRO]::Debug: Nitro process is terminated`));
-}
-
-/**
- * Spawns a Nitro subprocess.
- * @param nitroResourceProbe - The Nitro resource probe.
- * @returns A promise that resolves when the Nitro subprocess is started.
- */
-function spawnNitroProcess(nitroResourceProbe: any): Promise<any> {
-  log(`[NITRO]::Debug: Spawning Nitro subprocess...`);
-
-  return new Promise(async (resolve, reject) => {
-    let binaryFolder = path.join(__dirname, "bin"); // Current directory by default
-    let cudaVisibleDevices = "";
-    let binaryName;
-    if (process.platform === "win32") {
-      let nvidiaInfo = JSON.parse(readFileSync(NVIDIA_INFO_FILE, "utf-8"));
-      if (nvidiaInfo["run_mode"] === "cpu") {
-        binaryFolder = path.join(binaryFolder, "win-cpu");
-      } else {
-        if (nvidiaInfo["cuda"].version === "12") {
-          binaryFolder = path.join(binaryFolder, "win-cuda-12-0");
-        } else {
-          binaryFolder = path.join(binaryFolder, "win-cuda-11-7");
-        }
-        cudaVisibleDevices = nvidiaInfo["gpu_highest_vram"];
-      }
-      binaryName = "nitro.exe";
-    } else if (process.platform === "darwin") {
-      if (process.arch === "arm64") {
-        binaryFolder = path.join(binaryFolder, "mac-arm64");
-      } else {
-        binaryFolder = path.join(binaryFolder, "mac-x64");
-      }
-      binaryName = "nitro";
-    } else {
-      let nvidiaInfo = JSON.parse(readFileSync(NVIDIA_INFO_FILE, "utf-8"));
-      if (nvidiaInfo["run_mode"] === "cpu") {
-        binaryFolder = path.join(binaryFolder, "linux-cpu");
-      } else {
-        if (nvidiaInfo["cuda"].version === "12") {
-          binaryFolder = path.join(binaryFolder, "linux-cuda-12-0");
-        } else {
-          binaryFolder = path.join(binaryFolder, "linux-cuda-11-7");
-        }
-        cudaVisibleDevices = nvidiaInfo["gpu_highest_vram"];
-      }
-      binaryName = "nitro";
-    }
-
-    const binaryPath = path.join(binaryFolder, binaryName);
-    // Execute the binary
-    subprocess = spawn(binaryPath, ["1", LOCAL_HOST, PORT.toString()], {
-      cwd: binaryFolder,
-      env: {
-        ...process.env,
-        CUDA_VISIBLE_DEVICES: cudaVisibleDevices,
-      },
-    });
-
-    // Handle subprocess output
-    subprocess.stdout.on("data", (data) => {
-      log(`[NITRO]::Debug: ${data}`);
-    });
-
-    subprocess.stderr.on("data", (data) => {
-      log(`[NITRO]::Error: ${data}`);
-    });
-
-    subprocess.on("close", (code) => {
-      log(`[NITRO]::Debug: Nitro exited with code: ${code}`);
-      subprocess = null;
-      reject(`child process exited with code ${code}`);
-    });
-
-    tcpPortUsed.waitUntilUsed(PORT, 300, 30000).then(() => {
-      resolve(nitroResourceProbe);
-    });
-  });
-}
-
-/**
- * Get the system resources information
- * TODO: Move to Core so that it can be reused
- */
-function getResourcesInfo(): Promise<ResourcesInfo> {
-  return new Promise(async (resolve) => {
-    const cpu = await osUtils.cpuCount();
-    log(`[NITRO]::CPU informations - ${cpu}`);
-    const response: ResourcesInfo = {
-      numCpuPhysicalCore: cpu,
-      memAvailable: 0,
-    };
-    resolve(response);
-  });
-}
-
-/**
- * This will retrive GPU informations and persist settings.json
- * Will be called when the extension is loaded to turn on GPU acceleration if supported
- */
-async function updateNvidiaInfo() {
-  if (process.platform !== "darwin") {
-    await Promise.all([
-      updateNvidiaDriverInfo(),
-      updateCudaExistence(),
-      updateGpuInfo(),
-    ]);
-  }
-}
-
-/**
- * Retrieve current nitro process
- */
-const getCurrentNitroProcessInfo = (): Promise<any> => {
-  nitroProcessInfo = {
-    isRunning: subprocess != null,
-  };
-  return nitroProcessInfo;
-};
-
-/**
- * Every module should have a dispose function
- * This will be called when the extension is unloaded and should clean up any resources
- * Also called when app is closed
- */
-function dispose() {
-  // clean other registered resources here
-  killSubprocess();
-}
-
-/**
- * Validate nvidia and cuda for linux and windows
- */
-async function updateNvidiaDriverInfo(): Promise<void> {
-  exec(
-    "nvidia-smi --query-gpu=driver_version --format=csv,noheader",
-    (error, stdout) => {
-      let data;
-      try {
-        data = JSON.parse(readFileSync(NVIDIA_INFO_FILE, "utf-8"));
-      } catch (error) {
-        data = DEFALT_SETTINGS;
-      }
-
-      if (!error) {
-        const firstLine = stdout.split("\n")[0].trim();
-        data["nvidia_driver"].exist = true;
-        data["nvidia_driver"].version = firstLine;
-      } else {
-        data["nvidia_driver"].exist = false;
-      }
-
-      writeFileSync(NVIDIA_INFO_FILE, JSON.stringify(data, null, 2));
-      Promise.resolve();
-    }
-  );
-}
-
-/**
- * Check if file exists in paths
- */
-function checkFileExistenceInPaths(file: string, paths: string[]): boolean {
-  return paths.some((p) => existsSync(path.join(p, file)));
-}
-
-/**
- * Validate cuda for linux and windows
- */
-function updateCudaExistence() {
-  let filesCuda12: string[];
-  let filesCuda11: string[];
-  let paths: string[];
-  let cudaVersion: string = "";
-
-  if (process.platform === "win32") {
-    filesCuda12 = ["cublas64_12.dll", "cudart64_12.dll", "cublasLt64_12.dll"];
-    filesCuda11 = ["cublas64_11.dll", "cudart64_11.dll", "cublasLt64_11.dll"];
-    paths = process.env.PATH ? process.env.PATH.split(path.delimiter) : [];
-  } else {
-    filesCuda12 = ["libcudart.so.12", "libcublas.so.12", "libcublasLt.so.12"];
-    filesCuda11 = ["libcudart.so.11.0", "libcublas.so.11", "libcublasLt.so.11"];
-    paths = process.env.LD_LIBRARY_PATH
-      ? process.env.LD_LIBRARY_PATH.split(path.delimiter)
-      : [];
-    paths.push("/usr/lib/x86_64-linux-gnu/");
-  }
-
-  let cudaExists = filesCuda12.every(
-    (file) => existsSync(file) || checkFileExistenceInPaths(file, paths)
-  );
-
-  if (!cudaExists) {
-    cudaExists = filesCuda11.every(
-      (file) => existsSync(file) || checkFileExistenceInPaths(file, paths)
-    );
-    if (cudaExists) {
-      cudaVersion = "11";
-    }
-  } else {
-    cudaVersion = "12";
-  }
-
-  let data;
-  try {
-    data = JSON.parse(readFileSync(NVIDIA_INFO_FILE, "utf-8"));
-  } catch (error) {
-    data = DEFALT_SETTINGS;
-  }
-
-  data["cuda"].exist = cudaExists;
-  data["cuda"].version = cudaVersion;
-  if (cudaExists) {
-    data.run_mode = "gpu";
-  }
-  writeFileSync(NVIDIA_INFO_FILE, JSON.stringify(data, null, 2));
-}
-
-/**
- * Get GPU information
- */
-async function updateGpuInfo(): Promise<void> {
-  exec(
-    "nvidia-smi --query-gpu=index,memory.total --format=csv,noheader,nounits",
-    (error, stdout) => {
-      let data;
-      try {
-        data = JSON.parse(readFileSync(NVIDIA_INFO_FILE, "utf-8"));
-      } catch (error) {
-        data = DEFALT_SETTINGS;
-      }
-
-      if (!error) {
-        // Get GPU info and gpu has higher memory first
-        let highestVram = 0;
-        let highestVramId = "0";
-        let gpus = stdout
-          .trim()
-          .split("\n")
-          .map((line) => {
-            let [id, vram] = line.split(", ");
-            vram = vram.replace(/\r/g, "");
-            if (parseFloat(vram) > highestVram) {
-              highestVram = parseFloat(vram);
-              highestVramId = id;
-            }
-            return { id, vram };
-          });
-
-        data["gpus"] = gpus;
-        data["gpu_highest_vram"] = highestVramId;
-      } else {
-        data["gpus"] = [];
-      }
-
-      writeFileSync(NVIDIA_INFO_FILE, JSON.stringify(data, null, 2));
-      Promise.resolve();
-    }
-  );
-}
-
-module.exports = {
-  initModel,
-  stopModel,
-  killSubprocess,
-  dispose,
-  updateNvidiaInfo,
-  getCurrentNitroProcessInfo,
-};
extensions/inference-nitro-extension/src/node/execute.ts (new file, 65 lines)
@@ -0,0 +1,65 @@
+import { readFileSync } from "fs";
+import * as path from "path";
+import { NVIDIA_INFO_FILE } from "./nvidia";
+
+export interface NitroExecutableOptions {
+  executablePath: string;
+  cudaVisibleDevices: string;
+}
+/**
+ * Find which executable file to run based on the current platform.
+ * @returns The name of the executable file to run.
+ */
+export const executableNitroFile = (): NitroExecutableOptions => {
+  let binaryFolder = path.join(__dirname, "..", "bin"); // Current directory by default
+  let cudaVisibleDevices = "";
+  let binaryName = "nitro";
+  /**
+   * The binary folder is different for each platform.
+   */
+  if (process.platform === "win32") {
+    /**
+     * For Windows: win-cpu, win-cuda-11-7, win-cuda-12-0
+     */
+    let nvidiaInfo = JSON.parse(readFileSync(NVIDIA_INFO_FILE, "utf-8"));
+    if (nvidiaInfo["run_mode"] === "cpu") {
+      binaryFolder = path.join(binaryFolder, "win-cpu");
+    } else {
+      if (nvidiaInfo["cuda"].version === "12") {
+        binaryFolder = path.join(binaryFolder, "win-cuda-12-0");
+      } else {
+        binaryFolder = path.join(binaryFolder, "win-cuda-11-7");
+      }
+      cudaVisibleDevices = nvidiaInfo["gpu_highest_vram"];
+    }
+    binaryName = "nitro.exe";
+  } else if (process.platform === "darwin") {
+    /**
+     * For MacOS: mac-arm64 (Silicon), mac-x64 (InteL)
+     */
+    if (process.arch === "arm64") {
+      binaryFolder = path.join(binaryFolder, "mac-arm64");
+    } else {
+      binaryFolder = path.join(binaryFolder, "mac-x64");
+    }
+  } else {
+    /**
+     * For Linux: linux-cpu, linux-cuda-11-7, linux-cuda-12-0
+     */
+    let nvidiaInfo = JSON.parse(readFileSync(NVIDIA_INFO_FILE, "utf-8"));
+    if (nvidiaInfo["run_mode"] === "cpu") {
+      binaryFolder = path.join(binaryFolder, "linux-cpu");
+    } else {
+      if (nvidiaInfo["cuda"].version === "12") {
+        binaryFolder = path.join(binaryFolder, "linux-cuda-12-0");
+      } else {
+        binaryFolder = path.join(binaryFolder, "linux-cuda-11-7");
+      }
+      cudaVisibleDevices = nvidiaInfo["gpu_highest_vram"];
+    }
+  }
+  return {
+    executablePath: path.join(binaryFolder, binaryName),
+    cudaVisibleDevices,
+  };
+};
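For orientation, a short sketch of how executableNitroFile() is consumed (mirroring src/node/index.ts below); the spawn arguments ["1", "127.0.0.1", "3928"] come straight from this commit, the rest is illustrative:

import { spawn } from "child_process";
import { executableNitroFile } from "./execute";

// Resolve the platform/CUDA-specific binary once, then spawn Nitro with the
// highest-VRAM GPU pinned (an empty string means CPU mode).
const { executablePath, cudaVisibleDevices } = executableNitroFile();
const nitro = spawn(executablePath, ["1", "127.0.0.1", "3928"], {
  env: { ...process.env, CUDA_VISIBLE_DEVICES: cudaVisibleDevices },
});
nitro.stdout.on("data", (d) => console.log(`[NITRO] ${d}`));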
379
extensions/inference-nitro-extension/src/node/index.ts
Normal file
379
extensions/inference-nitro-extension/src/node/index.ts
Normal file
@ -0,0 +1,379 @@
|
|||||||
|
import fs from "fs";
|
||||||
|
import path from "path";
|
||||||
|
import { ChildProcessWithoutNullStreams, spawn } from "child_process";
|
||||||
|
import tcpPortUsed from "tcp-port-used";
|
||||||
|
import fetchRT from "fetch-retry";
|
||||||
|
import osUtils from "os-utils";
|
||||||
|
import { log } from "@janhq/core/node";
|
||||||
|
import { getNitroProcessInfo, updateNvidiaInfo } from "./nvidia";
|
||||||
|
import { Model, InferenceEngine, ModelSettingParams } from "@janhq/core";
|
||||||
|
import { executableNitroFile } from "./execute";
|
||||||
|
import { homedir } from "os";
|
||||||
|
// Polyfill fetch with retry
|
||||||
|
const fetchRetry = fetchRT(fetch);
|
||||||
|
|
||||||
|
/**
|
||||||
|
* The response object for model init operation.
|
||||||
|
*/
|
||||||
|
interface ModelInitOptions {
|
||||||
|
modelFullPath: string;
|
||||||
|
model: Model;
|
||||||
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* The response object of Prompt Template parsing.
|
||||||
|
*/
|
||||||
|
interface PromptTemplate {
|
||||||
|
system_prompt?: string;
|
||||||
|
ai_prompt?: string;
|
||||||
|
user_prompt?: string;
|
||||||
|
error?: string;
|
||||||
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Model setting args for Nitro model load.
|
||||||
|
*/
|
||||||
|
interface ModelSettingArgs extends ModelSettingParams {
|
||||||
|
llama_model_path: string;
|
||||||
|
cpu_threads: number;
|
||||||
|
}
|
||||||
|
|
||||||
|
// The PORT to use for the Nitro subprocess
|
||||||
|
const PORT = 3928;
|
||||||
|
// The HOST address to use for the Nitro subprocess
|
||||||
|
const LOCAL_HOST = "127.0.0.1";
|
||||||
|
// The URL for the Nitro subprocess
|
||||||
|
const NITRO_HTTP_SERVER_URL = `http://${LOCAL_HOST}:${PORT}`;
|
||||||
|
// The URL for the Nitro subprocess to load a model
|
||||||
|
const NITRO_HTTP_LOAD_MODEL_URL = `${NITRO_HTTP_SERVER_URL}/inferences/llamacpp/loadmodel`;
|
||||||
|
// The URL for the Nitro subprocess to validate a model
|
||||||
|
const NITRO_HTTP_VALIDATE_MODEL_URL = `${NITRO_HTTP_SERVER_URL}/inferences/llamacpp/modelstatus`;
|
||||||
|
// The URL for the Nitro subprocess to kill itself
|
||||||
|
const NITRO_HTTP_KILL_URL = `${NITRO_HTTP_SERVER_URL}/processmanager/destroy`;
|
||||||
|
|
||||||
|
// The supported model format
|
||||||
|
// TODO: Should be an array to support more models
|
||||||
|
const SUPPORTED_MODEL_FORMAT = ".gguf";
|
||||||
|
|
||||||
|
// The subprocess instance for Nitro
|
||||||
|
let subprocess: ChildProcessWithoutNullStreams | undefined = undefined;
|
||||||
|
// The current model file url
|
||||||
|
let currentModelFile: string = "";
|
||||||
|
// The current model settings
|
||||||
|
let currentSettings: ModelSettingArgs | undefined = undefined;
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Stops a Nitro subprocess.
|
||||||
|
* @param wrapper - The model wrapper.
|
||||||
|
* @returns A Promise that resolves when the subprocess is terminated successfully, or rejects with an error message if the subprocess fails to terminate.
|
||||||
|
*/
|
||||||
|
function stopModel(): Promise<void> {
|
||||||
|
return killSubprocess();
|
||||||
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Initializes a Nitro subprocess to load a machine learning model.
|
||||||
|
* @param wrapper - The model wrapper.
|
||||||
|
* @returns A Promise that resolves when the model is loaded successfully, or rejects with an error message if the model is not found or fails to load.
|
||||||
|
* TODO: Should pass absolute of the model file instead of just the name - So we can modurize the module.ts to npm package
|
||||||
|
*/
|
||||||
|
async function runModel(
|
||||||
|
wrapper: ModelInitOptions
|
||||||
|
): Promise<ModelOperationResponse | void> {
|
||||||
|
if (wrapper.model.engine !== InferenceEngine.nitro) {
|
||||||
|
// Not a nitro model
|
||||||
|
return Promise.resolve();
|
||||||
|
}
|
||||||
|
|
||||||
|
currentModelFile = wrapper.modelFullPath;
|
||||||
|
const janRoot = path.join(homedir(), "jan");
|
||||||
|
if (!currentModelFile.includes(janRoot)) {
|
||||||
|
currentModelFile = path.join(janRoot, currentModelFile);
|
||||||
|
}
|
||||||
|
const files: string[] = fs.readdirSync(currentModelFile);
|
||||||
|
|
||||||
|
// Look for GGUF model file
|
||||||
|
const ggufBinFile = files.find(
|
||||||
|
(file) =>
|
||||||
|
file === path.basename(currentModelFile) ||
|
||||||
|
file.toLowerCase().includes(SUPPORTED_MODEL_FORMAT)
|
||||||
|
);
|
||||||
|
|
||||||
|
if (!ggufBinFile) return Promise.reject("No GGUF model file found");
|
||||||
|
|
||||||
|
currentModelFile = path.join(currentModelFile, ggufBinFile);
|
||||||
|
|
||||||
|
if (wrapper.model.engine !== InferenceEngine.nitro) {
|
||||||
|
return Promise.reject("Not a nitro model");
|
||||||
|
} else {
|
||||||
|
const nitroResourceProbe = await getResourcesInfo();
|
||||||
|
// Convert settings.prompt_template to system_prompt, user_prompt, ai_prompt
|
||||||
|
if (wrapper.model.settings.prompt_template) {
|
||||||
|
const promptTemplate = wrapper.model.settings.prompt_template;
|
||||||
|
const prompt = promptTemplateConverter(promptTemplate);
|
||||||
|
if (prompt?.error) {
|
||||||
|
return Promise.reject(prompt.error);
|
||||||
|
}
|
||||||
|
wrapper.model.settings.system_prompt = prompt.system_prompt;
|
||||||
|
wrapper.model.settings.user_prompt = prompt.user_prompt;
|
||||||
|
wrapper.model.settings.ai_prompt = prompt.ai_prompt;
|
||||||
|
}
|
||||||
|
|
||||||
|
currentSettings = {
|
||||||
|
llama_model_path: currentModelFile,
|
||||||
|
...wrapper.model.settings,
|
||||||
|
// This is critical and requires real system information
|
||||||
|
cpu_threads: nitroResourceProbe.numCpuPhysicalCore,
|
||||||
|
};
|
||||||
|
return runNitroAndLoadModel();
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* 1. Spawn Nitro process
|
||||||
|
* 2. Load model into Nitro subprocess
|
||||||
|
* 3. Validate model status
|
||||||
|
* @returns
|
||||||
|
*/
|
||||||
|
async function runNitroAndLoadModel() {
|
||||||
|
// Gather system information for CPU physical cores and memory
|
||||||
|
return killSubprocess()
|
||||||
|
.then(() => tcpPortUsed.waitUntilFree(PORT, 300, 5000))
|
||||||
|
.then(() => {
|
||||||
|
/**
|
||||||
|
* There is a problem with Windows process manager
|
||||||
|
* Should wait for awhile to make sure the port is free and subprocess is killed
|
||||||
|
* The tested threshold is 500ms
|
||||||
|
**/
|
||||||
|
if (process.platform === "win32") {
|
||||||
|
return new Promise((resolve) => setTimeout(resolve, 500));
|
||||||
|
} else {
|
||||||
|
return Promise.resolve();
|
||||||
|
}
|
||||||
|
})
|
||||||
|
.then(spawnNitroProcess)
|
||||||
|
.then(() => loadLLMModel(currentSettings))
|
||||||
|
.then(validateModelStatus)
|
||||||
|
.catch((err) => {
|
||||||
|
// TODO: Broadcast error so app could display proper error message
|
||||||
|
log(`[NITRO]::Error: ${err}`);
|
||||||
|
return { error: err };
|
||||||
|
});
|
||||||
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Parse prompt template into agrs settings
|
||||||
|
* @param promptTemplate Template as string
|
||||||
|
* @returns
|
||||||
|
*/
|
||||||
|
function promptTemplateConverter(promptTemplate: string): PromptTemplate {
|
||||||
|
// Split the string using the markers
|
||||||
|
const systemMarker = "{system_message}";
|
||||||
|
const promptMarker = "{prompt}";
|
||||||
|
|
||||||
|
if (
|
||||||
|
promptTemplate.includes(systemMarker) &&
|
||||||
|
promptTemplate.includes(promptMarker)
|
||||||
|
) {
|
||||||
|
// Find the indices of the markers
|
||||||
|
const systemIndex = promptTemplate.indexOf(systemMarker);
|
||||||
|
const promptIndex = promptTemplate.indexOf(promptMarker);
|
||||||
|
|
||||||
|
// Extract the parts of the string
|
||||||
|
const system_prompt = promptTemplate.substring(0, systemIndex);
|
||||||
|
const user_prompt = promptTemplate.substring(
|
||||||
|
systemIndex + systemMarker.length,
|
||||||
|
promptIndex
|
||||||
|
);
|
||||||
|
const ai_prompt = promptTemplate.substring(
|
||||||
|
promptIndex + promptMarker.length
|
||||||
|
);
|
||||||
|
|
||||||
|
// Return the split parts
|
||||||
|
return { system_prompt, user_prompt, ai_prompt };
|
||||||
|
} else if (promptTemplate.includes(promptMarker)) {
|
||||||
|
// Extract the parts of the string for the case where only promptMarker is present
|
||||||
|
const promptIndex = promptTemplate.indexOf(promptMarker);
|
||||||
|
const user_prompt = promptTemplate.substring(0, promptIndex);
|
||||||
|
const ai_prompt = promptTemplate.substring(
|
||||||
|
promptIndex + promptMarker.length
|
||||||
|
);
|
||||||
|
|
||||||
|
// Return the split parts
|
||||||
|
return { user_prompt, ai_prompt };
|
||||||
|
}
|
||||||
|
|
||||||
|
// Return an error if none of the conditions are met
|
||||||
|
return { error: "Cannot split prompt template" };
|
||||||
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Loads a LLM model into the Nitro subprocess by sending a HTTP POST request.
|
||||||
|
* @returns A Promise that resolves when the model is loaded successfully, or rejects with an error message if the model is not found or fails to load.
|
||||||
|
*/
|
||||||
|
function loadLLMModel(settings: any): Promise<Response> {
|
||||||
|
log(`[NITRO]::Debug: Loading model with params ${JSON.stringify(settings)}`);
|
||||||
|
return fetchRetry(NITRO_HTTP_LOAD_MODEL_URL, {
|
||||||
|
method: "POST",
|
||||||
|
headers: {
|
||||||
|
"Content-Type": "application/json",
|
||||||
|
},
|
||||||
|
body: JSON.stringify(settings),
|
||||||
|
retries: 3,
|
||||||
|
retryDelay: 500,
|
||||||
|
})
|
||||||
|
.then((res) => {
|
||||||
|
log(
|
||||||
|
`[NITRO]::Debug: Load model success with response ${JSON.stringify(
|
||||||
|
res
|
||||||
|
)}`
|
||||||
|
);
|
||||||
|
return Promise.resolve(res);
|
||||||
|
})
|
||||||
|
.catch((err) => {
|
||||||
|
log(`[NITRO]::Error: Load model failed with error ${err}`);
|
||||||
|
return Promise.reject();
|
||||||
|
});
|
||||||
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Validates the status of a model.
|
||||||
|
* @returns {Promise<ModelOperationResponse>} A promise that resolves to an object.
|
||||||
|
* If the model is loaded successfully, the object is empty.
|
||||||
|
* If the model is not loaded successfully, the object contains an error message.
|
||||||
|
*/
|
||||||
|
async function validateModelStatus(): Promise<void> {
|
||||||
|
// Send a GET request to the validation URL.
|
||||||
|
// Retry the request up to 3 times if it fails, with a delay of 500 milliseconds between retries.
|
||||||
|
return fetchRetry(NITRO_HTTP_VALIDATE_MODEL_URL, {
|
||||||
|
method: "GET",
|
||||||
|
headers: {
|
||||||
|
"Content-Type": "application/json",
|
||||||
|
},
|
||||||
|
retries: 5,
|
||||||
|
retryDelay: 500,
|
||||||
|
}).then(async (res: Response) => {
|
||||||
|
log(
|
||||||
|
`[NITRO]::Debug: Validate model state success with response ${JSON.stringify(
|
||||||
|
res
|
||||||
|
)}`
|
||||||
|
);
|
||||||
|
// If the response is OK, check model_loaded status.
|
||||||
|
if (res.ok) {
|
||||||
|
const body = await res.json();
|
||||||
|
// If the model is loaded, return an empty object.
|
||||||
|
// Otherwise, return an object with an error message.
|
||||||
|
if (body.model_loaded) {
|
||||||
|
return Promise.resolve();
|
||||||
|
}
|
||||||
|
}
|
||||||
|
return Promise.reject("Validate model status failed");
|
||||||
|
});
|
||||||
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Terminates the Nitro subprocess.
|
||||||
|
* @returns A Promise that resolves when the subprocess is terminated successfully, or rejects with an error message if the subprocess fails to terminate.
|
||||||
|
*/
|
||||||
|
async function killSubprocess(): Promise<void> {
|
||||||
|
const controller = new AbortController();
|
||||||
|
setTimeout(() => controller.abort(), 5000);
|
||||||
|
log(`[NITRO]::Debug: Request to kill Nitro`);
|
||||||
|
|
||||||
|
return fetch(NITRO_HTTP_KILL_URL, {
|
||||||
|
method: "DELETE",
|
||||||
|
signal: controller.signal,
|
||||||
|
})
|
||||||
|
.then(() => {
|
||||||
|
subprocess?.kill();
|
||||||
|
subprocess = undefined;
|
||||||
|
})
|
||||||
|
.catch(() => {})
|
||||||
|
.then(() => tcpPortUsed.waitUntilFree(PORT, 300, 5000))
|
||||||
|
.then(() => log(`[NITRO]::Debug: Nitro process is terminated`));
|
||||||
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Spawns a Nitro subprocess.
|
||||||
|
* @returns A promise that resolves when the Nitro subprocess is started.
|
||||||
|
*/
|
||||||
|
function spawnNitroProcess(): Promise<any> {
|
||||||
|
log(`[NITRO]::Debug: Spawning Nitro subprocess...`);
|
||||||
|
|
||||||
|
return new Promise<void>(async (resolve, reject) => {
|
||||||
|
let binaryFolder = path.join(__dirname, "..", "bin"); // Current directory by default
|
||||||
|
let executableOptions = executableNitroFile();
|
||||||
|
|
||||||
|
const args: string[] = ["1", LOCAL_HOST, PORT.toString()];
|
||||||
|
// Execute the binary
|
||||||
|
log(
|
||||||
|
`[NITRO]::Debug: Spawn nitro at path: ${executableOptions.executablePath}, and args: ${args}`
|
||||||
|
);
|
||||||
|
subprocess = spawn(
|
||||||
|
executableOptions.executablePath,
|
||||||
|
["1", LOCAL_HOST, PORT.toString()],
|
||||||
|
{
|
||||||
|
cwd: binaryFolder,
|
||||||
|
env: {
|
||||||
|
...process.env,
|
||||||
|
CUDA_VISIBLE_DEVICES: executableOptions.cudaVisibleDevices,
|
||||||
|
},
|
||||||
|
}
|
||||||
|
);
|
||||||
|
|
||||||
|
// Handle subprocess output
|
||||||
|
subprocess.stdout.on("data", (data: any) => {
|
||||||
|
log(`[NITRO]::Debug: ${data}`);
|
||||||
|
});
|
||||||
|
|
||||||
|
subprocess.stderr.on("data", (data: any) => {
|
||||||
|
log(`[NITRO]::Error: ${data}`);
|
||||||
|
});
|
||||||
|
|
||||||
|
subprocess.on("close", (code: any) => {
|
||||||
|
log(`[NITRO]::Debug: Nitro exited with code: ${code}`);
|
||||||
|
subprocess = undefined;
|
||||||
|
reject(`child process exited with code ${code}`);
|
||||||
|
});
|
||||||
|
|
||||||
|
tcpPortUsed.waitUntilUsed(PORT, 300, 30000).then(() => {
|
||||||
|
log(`[NITRO]::Debug: Nitro is ready`);
|
||||||
|
resolve();
|
||||||
|
});
|
||||||
|
});
|
||||||
|
}
|
||||||
|
/**
 * Get system resource information
 * TODO: Move to Core so that it can be reused
 */
async function getResourcesInfo(): Promise<ResourcesInfo> {
  const cpu = await osUtils.cpuCount();
  log(`[NITRO]::CPU information - ${cpu}`);
  return {
    numCpuPhysicalCore: cpu,
    memAvailable: 0,
  };
}

/**
 * Every module should have a dispose function.
 * It is called when the extension is unloaded, and also when the app is closed,
 * and should clean up any resources the module holds.
 */
function dispose() {
  // clean up other registered resources here
  killSubprocess();
}

export default {
  runModel,
  stopModel,
  killSubprocess,
  dispose,
  updateNvidiaInfo,
  getCurrentNitroProcessInfo: () => getNitroProcessInfo(subprocess),
};

extensions/inference-nitro-extension/src/node/nvidia.ts
Normal file, 201 lines
@ -0,0 +1,201 @@
import { writeFileSync, existsSync, readFileSync } from "fs";
import { exec } from "child_process";
import path from "path";
import { homedir } from "os";

/**
 * Default GPU settings
 **/
const DEFAULT_SETTINGS = {
  notify: true,
  run_mode: "cpu",
  nvidia_driver: {
    exist: false,
    version: "",
  },
  cuda: {
    exist: false,
    version: "",
  },
  gpus: [],
  gpu_highest_vram: "",
};

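For reference, the object above is what gets persisted to settings.json. A hypothetical TypeScript shape for that file, inferred from the defaults and the update functions below (the interface name is illustrative):

// Hypothetical shape of ~/jan/settings/settings.json, inferred from
// DEFAULT_SETTINGS and the update functions in this module.
interface GpuSettings {
  notify: boolean;
  run_mode: "cpu" | "gpu";
  nvidia_driver: { exist: boolean; version: string };
  cuda: { exist: boolean; version: string };
  gpus: { id: string; vram: string }[];
  gpu_highest_vram: string; // id of the GPU with the most VRAM
}
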
/**
 * Path to the settings file
 **/
export const NVIDIA_INFO_FILE = path.join(
  homedir(),
  "jan",
  "settings",
  "settings.json"
);

/**
 * Current nitro process
 */
let nitroProcessInfo: NitroProcessInfo | undefined = undefined;

/**
 * Nitro process info
 */
export interface NitroProcessInfo {
  isRunning: boolean;
}

/**
 * Retrieves GPU information and persists it to settings.json.
 * Called when the extension is loaded to turn on GPU acceleration if supported.
 */
export async function updateNvidiaInfo() {
  if (process.platform !== "darwin") {
    await Promise.all([
      updateNvidiaDriverInfo(),
      updateCudaExistence(),
      updateGpuInfo(),
    ]);
  }
}

/**
 * Retrieve current nitro process
 */
export const getNitroProcessInfo = (subprocess: any): NitroProcessInfo => {
  nitroProcessInfo = {
    isRunning: subprocess != null,
  };
  return nitroProcessInfo;
};

/**
 * Validate nvidia and cuda for linux and windows
 */
export async function updateNvidiaDriverInfo(): Promise<void> {
  // Wrap exec in a promise so callers can actually await the callback;
  // a bare Promise.resolve() inside the callback would be discarded.
  return new Promise((resolve) => {
    exec(
      "nvidia-smi --query-gpu=driver_version --format=csv,noheader",
      (error, stdout) => {
        let data;
        try {
          data = JSON.parse(readFileSync(NVIDIA_INFO_FILE, "utf-8"));
        } catch (error) {
          data = DEFAULT_SETTINGS;
        }

        if (!error) {
          const firstLine = stdout.split("\n")[0].trim();
          data["nvidia_driver"].exist = true;
          data["nvidia_driver"].version = firstLine;
        } else {
          data["nvidia_driver"].exist = false;
        }

        writeFileSync(NVIDIA_INFO_FILE, JSON.stringify(data, null, 2));
        resolve();
      }
    );
  });
}

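With those flags, nvidia-smi prints one driver version per GPU (e.g. a line like "535.154.05"). An alternative sketch of the same query using Node's built-in util.promisify instead of a hand-rolled wrapper, so failures surface as rejections (the function name is illustrative):

import { exec } from "child_process";
import { promisify } from "util";

const execAsync = promisify(exec);

// Sketch: query the driver version, returning undefined when
// nvidia-smi is missing or there is no NVIDIA GPU.
async function queryDriverVersion(): Promise<string | undefined> {
  try {
    const { stdout } = await execAsync(
      "nvidia-smi --query-gpu=driver_version --format=csv,noheader"
    );
    return stdout.split("\n")[0].trim();
  } catch {
    return undefined;
  }
}
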
/**
 * Check if a file exists in any of the given paths
 */
export function checkFileExistenceInPaths(
  file: string,
  paths: string[]
): boolean {
  return paths.some((p) => existsSync(path.join(p, file)));
}

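A quick usage sketch of the helper above (the directories shown are illustrative, not values from the codebase):

// e.g. look for a CUDA 12 runtime somewhere on a library search path
const found = checkFileExistenceInPaths("libcudart.so.12", [
  "/usr/local/cuda/lib64",
  "/usr/lib/x86_64-linux-gnu/",
]);
console.log(found ? "CUDA runtime found" : "CUDA runtime missing");
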
/**
 * Validate cuda for linux and windows
 */
export function updateCudaExistence() {
  let filesCuda12: string[];
  let filesCuda11: string[];
  let paths: string[];
  let cudaVersion: string = "";

  if (process.platform === "win32") {
    filesCuda12 = ["cublas64_12.dll", "cudart64_12.dll", "cublasLt64_12.dll"];
    filesCuda11 = ["cublas64_11.dll", "cudart64_11.dll", "cublasLt64_11.dll"];
    paths = process.env.PATH ? process.env.PATH.split(path.delimiter) : [];
  } else {
    filesCuda12 = ["libcudart.so.12", "libcublas.so.12", "libcublasLt.so.12"];
    filesCuda11 = ["libcudart.so.11.0", "libcublas.so.11", "libcublasLt.so.11"];
    paths = process.env.LD_LIBRARY_PATH
      ? process.env.LD_LIBRARY_PATH.split(path.delimiter)
      : [];
    paths.push("/usr/lib/x86_64-linux-gnu/");
  }

  // Prefer CUDA 12; fall back to CUDA 11 if the newer runtime is absent.
  let cudaExists = filesCuda12.every(
    (file) => existsSync(file) || checkFileExistenceInPaths(file, paths)
  );

  if (!cudaExists) {
    cudaExists = filesCuda11.every(
      (file) => existsSync(file) || checkFileExistenceInPaths(file, paths)
    );
    if (cudaExists) {
      cudaVersion = "11";
    }
  } else {
    cudaVersion = "12";
  }

  let data;
  try {
    data = JSON.parse(readFileSync(NVIDIA_INFO_FILE, "utf-8"));
  } catch (error) {
    data = DEFAULT_SETTINGS;
  }

  data["cuda"].exist = cudaExists;
  data["cuda"].version = cudaVersion;
  if (cudaExists) {
    data.run_mode = "gpu";
  }
  writeFileSync(NVIDIA_INFO_FILE, JSON.stringify(data, null, 2));
}

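Condensed, the version-resolution logic above reduces to the following (a sketch for illustration only; the function name is hypothetical):

// CUDA 12 wins over CUDA 11; an empty string means no usable runtime.
function resolveCudaVersion(has12: boolean, has11: boolean): "" | "11" | "12" {
  if (has12) return "12";
  if (has11) return "11";
  return "";
}
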
/**
 * Get GPU information
 */
export async function updateGpuInfo(): Promise<void> {
  // As above, wrap exec in a promise so the function is awaitable.
  return new Promise((resolve) => {
    exec(
      "nvidia-smi --query-gpu=index,memory.total --format=csv,noheader,nounits",
      (error, stdout) => {
        let data;
        try {
          data = JSON.parse(readFileSync(NVIDIA_INFO_FILE, "utf-8"));
        } catch (error) {
          data = DEFAULT_SETTINGS;
        }

        if (!error) {
          // Record every GPU and remember the one with the most VRAM.
          let highestVram = 0;
          let highestVramId = "0";
          let gpus = stdout
            .trim()
            .split("\n")
            .map((line) => {
              let [id, vram] = line.split(", ");
              vram = vram.replace(/\r/g, "");
              if (parseFloat(vram) > highestVram) {
                highestVram = parseFloat(vram);
                highestVramId = id;
              }
              return { id, vram };
            });

          data["gpus"] = gpus;
          data["gpu_highest_vram"] = highestVramId;
        } else {
          data["gpus"] = [];
        }

        writeFileSync(NVIDIA_INFO_FILE, JSON.stringify(data, null, 2));
        resolve();
      }
    );
  });
}

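With --query-gpu=index,memory.total --format=csv,noheader,nounits, nvidia-smi emits lines like "0, 24576". A standalone demo of the parsing logic above with made-up output:

// Demo input: two GPUs, 24 GiB and 8 GiB of VRAM (values are invented).
const sample = "0, 24576\n1, 8192\n";
let highestVram = 0;
let highestVramId = "0";
const gpus = sample
  .trim()
  .split("\n")
  .map((line) => {
    const [id, vram] = line.split(", ").map((s) => s.replace(/\r/g, ""));
    if (parseFloat(vram) > highestVram) {
      highestVram = parseFloat(vram);
      highestVramId = id;
    }
    return { id, vram };
  });
console.log(gpus, highestVramId);
// [{ id: "0", vram: "24576" }, { id: "1", vram: "8192" }] "0"
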
extensions/inference-nitro-extension/tsconfig.json
@ -1,15 +1,19 @@
 {
   "compilerOptions": {
-    "target": "es2016",
-    "module": "ES6",
     "moduleResolution": "node",
-    "outDir": "./dist",
-    "esModuleInterop": true,
-    "forceConsistentCasingInFileNames": true,
-    "strict": false,
-    "skipLibCheck": true,
-    "rootDir": "./src"
+    "target": "es5",
+    "module": "ES2020",
+    "lib": ["es2015", "es2016", "es2017", "dom"],
+    "strict": true,
+    "sourceMap": true,
+    "declaration": true,
+    "allowSyntheticDefaultImports": true,
+    "experimentalDecorators": true,
+    "emitDecoratorMetadata": true,
+    "declarationDir": "dist/types",
+    "outDir": "dist",
+    "importHelpers": true,
+    "typeRoots": ["node_modules/@types"]
   },
-  "include": ["./src"]
+  "include": ["src"]
 }
extensions/inference-nitro-extension/webpack.config.js (deleted)
@ -1,43 +0,0 @@
const path = require("path");
const webpack = require("webpack");
const packageJson = require("./package.json");

module.exports = {
  experiments: { outputModule: true },
  entry: "./src/index.ts", // Adjust the entry point to match your project's main file
  mode: "production",
  module: {
    rules: [
      {
        test: /\.tsx?$/,
        use: "ts-loader",
        exclude: /node_modules/,
      },
    ],
  },
  plugins: [
    new webpack.DefinePlugin({
      MODULE: JSON.stringify(`${packageJson.name}/${packageJson.module}`),
      INFERENCE_URL: JSON.stringify(
        process.env.INFERENCE_URL ||
          "http://127.0.0.1:3928/inferences/llamacpp/chat_completion"
      ),
      TROUBLESHOOTING_URL: JSON.stringify("https://jan.ai/guides/troubleshooting")
    }),
  ],
  output: {
    filename: "index.js", // Adjust the output file name as needed
    path: path.resolve(__dirname, "dist"),
    library: { type: "module" }, // Specify ESM output format
  },
  resolve: {
    extensions: [".ts", ".js"],
    fallback: {
      path: require.resolve("path-browserify"),
    },
  },
  optimization: {
    minimize: false,
  },
  // Add loaders and other configuration as needed for your project
};