Merge branch 'dev' into docs/api-reference
This commit is contained in:
commit
8282406626
4
.husky/pre-commit
Normal file
4
.husky/pre-commit
Normal file
@ -0,0 +1,4 @@
|
|||||||
|
#!/usr/bin/env sh
|
||||||
|
. "$(dirname -- "$0")/_/husky.sh"
|
||||||
|
|
||||||
|
npx pretty-quick --staged
|
||||||
@ -40,9 +40,7 @@
|
|||||||
"dependencies": {
|
"dependencies": {
|
||||||
"@janhq/core": "file:../../core",
|
"@janhq/core": "file:../../core",
|
||||||
"@rollup/plugin-replace": "^5.0.5",
|
"@rollup/plugin-replace": "^5.0.5",
|
||||||
"@types/os-utils": "^0.0.4",
|
|
||||||
"fetch-retry": "^5.0.6",
|
"fetch-retry": "^5.0.6",
|
||||||
"os-utils": "^0.0.14",
|
|
||||||
"path-browserify": "^1.0.1",
|
"path-browserify": "^1.0.1",
|
||||||
"rxjs": "^7.8.1",
|
"rxjs": "^7.8.1",
|
||||||
"tcp-port-used": "^1.0.2",
|
"tcp-port-used": "^1.0.2",
|
||||||
@ -59,7 +57,6 @@
|
|||||||
"bundleDependencies": [
|
"bundleDependencies": [
|
||||||
"tcp-port-used",
|
"tcp-port-used",
|
||||||
"fetch-retry",
|
"fetch-retry",
|
||||||
"os-utils",
|
|
||||||
"@janhq/core"
|
"@janhq/core"
|
||||||
]
|
]
|
||||||
}
|
}
|
||||||
|
|||||||
@ -86,11 +86,17 @@ export default class JanInferenceNitroExtension extends InferenceExtension {
|
|||||||
this.onMessageRequest(data)
|
this.onMessageRequest(data)
|
||||||
);
|
);
|
||||||
|
|
||||||
events.on(ModelEvent.OnModelInit, (model: Model) => this.onModelInit(model));
|
events.on(ModelEvent.OnModelInit, (model: Model) =>
|
||||||
|
this.onModelInit(model)
|
||||||
|
);
|
||||||
|
|
||||||
events.on(ModelEvent.OnModelStop, (model: Model) => this.onModelStop(model));
|
events.on(ModelEvent.OnModelStop, (model: Model) =>
|
||||||
|
this.onModelStop(model)
|
||||||
|
);
|
||||||
|
|
||||||
events.on(InferenceEvent.OnInferenceStopped, () => this.onInferenceStopped());
|
events.on(InferenceEvent.OnInferenceStopped, () =>
|
||||||
|
this.onInferenceStopped()
|
||||||
|
);
|
||||||
|
|
||||||
// Attempt to fetch nvidia info
|
// Attempt to fetch nvidia info
|
||||||
await executeOnMain(NODE, "updateNvidiaInfo", {});
|
await executeOnMain(NODE, "updateNvidiaInfo", {});
|
||||||
@ -200,11 +206,11 @@ export default class JanInferenceNitroExtension extends InferenceExtension {
|
|||||||
if (!this._currentModel) return Promise.reject("No model loaded");
|
if (!this._currentModel) return Promise.reject("No model loaded");
|
||||||
|
|
||||||
requestInference(data.messages ?? [], this._currentModel).subscribe({
|
requestInference(data.messages ?? [], this._currentModel).subscribe({
|
||||||
next: (_content) => {},
|
next: (_content: any) => {},
|
||||||
complete: async () => {
|
complete: async () => {
|
||||||
resolve(message);
|
resolve(message);
|
||||||
},
|
},
|
||||||
error: async (err) => {
|
error: async (err: any) => {
|
||||||
reject(err);
|
reject(err);
|
||||||
},
|
},
|
||||||
});
|
});
|
||||||
@ -245,7 +251,7 @@ export default class JanInferenceNitroExtension extends InferenceExtension {
|
|||||||
...(data.model || {}),
|
...(data.model || {}),
|
||||||
};
|
};
|
||||||
requestInference(data.messages ?? [], model, this.controller).subscribe({
|
requestInference(data.messages ?? [], model, this.controller).subscribe({
|
||||||
next: (content) => {
|
next: (content: any) => {
|
||||||
const messageContent: ThreadContent = {
|
const messageContent: ThreadContent = {
|
||||||
type: ContentType.Text,
|
type: ContentType.Text,
|
||||||
text: {
|
text: {
|
||||||
@ -262,7 +268,7 @@ export default class JanInferenceNitroExtension extends InferenceExtension {
|
|||||||
: MessageStatus.Error;
|
: MessageStatus.Error;
|
||||||
events.emit(MessageEvent.OnMessageUpdate, message);
|
events.emit(MessageEvent.OnMessageUpdate, message);
|
||||||
},
|
},
|
||||||
error: async (err) => {
|
error: async (err: any) => {
|
||||||
if (this.isCancelled || message.content.length) {
|
if (this.isCancelled || message.content.length) {
|
||||||
message.status = MessageStatus.Stopped;
|
message.status = MessageStatus.Stopped;
|
||||||
events.emit(MessageEvent.OnMessageUpdate, message);
|
events.emit(MessageEvent.OnMessageUpdate, message);
|
||||||
|
|||||||
@ -3,11 +3,12 @@ import path from "path";
|
|||||||
import { ChildProcessWithoutNullStreams, spawn } from "child_process";
|
import { ChildProcessWithoutNullStreams, spawn } from "child_process";
|
||||||
import tcpPortUsed from "tcp-port-used";
|
import tcpPortUsed from "tcp-port-used";
|
||||||
import fetchRT from "fetch-retry";
|
import fetchRT from "fetch-retry";
|
||||||
import osUtils from "os-utils";
|
|
||||||
import { log, getJanDataFolderPath } from "@janhq/core/node";
|
import { log, getJanDataFolderPath } from "@janhq/core/node";
|
||||||
import { getNitroProcessInfo, updateNvidiaInfo } from "./nvidia";
|
import { getNitroProcessInfo, updateNvidiaInfo } from "./nvidia";
|
||||||
import { Model, InferenceEngine, ModelSettingParams } from "@janhq/core";
|
import { Model, InferenceEngine, ModelSettingParams } from "@janhq/core";
|
||||||
import { executableNitroFile } from "./execute";
|
import { executableNitroFile } from "./execute";
|
||||||
|
import { physicalCpuCount } from "./utils";
|
||||||
|
|
||||||
// Polyfill fetch with retry
|
// Polyfill fetch with retry
|
||||||
const fetchRetry = fetchRT(fetch);
|
const fetchRetry = fetchRT(fetch);
|
||||||
|
|
||||||
@ -121,9 +122,10 @@ async function runModel(
|
|||||||
currentSettings = {
|
currentSettings = {
|
||||||
llama_model_path: currentModelFile,
|
llama_model_path: currentModelFile,
|
||||||
...wrapper.model.settings,
|
...wrapper.model.settings,
|
||||||
// This is critical and requires real system information
|
// This is critical and requires real CPU physical core count (or performance core)
|
||||||
cpu_threads: Math.max(1, Math.round(nitroResourceProbe.numCpuPhysicalCore / 2)),
|
cpu_threads: Math.max(1, nitroResourceProbe.numCpuPhysicalCore),
|
||||||
};
|
};
|
||||||
|
console.log(currentSettings);
|
||||||
return runNitroAndLoadModel();
|
return runNitroAndLoadModel();
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
@ -348,7 +350,7 @@ function spawnNitroProcess(): Promise<any> {
|
|||||||
*/
|
*/
|
||||||
function getResourcesInfo(): Promise<ResourcesInfo> {
|
function getResourcesInfo(): Promise<ResourcesInfo> {
|
||||||
return new Promise(async (resolve) => {
|
return new Promise(async (resolve) => {
|
||||||
const cpu = await osUtils.cpuCount();
|
const cpu = await physicalCpuCount();
|
||||||
log(`[NITRO]::CPU informations - ${cpu}`);
|
log(`[NITRO]::CPU informations - ${cpu}`);
|
||||||
const response: ResourcesInfo = {
|
const response: ResourcesInfo = {
|
||||||
numCpuPhysicalCore: cpu,
|
numCpuPhysicalCore: cpu,
|
||||||
|
|||||||
56
extensions/inference-nitro-extension/src/node/utils.ts
Normal file
56
extensions/inference-nitro-extension/src/node/utils.ts
Normal file
@ -0,0 +1,56 @@
|
|||||||
|
import os from "os";
|
||||||
|
import childProcess from "child_process";
|
||||||
|
|
||||||
|
/**
 * Runs `command` through `childProcess.exec` and exposes the outcome as a promise.
 *
 * @param command - Command line handed to `childProcess.exec` unchanged.
 * @returns A promise resolving with the text the command wrote to stdout
 *   (decoded as UTF-8). Anything written to stderr is not returned.
 * @throws Rejects with the error object reported by `childProcess.exec`.
 */
function exec(command: string): Promise<string> {
  return new Promise<string>((onSuccess, onFailure) => {
    childProcess.exec(command, { encoding: "utf8" }, (failure, output) => {
      // Guard clause: a reported error wins over whatever was captured on stdout.
      if (failure) {
        onFailure(failure);
        return;
      }
      onSuccess(output);
    });
  });
}
|
||||||
|
|
||||||
|
/** Platform identifier captured once at module load; selects the probing strategy. */
const platform = os.platform();

/** Parses the single integer a probing command printed on stdout (NaN if unparseable). */
function parseSingleCount(output: string): number {
  return parseInt(output.trim(), 10);
}

/**
 * Adds up every numeric line of `WMIC CPU Get NumberOfCores` output.
 * The header line and blank lines parse to NaN and are skipped; the sum starts
 * at 0 so the result is exactly the total of the reported per-CPU core counts.
 */
function sumWmicCoreCounts(output: string): number {
  return output
    .split(/\r?\n/)
    .map((line) => parseInt(line, 10))
    .filter((value) => !isNaN(value))
    .reduce((sum, value) => sum + value, 0);
}

/**
 * Heuristic estimate based on `os.cpus()`: entries whose model string contains
 * "Intel" are assumed to be hyperthreaded, so only the odd-indexed ones are
 * counted; every other entry counts as one core. Never returns less than 1.
 */
function estimateFromLogicalCpus(): number {
  const cores = os.cpus().filter((cpu: { model: string }, index: number) => {
    const hasHyperthreading = cpu.model.includes("Intel");
    const isOdd = index % 2 === 1;
    return !hasHyperthreading || isOdd;
  });
  return Math.max(1, cores.length);
}

/** Runs the platform-specific probe; may reject or yield NaN/0 when the tool is unavailable. */
async function probePhysicalCores(): Promise<number> {
  switch (platform) {
    case "linux":
      // Counts the unique rows of columns 2-4 in `lscpu -p` output.
      return parseSingleCount(
        await exec('lscpu -p | egrep -v "^#" | sort -u -t, -k 2,4 | wc -l')
      );
    case "darwin":
      return parseSingleCount(await exec("sysctl -n hw.physicalcpu_max"));
    case "win32":
      return sumWmicCoreCounts(await exec("WMIC CPU Get NumberOfCores"));
    default:
      return estimateFromLogicalCpus();
  }
}

/**
 * Determines the number of physical CPU cores of the host.
 *
 * Linux, macOS and Windows are probed with a system command; other platforms
 * use a heuristic over `os.cpus()`. If the command fails or prints something
 * that is not a positive integer, the same heuristic is used instead, so the
 * promise always settles with a usable value.
 *
 * @returns A promise resolving with a positive integer core count. It does not
 *   reject because of a failed or unparseable probe.
 */
export async function physicalCpuCount(): Promise<number> {
  try {
    const count = await probePhysicalCores();
    if (Number.isInteger(count) && count > 0) {
      return count;
    }
    console.warn(
      `[physicalCpuCount] unusable core count "${count}" on ${platform}; estimating from os.cpus()`
    );
  } catch (error: unknown) {
    // The probing tool may be missing or may exit with an error; fall back to
    // the heuristic rather than failing the caller.
    console.warn(
      `[physicalCpuCount] probe failed on ${platform}; estimating from os.cpus()`,
      error
    );
  }
  return estimateFromLogicalCpus();
}
|
||||||
Loading…
x
Reference in New Issue
Block a user