feat: add start/stop model via http api (#1862)

Signed-off-by: nam <namnh0122@gmail.com>
Author: NamH (2024-02-01 11:25:34 +07:00), committed by GitHub
Parent: 4b8b13b5d3
Commit: 4116aaa98a
19 changed files with 559 additions and 194 deletions

View File

@ -2,7 +2,8 @@ import fs from 'fs'
import { JanApiRouteConfiguration, RouteConfiguration } from './configuration'
import { join } from 'path'
import { ContentType, MessageStatus, Model, ThreadMessage } from './../../../index'
import { getJanDataFolderPath } from '../../utils'
import { getEngineConfiguration, getJanDataFolderPath } from '../../utils'
import { DEFAULT_CHAT_COMPLETION_URL } from './consts'
export const getBuilder = async (configuration: RouteConfiguration) => {
const directoryPath = join(getJanDataFolderPath(), configuration.dirName)
@ -309,7 +310,7 @@ export const chatCompletions = async (request: any, reply: any) => {
const engineConfiguration = await getEngineConfiguration(requestedModel.engine)
let apiKey: string | undefined = undefined
let apiUrl: string = 'http://127.0.0.1:3928/inferences/llamacpp/chat_completion' // default nitro url
let apiUrl: string = DEFAULT_CHAT_COMPLETION_URL
if (engineConfiguration) {
apiKey = engineConfiguration.api_key
@ -320,7 +321,7 @@ export const chatCompletions = async (request: any, reply: any) => {
'Content-Type': 'text/event-stream',
'Cache-Control': 'no-cache',
'Connection': 'keep-alive',
"Access-Control-Allow-Origin": "*"
'Access-Control-Allow-Origin': '*',
})
const headers: Record<string, any> = {
@ -346,13 +347,3 @@ export const chatCompletions = async (request: any, reply: any) => {
response.body.pipe(reply.raw)
}
}
const getEngineConfiguration = async (engineId: string) => {
if (engineId !== 'openai') {
return undefined
}
const directoryPath = join(getJanDataFolderPath(), 'engines')
const filePath = join(directoryPath, `${engineId}.json`)
const data = await fs.readFileSync(filePath, 'utf-8')
return JSON.parse(data)
}

View File

@ -0,0 +1,19 @@
// The PORT to use for the Nitro subprocess
export const NITRO_DEFAULT_PORT = 3928
// The HOST address to use for the Nitro subprocess
export const LOCAL_HOST = '127.0.0.1'
export const SUPPORTED_MODEL_FORMAT = '.gguf'
// The URL for the Nitro subprocess
const NITRO_HTTP_SERVER_URL = `http://${LOCAL_HOST}:${NITRO_DEFAULT_PORT}`
// The URL for the Nitro subprocess to load a model
export const NITRO_HTTP_LOAD_MODEL_URL = `${NITRO_HTTP_SERVER_URL}/inferences/llamacpp/loadmodel`
// The URL for the Nitro subprocess to validate a model
export const NITRO_HTTP_VALIDATE_MODEL_URL = `${NITRO_HTTP_SERVER_URL}/inferences/llamacpp/modelstatus`
// The URL for the Nitro subprocess to kill itself
export const NITRO_HTTP_KILL_URL = `${NITRO_HTTP_SERVER_URL}/processmanager/destroy`
export const DEFAULT_CHAT_COMPLETION_URL = `http://${LOCAL_HOST}:${NITRO_DEFAULT_PORT}/inferences/llamacpp/chat_completion` // default nitro url
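
For orientation, a minimal sketch of how these constants can be exercised, assuming Node 18+ (global fetch) and a Nitro subprocess already listening on the default port; the import path mirrors how startStopModel.ts imports './consts':

```ts
// Sketch: ask Nitro whether a model is currently loaded, using the new constant.
import { NITRO_HTTP_VALIDATE_MODEL_URL } from './consts'

export const isModelLoaded = async (): Promise<boolean> => {
  const res = await fetch(NITRO_HTTP_VALIDATE_MODEL_URL, {
    method: 'GET',
    headers: { 'Content-Type': 'application/json' },
  })
  if (!res.ok) return false
  const body = await res.json()
  // Nitro's /inferences/llamacpp/modelstatus response carries a model_loaded flag.
  return body.model_loaded === true
}
```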

View File

@ -0,0 +1,351 @@
import fs from 'fs'
import { join } from 'path'
import { getJanDataFolderPath, getJanExtensionsPath, getSystemResourceInfo } from '../../utils'
import { logServer } from '../../log'
import { ChildProcessWithoutNullStreams, spawn } from 'child_process'
import { Model, ModelSettingParams, PromptTemplate } from '../../../types'
import {
LOCAL_HOST,
NITRO_DEFAULT_PORT,
NITRO_HTTP_KILL_URL,
NITRO_HTTP_LOAD_MODEL_URL,
NITRO_HTTP_VALIDATE_MODEL_URL,
SUPPORTED_MODEL_FORMAT,
} from './consts'
// The subprocess instance for Nitro
let subprocess: ChildProcessWithoutNullStreams | undefined = undefined
// TODO: move this to core type
interface NitroModelSettings extends ModelSettingParams {
llama_model_path: string
cpu_threads: number
}
export const startModel = async (modelId: string, settingParams?: ModelSettingParams) => {
try {
await runModel(modelId, settingParams)
return {
message: `Model ${modelId} started`,
}
} catch (e) {
return {
error: e,
}
}
}
const runModel = async (modelId: string, settingParams?: ModelSettingParams): Promise<void> => {
const janDataFolderPath = getJanDataFolderPath()
const modelFolderFullPath = join(janDataFolderPath, 'models', modelId)
if (!fs.existsSync(modelFolderFullPath)) {
throw `Model not found: ${modelId}`
}
const files: string[] = fs.readdirSync(modelFolderFullPath)
// Look for GGUF model file
const ggufBinFile = files.find((file) => file.toLowerCase().includes(SUPPORTED_MODEL_FORMAT))
const modelMetadataPath = join(modelFolderFullPath, 'model.json')
const modelMetadata: Model = JSON.parse(fs.readFileSync(modelMetadataPath, 'utf-8'))
if (!ggufBinFile) {
throw 'No GGUF model file found'
}
const modelBinaryPath = join(modelFolderFullPath, ggufBinFile)
const nitroResourceProbe = await getSystemResourceInfo()
const nitroModelSettings: NitroModelSettings = {
...modelMetadata.settings,
...settingParams,
llama_model_path: modelBinaryPath,
// This is critical and requires real CPU physical core count (or performance core)
cpu_threads: Math.max(1, nitroResourceProbe.numCpuPhysicalCore),
...(modelMetadata.settings.mmproj && {
mmproj: join(modelFolderFullPath, modelMetadata.settings.mmproj),
}),
}
logServer(`[NITRO]::Debug: Nitro model settings: ${JSON.stringify(nitroModelSettings)}`)
// Convert settings.prompt_template to system_prompt, user_prompt, ai_prompt
if (modelMetadata.settings.prompt_template) {
const promptTemplate = modelMetadata.settings.prompt_template
const prompt = promptTemplateConverter(promptTemplate)
if (prompt?.error) {
return Promise.reject(prompt.error)
}
nitroModelSettings.system_prompt = prompt.system_prompt
nitroModelSettings.user_prompt = prompt.user_prompt
nitroModelSettings.ai_prompt = prompt.ai_prompt
}
await runNitroAndLoadModel(modelId, nitroModelSettings)
}
// TODO: move to util
const promptTemplateConverter = (promptTemplate: string): PromptTemplate => {
// Split the string using the markers
const systemMarker = '{system_message}'
const promptMarker = '{prompt}'
if (promptTemplate.includes(systemMarker) && promptTemplate.includes(promptMarker)) {
// Find the indices of the markers
const systemIndex = promptTemplate.indexOf(systemMarker)
const promptIndex = promptTemplate.indexOf(promptMarker)
// Extract the parts of the string
const system_prompt = promptTemplate.substring(0, systemIndex)
const user_prompt = promptTemplate.substring(systemIndex + systemMarker.length, promptIndex)
const ai_prompt = promptTemplate.substring(promptIndex + promptMarker.length)
// Return the split parts
return { system_prompt, user_prompt, ai_prompt }
} else if (promptTemplate.includes(promptMarker)) {
// Extract the parts of the string for the case where only promptMarker is present
const promptIndex = promptTemplate.indexOf(promptMarker)
const user_prompt = promptTemplate.substring(0, promptIndex)
const ai_prompt = promptTemplate.substring(promptIndex + promptMarker.length)
// Return the split parts
return { user_prompt, ai_prompt }
}
// Return an error if none of the conditions are met
return { error: 'Cannot split prompt template' }
}
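
To make the splitting concrete, a small illustration; the template string is only an example (Llama-chat style), and the converter is module-private here, so this is not a separately importable snippet:

```ts
// Illustrative input/output for promptTemplateConverter.
const template = '[INST] <<SYS>>\n{system_message}\n<</SYS>>\n\n{prompt} [/INST]'
const parts = promptTemplateConverter(template)
// parts.system_prompt === '[INST] <<SYS>>\n'
// parts.user_prompt   === '\n<</SYS>>\n\n'
// parts.ai_prompt     === ' [/INST]'
// A template containing only {prompt} yields just user_prompt and ai_prompt;
// anything else returns { error: 'Cannot split prompt template' }.
```
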
const runNitroAndLoadModel = async (modelId: string, modelSettings: NitroModelSettings) => {
// Gather system information for CPU physical cores and memory
const tcpPortUsed = require('tcp-port-used')
await stopModel(modelId)
await tcpPortUsed.waitUntilFree(NITRO_DEFAULT_PORT, 300, 5000)
/**
* There is a problem with Windows process manager
* Should wait for a while to make sure the port is free and the subprocess is killed
* The tested threshold is 500ms
**/
if (process.platform === 'win32') {
await new Promise((resolve) => setTimeout(resolve, 500))
}
await spawnNitroProcess()
await loadLLMModel(modelSettings)
await validateModelStatus()
}
const spawnNitroProcess = async (): Promise<void> => {
logServer(`[NITRO]::Debug: Spawning Nitro subprocess...`)
let binaryFolder = join(
getJanExtensionsPath(),
'@janhq',
'inference-nitro-extension',
'dist',
'bin'
)
let executableOptions = executableNitroFile()
const tcpPortUsed = require('tcp-port-used')
const args: string[] = ['1', LOCAL_HOST, NITRO_DEFAULT_PORT.toString()]
// Execute the binary
logServer(
`[NITRO]::Debug: Spawn nitro at path: ${executableOptions.executablePath}, and args: ${args}`
)
subprocess = spawn(
executableOptions.executablePath,
['1', LOCAL_HOST, NITRO_DEFAULT_PORT.toString()],
{
cwd: binaryFolder,
env: {
...process.env,
CUDA_VISIBLE_DEVICES: executableOptions.cudaVisibleDevices,
},
}
)
// Handle subprocess output
subprocess.stdout.on('data', (data: any) => {
logServer(`[NITRO]::Debug: ${data}`)
})
subprocess.stderr.on('data', (data: any) => {
logServer(`[NITRO]::Error: ${data}`)
})
subprocess.on('close', (code: any) => {
logServer(`[NITRO]::Debug: Nitro exited with code: ${code}`)
subprocess = undefined
})
tcpPortUsed.waitUntilUsed(NITRO_DEFAULT_PORT, 300, 30000).then(() => {
logServer(`[NITRO]::Debug: Nitro is ready`)
})
}
type NitroExecutableOptions = {
executablePath: string
cudaVisibleDevices: string
}
const executableNitroFile = (): NitroExecutableOptions => {
const nvidiaInfoFilePath = join(getJanDataFolderPath(), 'settings', 'settings.json')
let binaryFolder = join(
getJanExtensionsPath(),
'@janhq',
'inference-nitro-extension',
'dist',
'bin'
)
let cudaVisibleDevices = ''
let binaryName = 'nitro'
/**
* The binary folder is different for each platform.
*/
if (process.platform === 'win32') {
/**
* For Windows: win-cpu, win-cuda-11-7, win-cuda-12-0
*/
let nvidiaInfo = JSON.parse(fs.readFileSync(nvidiaInfoFilePath, 'utf-8'))
if (nvidiaInfo['run_mode'] === 'cpu') {
binaryFolder = join(binaryFolder, 'win-cpu')
} else {
if (nvidiaInfo['cuda'].version === '12') {
binaryFolder = join(binaryFolder, 'win-cuda-12-0')
} else {
binaryFolder = join(binaryFolder, 'win-cuda-11-7')
}
cudaVisibleDevices = nvidiaInfo['gpu_highest_vram']
}
binaryName = 'nitro.exe'
} else if (process.platform === 'darwin') {
/**
* For macOS: mac-arm64 (Apple Silicon), mac-x64 (Intel)
*/
if (process.arch === 'arm64') {
binaryFolder = join(binaryFolder, 'mac-arm64')
} else {
binaryFolder = join(binaryFolder, 'mac-x64')
}
} else {
/**
* For Linux: linux-cpu, linux-cuda-11-7, linux-cuda-12-0
*/
let nvidiaInfo = JSON.parse(fs.readFileSync(nvidiaInfoFilePath, 'utf-8'))
if (nvidiaInfo['run_mode'] === 'cpu') {
binaryFolder = join(binaryFolder, 'linux-cpu')
} else {
if (nvidiaInfo['cuda'].version === '12') {
binaryFolder = join(binaryFolder, 'linux-cuda-12-0')
} else {
binaryFolder = join(binaryFolder, 'linux-cuda-11-7')
}
cudaVisibleDevices = nvidiaInfo['gpu_highest_vram']
}
}
return {
executablePath: join(binaryFolder, binaryName),
cudaVisibleDevices,
}
}
const validateModelStatus = async (): Promise<void> => {
// Send a GET request to the validation URL.
// Retry the request up to 3 times if it fails, with a delay of 500 milliseconds between retries.
const fetchRT = require('fetch-retry')
const fetchRetry = fetchRT(fetch)
return fetchRetry(NITRO_HTTP_VALIDATE_MODEL_URL, {
method: 'GET',
headers: {
'Content-Type': 'application/json',
},
retries: 5,
retryDelay: 500,
}).then(async (res: Response) => {
logServer(`[NITRO]::Debug: Validate model state success with response ${JSON.stringify(res)}`)
// If the response is OK, check model_loaded status.
if (res.ok) {
const body = await res.json()
// If the model is loaded, return an empty object.
// Otherwise, return an object with an error message.
if (body.model_loaded) {
return Promise.resolve()
}
}
return Promise.reject('Validate model status failed')
})
}
const loadLLMModel = async (settings: NitroModelSettings): Promise<Response> => {
logServer(`[NITRO]::Debug: Loading model with params ${JSON.stringify(settings)}`)
const fetchRT = require('fetch-retry')
const fetchRetry = fetchRT(fetch)
return fetchRetry(NITRO_HTTP_LOAD_MODEL_URL, {
method: 'POST',
headers: {
'Content-Type': 'application/json',
},
body: JSON.stringify(settings),
retries: 3,
retryDelay: 500,
})
.then((res: any) => {
logServer(`[NITRO]::Debug: Load model success with response ${JSON.stringify(res)}`)
return Promise.resolve(res)
})
.catch((err: any) => {
logServer(`[NITRO]::Error: Load model failed with error ${err}`)
return Promise.reject()
})
}
/**
* Stop model and kill nitro process.
*/
export const stopModel = async (_modelId: string) => {
if (!subprocess) {
return {
error: "Model isn't running",
}
}
return new Promise((resolve, reject) => {
const controller = new AbortController()
setTimeout(() => {
controller.abort()
reject({
error: 'Failed to stop model: Timedout',
})
}, 5000)
const tcpPortUsed = require('tcp-port-used')
logServer(`[NITRO]::Debug: Request to kill Nitro`)
fetch(NITRO_HTTP_KILL_URL, {
method: 'DELETE',
signal: controller.signal,
})
.then(() => {
subprocess?.kill()
subprocess = undefined
})
.catch(() => {
// don't need to do anything, we still kill the subprocess
})
.then(() => tcpPortUsed.waitUntilFree(NITRO_DEFAULT_PORT, 300, 5000))
.then(() => logServer(`[NITRO]::Debug: Nitro process is terminated`))
.then(() =>
resolve({
message: 'Model stopped',
})
)
})
}
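
Since startModel and stopModel are exported from this module, server-side code can also drive them directly. A small sketch follows; the import path and model id are illustrative, not values taken from this commit:

```ts
// Hypothetical direct use of the exported helpers.
import { startModel, stopModel } from './startStopModel'

const result = await startModel('mistral-ins-7b-q4', { ctx_len: 2048, ngl: 100 })
if ('error' in result) {
  console.error('start failed:', result.error)
} else {
  console.log(result.message) // "Model mistral-ins-7b-q4 started"
}
await stopModel('mistral-ins-7b-q4') // kills the Nitro subprocess if one is running
```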

View File

@ -10,6 +10,8 @@ import {
} from '../common/builder'
import { JanApiRouteConfiguration } from '../common/configuration'
import { startModel, stopModel } from '../common/startStopModel'
import { ModelSettingParams } from '../../../types'
export const commonRouter = async (app: HttpServer) => {
// Common Routes
@ -17,19 +19,33 @@ export const commonRouter = async (app: HttpServer) => {
app.get(`/${key}`, async (_request) => getBuilder(JanApiRouteConfiguration[key]))
app.get(`/${key}/:id`, async (request: any) =>
retrieveBuilder(JanApiRouteConfiguration[key], request.params.id),
retrieveBuilder(JanApiRouteConfiguration[key], request.params.id)
)
app.delete(`/${key}/:id`, async (request: any) =>
deleteBuilder(JanApiRouteConfiguration[key], request.params.id),
deleteBuilder(JanApiRouteConfiguration[key], request.params.id)
)
})
// Download Model Routes
app.get(`/models/download/:modelId`, async (request: any) =>
downloadModel(request.params.modelId, { ignoreSSL: request.query.ignoreSSL === 'true', proxy: request.query.proxy }),
downloadModel(request.params.modelId, {
ignoreSSL: request.query.ignoreSSL === 'true',
proxy: request.query.proxy,
})
)
app.put(`/models/:modelId/start`, async (request: any) => {
let settingParams: ModelSettingParams | undefined = undefined
if (Object.keys(request.body).length !== 0) {
settingParams = JSON.parse(request.body) as ModelSettingParams
}
return startModel(request.params.modelId, settingParams)
})
app.put(`/models/:modelId/stop`, async (request: any) => stopModel(request.params.modelId))
// Chat Completion Routes
app.post(`/chat/completions`, async (request: any, reply: any) => chatCompletions(request, reply))
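
The two PUT routes above are the public surface of this change. A rough client-side sketch follows; the base URL and model id are placeholders, and note that the start handler reads an optional ModelSettingParams body (ctx_len, ngl, cont_batching, etc.) via JSON.parse, so how a settings body must be encoded depends on the server's content-type parsing:

```ts
// Hypothetical client calls against the new routes (base URL and model id are assumptions).
const JAN_API_BASE = 'http://localhost:1337' // adjust to wherever the Jan API server listens
const modelId = 'mistral-ins-7b-q4' // any model folder under <jan data folder>/models

// PUT /models/:modelId/start -- with no settings override, the model's model.json settings apply.
const started = await fetch(`${JAN_API_BASE}/models/${modelId}/start`, { method: 'PUT' })
console.log(await started.json()) // { message: 'Model <id> started' } or { error: ... }

// PUT /models/:modelId/stop -- asks Nitro to shut down and kills the subprocess.
const stopped = await fetch(`${JAN_API_BASE}/models/${modelId}/stop`, { method: 'PUT' })
console.log(await stopped.json()) // { message: 'Model stopped' } or { error: "Model isn't running" }
```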

View File

@ -1,16 +1,18 @@
import { AppConfiguration } from "../../types";
import { join } from "path";
import fs from "fs";
import os from "os";
import { AppConfiguration, SystemResourceInfo } from '../../types'
import { join } from 'path'
import fs from 'fs'
import os from 'os'
import { log, logServer } from '../log'
import childProcess from 'child_process'
// TODO: move this to core
const configurationFileName = "settings.json";
const configurationFileName = 'settings.json'
// TODO: do no specify app name in framework module
const defaultJanDataFolder = join(os.homedir(), "jan");
const defaultJanDataFolder = join(os.homedir(), 'jan')
const defaultAppConfig: AppConfiguration = {
data_folder: defaultJanDataFolder,
};
}
/**
* Getting App Configurations.
@ -20,39 +22,39 @@ const defaultAppConfig: AppConfiguration = {
export const getAppConfigurations = (): AppConfiguration => {
// Retrieve Application Support folder path
// Fallback to user home directory if not found
const configurationFile = getConfigurationFilePath();
const configurationFile = getConfigurationFilePath()
if (!fs.existsSync(configurationFile)) {
// create default app config if we don't have one
console.debug(`App config not found, creating default config at ${configurationFile}`);
fs.writeFileSync(configurationFile, JSON.stringify(defaultAppConfig));
return defaultAppConfig;
console.debug(`App config not found, creating default config at ${configurationFile}`)
fs.writeFileSync(configurationFile, JSON.stringify(defaultAppConfig))
return defaultAppConfig
}
try {
const appConfigurations: AppConfiguration = JSON.parse(
fs.readFileSync(configurationFile, "utf-8"),
);
return appConfigurations;
fs.readFileSync(configurationFile, 'utf-8')
)
return appConfigurations
} catch (err) {
console.error(`Failed to read app config, return default config instead! Err: ${err}`);
return defaultAppConfig;
console.error(`Failed to read app config, return default config instead! Err: ${err}`)
return defaultAppConfig
}
}
};
const getConfigurationFilePath = () =>
join(
global.core?.appPath() || process.env[process.platform == "win32" ? "USERPROFILE" : "HOME"],
configurationFileName,
);
global.core?.appPath() || process.env[process.platform == 'win32' ? 'USERPROFILE' : 'HOME'],
configurationFileName
)
export const updateAppConfiguration = (configuration: AppConfiguration): Promise<void> => {
const configurationFile = getConfigurationFilePath();
console.debug("updateAppConfiguration, configurationFile: ", configurationFile);
const configurationFile = getConfigurationFilePath()
console.debug('updateAppConfiguration, configurationFile: ', configurationFile)
fs.writeFileSync(configurationFile, JSON.stringify(configuration));
return Promise.resolve();
};
fs.writeFileSync(configurationFile, JSON.stringify(configuration))
return Promise.resolve()
}
/**
* Utility function to get server log path
@ -60,13 +62,13 @@ export const updateAppConfiguration = (configuration: AppConfiguration): Promise
* @returns {string} The log path.
*/
export const getServerLogPath = (): string => {
const appConfigurations = getAppConfigurations();
const logFolderPath = join(appConfigurations.data_folder, "logs");
const appConfigurations = getAppConfigurations()
const logFolderPath = join(appConfigurations.data_folder, 'logs')
if (!fs.existsSync(logFolderPath)) {
fs.mkdirSync(logFolderPath, { recursive: true });
fs.mkdirSync(logFolderPath, { recursive: true })
}
return join(logFolderPath, 'server.log')
}
return join(logFolderPath, "server.log");
};
/**
* Utility function to get app log path
@ -74,13 +76,13 @@ export const getServerLogPath = (): string => {
* @returns {string} The log path.
*/
export const getAppLogPath = (): string => {
const appConfigurations = getAppConfigurations();
const logFolderPath = join(appConfigurations.data_folder, "logs");
const appConfigurations = getAppConfigurations()
const logFolderPath = join(appConfigurations.data_folder, 'logs')
if (!fs.existsSync(logFolderPath)) {
fs.mkdirSync(logFolderPath, { recursive: true });
fs.mkdirSync(logFolderPath, { recursive: true })
}
return join(logFolderPath, 'app.log')
}
return join(logFolderPath, "app.log");
};
/**
* Utility function to get data folder path
@ -88,9 +90,9 @@ export const getAppLogPath = (): string => {
* @returns {string} The data folder path.
*/
export const getJanDataFolderPath = (): string => {
const appConfigurations = getAppConfigurations();
return appConfigurations.data_folder;
};
const appConfigurations = getAppConfigurations()
return appConfigurations.data_folder
}
/**
* Utility function to get extension path
@ -98,6 +100,70 @@ export const getJanDataFolderPath = (): string => {
* @returns {string} The extensions path.
*/
export const getJanExtensionsPath = (): string => {
const appConfigurations = getAppConfigurations();
return join(appConfigurations.data_folder, "extensions");
};
const appConfigurations = getAppConfigurations()
return join(appConfigurations.data_folder, 'extensions')
}
/**
* Utility function to get the physical CPU count
*
* @returns {number} The physical cpu count.
*/
export const physicalCpuCount = async (): Promise<number> => {
const platform = os.platform()
if (platform === 'linux') {
const output = await exec('lscpu -p | egrep -v "^#" | sort -u -t, -k 2,4 | wc -l')
return parseInt(output.trim(), 10)
} else if (platform === 'darwin') {
const output = await exec('sysctl -n hw.physicalcpu_max')
return parseInt(output.trim(), 10)
} else if (platform === 'win32') {
const output = await exec('WMIC CPU Get NumberOfCores')
return output
.split(os.EOL)
.map((line: string) => parseInt(line))
.filter((value: number) => !isNaN(value))
.reduce((sum: number, number: number) => sum + number, 1)
} else {
const cores = os.cpus().filter((cpu: any, index: number) => {
const hasHyperthreading = cpu.model.includes('Intel')
const isOdd = index % 2 === 1
return !hasHyperthreading || isOdd
})
return cores.length
}
}
const exec = async (command: string): Promise<string> => {
return new Promise((resolve, reject) => {
childProcess.exec(command, { encoding: 'utf8' }, (error, stdout) => {
if (error) {
reject(error)
} else {
resolve(stdout)
}
})
})
}
export const getSystemResourceInfo = async (): Promise<SystemResourceInfo> => {
const cpu = await physicalCpuCount()
const message = `[NITRO]::CPU informations - ${cpu}`
log(message)
logServer(message)
return {
numCpuPhysicalCore: cpu,
memAvailable: 0, // TODO: this should not be 0
}
}
export const getEngineConfiguration = async (engineId: string) => {
if (engineId !== 'openai') {
return undefined
}
const directoryPath = join(getJanDataFolderPath(), 'engines')
const filePath = join(directoryPath, `${engineId}.json`)
const data = fs.readFileSync(filePath, 'utf-8')
return JSON.parse(data)
}

View File

@ -6,3 +6,4 @@ export * from './inference'
export * from './monitoring'
export * from './file'
export * from './config'
export * from './miscellaneous'

View File

@ -0,0 +1,2 @@
export * from './systemResourceInfo'
export * from './promptTemplate'

View File

@ -0,0 +1,6 @@
export type PromptTemplate = {
system_prompt?: string
ai_prompt?: string
user_prompt?: string
error?: string
}

View File

@ -0,0 +1,4 @@
export type SystemResourceInfo = {
numCpuPhysicalCore: number
memAvailable: number
}

View File

@ -123,6 +123,7 @@ export type ModelSettingParams = {
user_prompt?: string
llama_model_path?: string
mmproj?: string
cont_batching?: boolean
}
/**

View File

@ -2,22 +2,6 @@ declare const NODE: string;
declare const INFERENCE_URL: string;
declare const TROUBLESHOOTING_URL: string;
/**
* The parameters for the initModel function.
* @property settings - The settings for the machine learning model.
* @property settings.ctx_len - The context length.
* @property settings.ngl - The number of generated tokens.
* @property settings.cont_batching - Whether to use continuous batching.
* @property settings.embedding - Whether to use embedding.
*/
interface EngineSettings {
ctx_len: number;
ngl: number;
cpu_threads: number;
cont_batching: boolean;
embedding: boolean;
}
/**
* The response from the initModel function.
* @property error - An error message if the model fails to load.
@ -26,8 +10,3 @@ interface ModelOperationResponse {
error?: any;
modelFile?: string;
}
interface ResourcesInfo {
numCpuPhysicalCore: number;
memAvailable: number;
}

View File

@ -24,6 +24,7 @@ import {
MessageEvent,
ModelEvent,
InferenceEvent,
ModelSettingParams,
} from "@janhq/core";
import { requestInference } from "./helpers/sse";
import { ulid } from "ulid";
@ -45,7 +46,7 @@ export default class JanInferenceNitroExtension extends InferenceExtension {
private _currentModel: Model | undefined;
private _engineSettings: EngineSettings = {
private _engineSettings: ModelSettingParams = {
ctx_len: 2048,
ngl: 100,
cpu_threads: 1,

View File

@ -3,11 +3,19 @@ import path from "path";
import { ChildProcessWithoutNullStreams, spawn } from "child_process";
import tcpPortUsed from "tcp-port-used";
import fetchRT from "fetch-retry";
import { log, getJanDataFolderPath } from "@janhq/core/node";
import {
log,
getJanDataFolderPath,
getSystemResourceInfo,
} from "@janhq/core/node";
import { getNitroProcessInfo, updateNvidiaInfo } from "./nvidia";
import { Model, InferenceEngine, ModelSettingParams } from "@janhq/core";
import {
Model,
InferenceEngine,
ModelSettingParams,
PromptTemplate,
} from "@janhq/core";
import { executableNitroFile } from "./execute";
import { physicalCpuCount } from "./utils";
// Polyfill fetch with retry
const fetchRetry = fetchRT(fetch);
@ -20,16 +28,6 @@ interface ModelInitOptions {
model: Model;
}
/**
* The response object of Prompt Template parsing.
*/
interface PromptTemplate {
system_prompt?: string;
ai_prompt?: string;
user_prompt?: string;
error?: string;
}
/**
* Model setting args for Nitro model load.
*/
@ -78,7 +76,7 @@ function stopModel(): Promise<void> {
* TODO: Should pass absolute of the model file instead of just the name - So we can modurize the module.ts to npm package
*/
async function runModel(
wrapper: ModelInitOptions,
wrapper: ModelInitOptions
): Promise<ModelOperationResponse | void> {
if (wrapper.model.engine !== InferenceEngine.nitro) {
// Not a nitro model
@ -96,7 +94,7 @@ async function runModel(
const ggufBinFile = files.find(
(file) =>
file === path.basename(currentModelFile) ||
file.toLowerCase().includes(SUPPORTED_MODEL_FORMAT),
file.toLowerCase().includes(SUPPORTED_MODEL_FORMAT)
);
if (!ggufBinFile) return Promise.reject("No GGUF model file found");
@ -106,7 +104,7 @@ async function runModel(
if (wrapper.model.engine !== InferenceEngine.nitro) {
return Promise.reject("Not a nitro model");
} else {
const nitroResourceProbe = await getResourcesInfo();
const nitroResourceProbe = await getSystemResourceInfo();
// Convert settings.prompt_template to system_prompt, user_prompt, ai_prompt
if (wrapper.model.settings.prompt_template) {
const promptTemplate = wrapper.model.settings.prompt_template;
@ -191,10 +189,10 @@ function promptTemplateConverter(promptTemplate: string): PromptTemplate {
const system_prompt = promptTemplate.substring(0, systemIndex);
const user_prompt = promptTemplate.substring(
systemIndex + systemMarker.length,
promptIndex,
promptIndex
);
const ai_prompt = promptTemplate.substring(
promptIndex + promptMarker.length,
promptIndex + promptMarker.length
);
// Return the split parts
@ -204,7 +202,7 @@ function promptTemplateConverter(promptTemplate: string): PromptTemplate {
const promptIndex = promptTemplate.indexOf(promptMarker);
const user_prompt = promptTemplate.substring(0, promptIndex);
const ai_prompt = promptTemplate.substring(
promptIndex + promptMarker.length,
promptIndex + promptMarker.length
);
// Return the split parts
@ -233,8 +231,8 @@ function loadLLMModel(settings: any): Promise<Response> {
.then((res) => {
log(
`[NITRO]::Debug: Load model success with response ${JSON.stringify(
res,
)}`,
res
)}`
);
return Promise.resolve(res);
})
@ -263,8 +261,8 @@ async function validateModelStatus(): Promise<void> {
}).then(async (res: Response) => {
log(
`[NITRO]::Debug: Validate model state success with response ${JSON.stringify(
res,
)}`,
res
)}`
);
// If the response is OK, check model_loaded status.
if (res.ok) {
@ -315,7 +313,7 @@ function spawnNitroProcess(): Promise<any> {
const args: string[] = ["1", LOCAL_HOST, PORT.toString()];
// Execute the binary
log(
`[NITRO]::Debug: Spawn nitro at path: ${executableOptions.executablePath}, and args: ${args}`,
`[NITRO]::Debug: Spawn nitro at path: ${executableOptions.executablePath}, and args: ${args}`
);
subprocess = spawn(
executableOptions.executablePath,
@ -326,7 +324,7 @@ function spawnNitroProcess(): Promise<any> {
...process.env,
CUDA_VISIBLE_DEVICES: executableOptions.cudaVisibleDevices,
},
},
}
);
// Handle subprocess output
@ -351,22 +349,6 @@ function spawnNitroProcess(): Promise<any> {
});
}
/**
* Get the system resources information
* TODO: Move to Core so that it can be reused
*/
function getResourcesInfo(): Promise<ResourcesInfo> {
return new Promise(async (resolve) => {
const cpu = await physicalCpuCount();
log(`[NITRO]::CPU informations - ${cpu}`);
const response: ResourcesInfo = {
numCpuPhysicalCore: cpu,
memAvailable: 0,
};
resolve(response);
});
}
/**
* Every module should have a dispose function
* This will be called when the extension is unloaded and should clean up any resources

View File

@ -1,56 +0,0 @@
import os from "os";
import childProcess from "child_process";
function exec(command: string): Promise<string> {
return new Promise((resolve, reject) => {
childProcess.exec(command, { encoding: "utf8" }, (error, stdout) => {
if (error) {
reject(error);
} else {
resolve(stdout);
}
});
});
}
let amount: number;
const platform = os.platform();
export async function physicalCpuCount(): Promise<number> {
return new Promise((resolve, reject) => {
if (platform === "linux") {
exec('lscpu -p | egrep -v "^#" | sort -u -t, -k 2,4 | wc -l')
.then((output) => {
amount = parseInt(output.trim(), 10);
resolve(amount);
})
.catch(reject);
} else if (platform === "darwin") {
exec("sysctl -n hw.physicalcpu_max")
.then((output) => {
amount = parseInt(output.trim(), 10);
resolve(amount);
})
.catch(reject);
} else if (platform === "win32") {
exec("WMIC CPU Get NumberOfCores")
.then((output) => {
amount = output
.split(os.EOL)
.map((line: string) => parseInt(line))
.filter((value: number) => !isNaN(value))
.reduce((sum: number, number: number) => sum + number, 1);
resolve(amount);
})
.catch(reject);
} else {
const cores = os.cpus().filter((cpu: any, index: number) => {
const hasHyperthreading = cpu.model.includes("Intel");
const isOdd = index % 2 === 1;
return !hasHyperthreading || isOdd;
});
amount = cores.length;
resolve(amount);
}
});
}

View File

@ -26,6 +26,8 @@
"dotenv": "^16.3.1",
"fastify": "^4.24.3",
"request": "^2.88.2",
"fetch-retry": "^5.0.6",
"tcp-port-used": "^1.0.2",
"request-progress": "^3.0.0"
},
"devDependencies": {
@ -35,6 +37,7 @@
"@typescript-eslint/parser": "^6.7.3",
"eslint-plugin-react": "^7.33.2",
"run-script-os": "^1.1.6",
"@types/tcp-port-used": "^1.0.4",
"typescript": "^5.2.2"
}
}

View File

@ -7,7 +7,7 @@ import {
ThreadState,
Model,
} from '@janhq/core'
import { atom, useAtom, useAtomValue, useSetAtom } from 'jotai'
import { atom, useAtomValue, useSetAtom } from 'jotai'
import { fileUploadAtom } from '@/containers/Providers/Jotai'
@ -48,7 +48,8 @@ export const useCreateNewThread = () => {
const createNewThread = useSetAtom(createNewThreadAtom)
const setActiveThreadId = useSetAtom(setActiveThreadIdAtom)
const updateThread = useSetAtom(updateThreadAtom)
const [fileUpload, setFileUpload] = useAtom(fileUploadAtom)
const setFileUpload = useSetAtom(fileUploadAtom)
const { deleteThread } = useDeleteThread()
const requestCreateNewThread = async (

View File

@ -1,5 +1,3 @@
import { useEffect } from 'react'
import {
InferenceEvent,
ExtensionTypeEnum,

View File

@ -1,7 +1,6 @@
/* eslint-disable @typescript-eslint/no-explicit-any */
'use client'
import React, { useEffect, useState } from 'react'
import React, { useCallback, useEffect, useState } from 'react'
import ScrollToBottom from 'react-scroll-to-bottom'
@ -81,14 +80,17 @@ const LocalServerScreen = () => {
const [firstTimeVisitAPIServer, setFirstTimeVisitAPIServer] =
useState<boolean>(false)
const handleChangePort = (value: any) => {
const handleChangePort = useCallback(
(value: string) => {
if (Number(value) <= 0 || Number(value) >= 65536) {
setErrorRangePort(true)
} else {
setErrorRangePort(false)
}
setPort(value)
}
},
[setPort]
)
useEffect(() => {
if (localStorage.getItem(FIRST_TIME_VISIT_API_SERVER) == null) {
@ -98,7 +100,7 @@ const LocalServerScreen = () => {
useEffect(() => {
handleChangePort(port)
}, [])
}, [handleChangePort, port])
return (
<div className="flex h-full w-full">

View File

@ -1,6 +1,4 @@
import React, { useCallback, useEffect, useState } from 'react'
import { fs, AppConfiguration, joinPath, getUserHomePath } from '@janhq/core'
import React, { useCallback, useState } from 'react'
import {
Modal,