feat: add start/stop model via http api (#1862)
Signed-off-by: nam <namnh0122@gmail.com>
parent 4b8b13b5d3
commit 4116aaa98a
@@ -2,7 +2,8 @@ import fs from 'fs'
 import { JanApiRouteConfiguration, RouteConfiguration } from './configuration'
 import { join } from 'path'
 import { ContentType, MessageStatus, Model, ThreadMessage } from './../../../index'
-import { getJanDataFolderPath } from '../../utils'
+import { getEngineConfiguration, getJanDataFolderPath } from '../../utils'
+import { DEFAULT_CHAT_COMPLETION_URL } from './consts'
 
 export const getBuilder = async (configuration: RouteConfiguration) => {
   const directoryPath = join(getJanDataFolderPath(), configuration.dirName)
@@ -309,7 +310,7 @@ export const chatCompletions = async (request: any, reply: any) => {
   const engineConfiguration = await getEngineConfiguration(requestedModel.engine)
 
   let apiKey: string | undefined = undefined
-  let apiUrl: string = 'http://127.0.0.1:3928/inferences/llamacpp/chat_completion' // default nitro url
+  let apiUrl: string = DEFAULT_CHAT_COMPLETION_URL
 
   if (engineConfiguration) {
     apiKey = engineConfiguration.api_key
@@ -320,7 +321,7 @@ export const chatCompletions = async (request: any, reply: any) => {
     'Content-Type': 'text/event-stream',
     'Cache-Control': 'no-cache',
     'Connection': 'keep-alive',
-    "Access-Control-Allow-Origin": "*"
+    'Access-Control-Allow-Origin': '*',
   })
 
   const headers: Record<string, any> = {
@@ -346,13 +347,3 @@ export const chatCompletions = async (request: any, reply: any) => {
     response.body.pipe(reply.raw)
   }
 }
-
-const getEngineConfiguration = async (engineId: string) => {
-  if (engineId !== 'openai') {
-    return undefined
-  }
-  const directoryPath = join(getJanDataFolderPath(), 'engines')
-  const filePath = join(directoryPath, `${engineId}.json`)
-  const data = await fs.readFileSync(filePath, 'utf-8')
-  return JSON.parse(data)
-}
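The hunks above swap the hard-coded Nitro completion URL for the shared DEFAULT_CHAT_COMPLETION_URL constant and pull getEngineConfiguration from utils instead of a local copy. A minimal sketch of a client hitting the proxied route, assuming the Jan API server origin below (origin and model id are placeholders, not part of this diff):

    // Hypothetical client call to the /chat/completions route registered later in this commit.
    const response = await fetch('http://localhost:1337/chat/completions', {
      method: 'POST',
      headers: { 'Content-Type': 'application/json' },
      body: JSON.stringify({
        model: 'tinyllama-1.1b', // placeholder model id
        messages: [{ role: 'user', content: 'Hello!' }],
      }),
    })
    console.log(await response.json())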
core/src/node/api/common/consts.ts (new file, 19 lines)
@@ -0,0 +1,19 @@
+// The PORT to use for the Nitro subprocess
+export const NITRO_DEFAULT_PORT = 3928
+
+// The HOST address to use for the Nitro subprocess
+export const LOCAL_HOST = '127.0.0.1'
+
+export const SUPPORTED_MODEL_FORMAT = '.gguf'
+
+// The URL for the Nitro subprocess
+const NITRO_HTTP_SERVER_URL = `http://${LOCAL_HOST}:${NITRO_DEFAULT_PORT}`
+// The URL for the Nitro subprocess to load a model
+export const NITRO_HTTP_LOAD_MODEL_URL = `${NITRO_HTTP_SERVER_URL}/inferences/llamacpp/loadmodel`
+// The URL for the Nitro subprocess to validate a model
+export const NITRO_HTTP_VALIDATE_MODEL_URL = `${NITRO_HTTP_SERVER_URL}/inferences/llamacpp/modelstatus`
+
+// The URL for the Nitro subprocess to kill itself
+export const NITRO_HTTP_KILL_URL = `${NITRO_HTTP_SERVER_URL}/processmanager/destroy`
+
+export const DEFAULT_CHAT_COMPLETION_URL = `http://${LOCAL_HOST}:${NITRO_DEFAULT_PORT}/inferences/llamacpp/chat_completion` // default nitro url
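These constants centralize every Nitro endpoint used by the new start/stop module. A minimal sketch of probing the subprocess with them, assuming the caller can import from this file:

    import { NITRO_HTTP_VALIDATE_MODEL_URL } from './consts'

    // Ask a running Nitro subprocess whether a model is loaded.
    const res = await fetch(NITRO_HTTP_VALIDATE_MODEL_URL, { method: 'GET' })
    if (res.ok) {
      const body = await res.json()
      console.log('model_loaded:', body.model_loaded) // field checked by validateModelStatus below
    }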
core/src/node/api/common/startStopModel.ts (new file, 351 lines)
@@ -0,0 +1,351 @@
+import fs from 'fs'
+import { join } from 'path'
+import { getJanDataFolderPath, getJanExtensionsPath, getSystemResourceInfo } from '../../utils'
+import { logServer } from '../../log'
+import { ChildProcessWithoutNullStreams, spawn } from 'child_process'
+import { Model, ModelSettingParams, PromptTemplate } from '../../../types'
+import {
+  LOCAL_HOST,
+  NITRO_DEFAULT_PORT,
+  NITRO_HTTP_KILL_URL,
+  NITRO_HTTP_LOAD_MODEL_URL,
+  NITRO_HTTP_VALIDATE_MODEL_URL,
+  SUPPORTED_MODEL_FORMAT,
+} from './consts'
+
+// The subprocess instance for Nitro
+let subprocess: ChildProcessWithoutNullStreams | undefined = undefined
+
+// TODO: move this to core type
+interface NitroModelSettings extends ModelSettingParams {
+  llama_model_path: string
+  cpu_threads: number
+}
+
+export const startModel = async (modelId: string, settingParams?: ModelSettingParams) => {
+  try {
+    await runModel(modelId, settingParams)
+
+    return {
+      message: `Model ${modelId} started`,
+    }
+  } catch (e) {
+    return {
+      error: e,
+    }
+  }
+}
+
+const runModel = async (modelId: string, settingParams?: ModelSettingParams): Promise<void> => {
+  const janDataFolderPath = getJanDataFolderPath()
+  const modelFolderFullPath = join(janDataFolderPath, 'models', modelId)
+
+  if (!fs.existsSync(modelFolderFullPath)) {
+    throw `Model not found: ${modelId}`
+  }
+
+  const files: string[] = fs.readdirSync(modelFolderFullPath)
+
+  // Look for GGUF model file
+  const ggufBinFile = files.find((file) => file.toLowerCase().includes(SUPPORTED_MODEL_FORMAT))
+
+  const modelMetadataPath = join(modelFolderFullPath, 'model.json')
+  const modelMetadata: Model = JSON.parse(fs.readFileSync(modelMetadataPath, 'utf-8'))
+
+  if (!ggufBinFile) {
+    throw 'No GGUF model file found'
+  }
+  const modelBinaryPath = join(modelFolderFullPath, ggufBinFile)
+
+  const nitroResourceProbe = await getSystemResourceInfo()
+  const nitroModelSettings: NitroModelSettings = {
+    ...modelMetadata.settings,
+    ...settingParams,
+    llama_model_path: modelBinaryPath,
+    // This is critical and requires real CPU physical core count (or performance core)
+    cpu_threads: Math.max(1, nitroResourceProbe.numCpuPhysicalCore),
+    ...(modelMetadata.settings.mmproj && {
+      mmproj: join(modelFolderFullPath, modelMetadata.settings.mmproj),
+    }),
+  }
+
+  logServer(`[NITRO]::Debug: Nitro model settings: ${JSON.stringify(nitroModelSettings)}`)
+
+  // Convert settings.prompt_template to system_prompt, user_prompt, ai_prompt
+  if (modelMetadata.settings.prompt_template) {
+    const promptTemplate = modelMetadata.settings.prompt_template
+    const prompt = promptTemplateConverter(promptTemplate)
+    if (prompt?.error) {
+      return Promise.reject(prompt.error)
+    }
+    nitroModelSettings.system_prompt = prompt.system_prompt
+    nitroModelSettings.user_prompt = prompt.user_prompt
+    nitroModelSettings.ai_prompt = prompt.ai_prompt
+  }
+
+  await runNitroAndLoadModel(modelId, nitroModelSettings)
+}
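runModel resolves each model under <data folder>/models/<modelId>: it requires a *.gguf binary plus a model.json whose settings are spread into the Nitro load request. A hedged sketch of the fields this code actually touches (values are illustrative; ctx_len and ngl come from ModelSettingParams elsewhere in this diff, and mmproj/prompt_template are optional):

    {
      "settings": {
        "ctx_len": 2048,
        "ngl": 100,
        "prompt_template": "{system_message}\n### Instruction:\n{prompt}\n### Response:\n",
        "mmproj": "mmproj.bin"
      }
    }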
+// TODO: move to util
+const promptTemplateConverter = (promptTemplate: string): PromptTemplate => {
+  // Split the string using the markers
+  const systemMarker = '{system_message}'
+  const promptMarker = '{prompt}'
+
+  if (promptTemplate.includes(systemMarker) && promptTemplate.includes(promptMarker)) {
+    // Find the indices of the markers
+    const systemIndex = promptTemplate.indexOf(systemMarker)
+    const promptIndex = promptTemplate.indexOf(promptMarker)
+
+    // Extract the parts of the string
+    const system_prompt = promptTemplate.substring(0, systemIndex)
+    const user_prompt = promptTemplate.substring(systemIndex + systemMarker.length, promptIndex)
+    const ai_prompt = promptTemplate.substring(promptIndex + promptMarker.length)
+
+    // Return the split parts
+    return { system_prompt, user_prompt, ai_prompt }
+  } else if (promptTemplate.includes(promptMarker)) {
+    // Extract the parts of the string for the case where only promptMarker is present
+    const promptIndex = promptTemplate.indexOf(promptMarker)
+    const user_prompt = promptTemplate.substring(0, promptIndex)
+    const ai_prompt = promptTemplate.substring(promptIndex + promptMarker.length)
+
+    // Return the split parts
+    return { user_prompt, ai_prompt }
+  }
+
+  // Return an error if none of the conditions are met
+  return { error: 'Cannot split prompt template' }
+}
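A worked example of the splitting logic above, traced by hand on an Alpaca-style template (the template string is illustrative):

    const parts = promptTemplateConverter(
      '{system_message}\n### Instruction:\n{prompt}\n### Response:\n'
    )
    // parts.system_prompt === ''                      (nothing before {system_message})
    // parts.user_prompt   === '\n### Instruction:\n'  (between the two markers)
    // parts.ai_prompt     === '\n### Response:\n'     (everything after {prompt})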
+const runNitroAndLoadModel = async (modelId: string, modelSettings: NitroModelSettings) => {
+  // Gather system information for CPU physical cores and memory
+  const tcpPortUsed = require('tcp-port-used')
+
+  await stopModel(modelId)
+  await tcpPortUsed.waitUntilFree(NITRO_DEFAULT_PORT, 300, 5000)
+
+  /**
+   * There is a problem with Windows process manager
+   * Should wait for awhile to make sure the port is free and subprocess is killed
+   * The tested threshold is 500ms
+   **/
+  if (process.platform === 'win32') {
+    await new Promise((resolve) => setTimeout(resolve, 500))
+  }
+
+  await spawnNitroProcess()
+  await loadLLMModel(modelSettings)
+  await validateModelStatus()
+}
+
+const spawnNitroProcess = async (): Promise<void> => {
+  logServer(`[NITRO]::Debug: Spawning Nitro subprocess...`)
+
+  let binaryFolder = join(
+    getJanExtensionsPath(),
+    '@janhq',
+    'inference-nitro-extension',
+    'dist',
+    'bin'
+  )
+
+  let executableOptions = executableNitroFile()
+  const tcpPortUsed = require('tcp-port-used')
+
+  const args: string[] = ['1', LOCAL_HOST, NITRO_DEFAULT_PORT.toString()]
+  // Execute the binary
+  logServer(
+    `[NITRO]::Debug: Spawn nitro at path: ${executableOptions.executablePath}, and args: ${args}`
+  )
+  subprocess = spawn(
+    executableOptions.executablePath,
+    ['1', LOCAL_HOST, NITRO_DEFAULT_PORT.toString()],
+    {
+      cwd: binaryFolder,
+      env: {
+        ...process.env,
+        CUDA_VISIBLE_DEVICES: executableOptions.cudaVisibleDevices,
+      },
+    }
+  )
+
+  // Handle subprocess output
+  subprocess.stdout.on('data', (data: any) => {
+    logServer(`[NITRO]::Debug: ${data}`)
+  })
+
+  subprocess.stderr.on('data', (data: any) => {
+    logServer(`[NITRO]::Error: ${data}`)
+  })
+
+  subprocess.on('close', (code: any) => {
+    logServer(`[NITRO]::Debug: Nitro exited with code: ${code}`)
+    subprocess = undefined
+  })
+
+  tcpPortUsed.waitUntilUsed(NITRO_DEFAULT_PORT, 300, 30000).then(() => {
+    logServer(`[NITRO]::Debug: Nitro is ready`)
+  })
+}
+type NitroExecutableOptions = {
+  executablePath: string
+  cudaVisibleDevices: string
+}
+
+const executableNitroFile = (): NitroExecutableOptions => {
+  const nvidiaInfoFilePath = join(getJanDataFolderPath(), 'settings', 'settings.json')
+  let binaryFolder = join(
+    getJanExtensionsPath(),
+    '@janhq',
+    'inference-nitro-extension',
+    'dist',
+    'bin'
+  )
+
+  let cudaVisibleDevices = ''
+  let binaryName = 'nitro'
+  /**
+   * The binary folder is different for each platform.
+   */
+  if (process.platform === 'win32') {
+    /**
+     * For Windows: win-cpu, win-cuda-11-7, win-cuda-12-0
+     */
+    let nvidiaInfo = JSON.parse(fs.readFileSync(nvidiaInfoFilePath, 'utf-8'))
+    if (nvidiaInfo['run_mode'] === 'cpu') {
+      binaryFolder = join(binaryFolder, 'win-cpu')
+    } else {
+      if (nvidiaInfo['cuda'].version === '12') {
+        binaryFolder = join(binaryFolder, 'win-cuda-12-0')
+      } else {
+        binaryFolder = join(binaryFolder, 'win-cuda-11-7')
+      }
+      cudaVisibleDevices = nvidiaInfo['gpu_highest_vram']
+    }
+    binaryName = 'nitro.exe'
+  } else if (process.platform === 'darwin') {
+    /**
+     * For MacOS: mac-arm64 (Silicon), mac-x64 (Intel)
+     */
+    if (process.arch === 'arm64') {
+      binaryFolder = join(binaryFolder, 'mac-arm64')
+    } else {
+      binaryFolder = join(binaryFolder, 'mac-x64')
+    }
+  } else {
+    /**
+     * For Linux: linux-cpu, linux-cuda-11-7, linux-cuda-12-0
+     */
+    let nvidiaInfo = JSON.parse(fs.readFileSync(nvidiaInfoFilePath, 'utf-8'))
+    if (nvidiaInfo['run_mode'] === 'cpu') {
+      binaryFolder = join(binaryFolder, 'linux-cpu')
+    } else {
+      if (nvidiaInfo['cuda'].version === '12') {
+        binaryFolder = join(binaryFolder, 'linux-cuda-12-0')
+      } else {
+        binaryFolder = join(binaryFolder, 'linux-cuda-11-7')
+      }
+      cudaVisibleDevices = nvidiaInfo['gpu_highest_vram']
+    }
+  }
+
+  return {
+    executablePath: join(binaryFolder, binaryName),
+    cudaVisibleDevices,
+  }
+}
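executableNitroFile keys its platform and GPU choice off <data folder>/settings/settings.json. A hedged sketch of the fields it reads (values illustrative; the file is produced by the Nvidia probe elsewhere in the app):

    {
      "run_mode": "gpu",
      "cuda": { "version": "12" },
      "gpu_highest_vram": "0"
    }

With run_mode set to cpu, the cpu binary folder is selected and CUDA_VISIBLE_DEVICES stays empty.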
+const validateModelStatus = async (): Promise<void> => {
+  // Send a GET request to the validation URL.
+  // Retry the request up to 5 times if it fails, with a delay of 500 milliseconds between retries.
+  const fetchRT = require('fetch-retry')
+  const fetchRetry = fetchRT(fetch)
+
+  return fetchRetry(NITRO_HTTP_VALIDATE_MODEL_URL, {
+    method: 'GET',
+    headers: {
+      'Content-Type': 'application/json',
+    },
+    retries: 5,
+    retryDelay: 500,
+  }).then(async (res: Response) => {
+    logServer(`[NITRO]::Debug: Validate model state success with response ${JSON.stringify(res)}`)
+    // If the response is OK, check model_loaded status.
+    if (res.ok) {
+      const body = await res.json()
+      // If the model is loaded, resolve.
+      // Otherwise, reject with an error message.
+      if (body.model_loaded) {
+        return Promise.resolve()
+      }
+    }
+    return Promise.reject('Validate model status failed')
+  })
+}
+
+const loadLLMModel = async (settings: NitroModelSettings): Promise<Response> => {
+  logServer(`[NITRO]::Debug: Loading model with params ${JSON.stringify(settings)}`)
+  const fetchRT = require('fetch-retry')
+  const fetchRetry = fetchRT(fetch)
+
+  return fetchRetry(NITRO_HTTP_LOAD_MODEL_URL, {
+    method: 'POST',
+    headers: {
+      'Content-Type': 'application/json',
+    },
+    body: JSON.stringify(settings),
+    retries: 3,
+    retryDelay: 500,
+  })
+    .then((res: any) => {
+      logServer(`[NITRO]::Debug: Load model success with response ${JSON.stringify(res)}`)
+      return Promise.resolve(res)
+    })
+    .catch((err: any) => {
+      logServer(`[NITRO]::Error: Load model failed with error ${err}`)
+      return Promise.reject()
+    })
+}
+/**
+ * Stop model and kill nitro process.
+ */
+export const stopModel = async (_modelId: string) => {
+  if (!subprocess) {
+    return {
+      error: "Model isn't running",
+    }
+  }
+  return new Promise((resolve, reject) => {
+    const controller = new AbortController()
+    setTimeout(() => {
+      controller.abort()
+      reject({
+        error: 'Failed to stop model: Timed out',
+      })
+    }, 5000)
+    const tcpPortUsed = require('tcp-port-used')
+    logServer(`[NITRO]::Debug: Request to kill Nitro`)
+
+    fetch(NITRO_HTTP_KILL_URL, {
+      method: 'DELETE',
+      signal: controller.signal,
+    })
+      .then(() => {
+        subprocess?.kill()
+        subprocess = undefined
+      })
+      .catch(() => {
+        // don't need to do anything, we still kill the subprocess
+      })
+      .then(() => tcpPortUsed.waitUntilFree(NITRO_DEFAULT_PORT, 300, 5000))
+      .then(() => logServer(`[NITRO]::Debug: Nitro process is terminated`))
+      .then(() =>
+        resolve({
+          message: 'Model stopped',
+        })
+      )
+  })
+}
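Only startModel and stopModel are exported; the rest of the module is private plumbing around the Nitro subprocess. The result shapes callers can expect, read off the returns above (model id is a placeholder):

    const started = await startModel('tinyllama-1.1b')
    // => { message: 'Model tinyllama-1.1b started' } or { error: ... }

    const stopped = await stopModel('tinyllama-1.1b')
    // => { message: 'Model stopped' } or { error: "Model isn't running" }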
@@ -10,6 +10,8 @@ import {
 } from '../common/builder'
 
 import { JanApiRouteConfiguration } from '../common/configuration'
+import { startModel, stopModel } from '../common/startStopModel'
+import { ModelSettingParams } from '../../../types'
 
 export const commonRouter = async (app: HttpServer) => {
   // Common Routes
@@ -17,19 +19,33 @@ export const commonRouter = async (app: HttpServer) => {
     app.get(`/${key}`, async (_request) => getBuilder(JanApiRouteConfiguration[key]))
 
     app.get(`/${key}/:id`, async (request: any) =>
-      retrieveBuilder(JanApiRouteConfiguration[key], request.params.id),
+      retrieveBuilder(JanApiRouteConfiguration[key], request.params.id)
     )
 
     app.delete(`/${key}/:id`, async (request: any) =>
-      deleteBuilder(JanApiRouteConfiguration[key], request.params.id),
+      deleteBuilder(JanApiRouteConfiguration[key], request.params.id)
     )
   })
 
   // Download Model Routes
   app.get(`/models/download/:modelId`, async (request: any) =>
-    downloadModel(request.params.modelId, { ignoreSSL: request.query.ignoreSSL === 'true', proxy: request.query.proxy }),
+    downloadModel(request.params.modelId, {
+      ignoreSSL: request.query.ignoreSSL === 'true',
+      proxy: request.query.proxy,
+    })
   )
 
+  app.put(`/models/:modelId/start`, async (request: any) => {
+    let settingParams: ModelSettingParams | undefined = undefined
+    if (Object.keys(request.body).length !== 0) {
+      settingParams = JSON.parse(request.body) as ModelSettingParams
+    }
+
+    return startModel(request.params.modelId, settingParams)
+  })
+
+  app.put(`/models/:modelId/stop`, async (request: any) => stopModel(request.params.modelId))
+
   // Chat Completion Routes
   app.post(`/chat/completions`, async (request: any, reply: any) => chatCompletions(request, reply))
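With these routes registered, a model can be started and stopped over plain HTTP, which is the point of this PR. A minimal sketch, assuming the Jan API server origin below (the port is a placeholder, not part of this diff):

    const API = 'http://localhost:1337'

    // Start a model; the JSON body is an optional ModelSettingParams override.
    await fetch(`${API}/models/tinyllama-1.1b/start`, {
      method: 'PUT',
      headers: { 'Content-Type': 'application/json' },
      body: JSON.stringify({ ctx_len: 2048 }),
    })

    // Stop it again.
    await fetch(`${API}/models/tinyllama-1.1b/stop`, { method: 'PUT' })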
@@ -1,16 +1,18 @@
-import { AppConfiguration } from "../../types";
-import { join } from "path";
-import fs from "fs";
-import os from "os";
+import { AppConfiguration, SystemResourceInfo } from '../../types'
+import { join } from 'path'
+import fs from 'fs'
+import os from 'os'
+import { log, logServer } from '../log'
+import childProcess from 'child_process'
 
 // TODO: move this to core
-const configurationFileName = "settings.json";
+const configurationFileName = 'settings.json'
 
 // TODO: do no specify app name in framework module
-const defaultJanDataFolder = join(os.homedir(), "jan");
+const defaultJanDataFolder = join(os.homedir(), 'jan')
 const defaultAppConfig: AppConfiguration = {
   data_folder: defaultJanDataFolder,
-};
+}
 
 /**
  * Getting App Configurations.
@@ -20,39 +22,39 @@ const defaultAppConfig: AppConfiguration = {
 export const getAppConfigurations = (): AppConfiguration => {
   // Retrieve Application Support folder path
   // Fallback to user home directory if not found
-  const configurationFile = getConfigurationFilePath();
+  const configurationFile = getConfigurationFilePath()
 
   if (!fs.existsSync(configurationFile)) {
     // create default app config if we don't have one
-    console.debug(`App config not found, creating default config at ${configurationFile}`);
-    fs.writeFileSync(configurationFile, JSON.stringify(defaultAppConfig));
-    return defaultAppConfig;
+    console.debug(`App config not found, creating default config at ${configurationFile}`)
+    fs.writeFileSync(configurationFile, JSON.stringify(defaultAppConfig))
+    return defaultAppConfig
   }
 
   try {
     const appConfigurations: AppConfiguration = JSON.parse(
-      fs.readFileSync(configurationFile, "utf-8"),
-    );
-    return appConfigurations;
+      fs.readFileSync(configurationFile, 'utf-8')
+    )
+    return appConfigurations
   } catch (err) {
-    console.error(`Failed to read app config, return default config instead! Err: ${err}`);
-    return defaultAppConfig;
+    console.error(`Failed to read app config, return default config instead! Err: ${err}`)
+    return defaultAppConfig
   }
-};
+}
 
 const getConfigurationFilePath = () =>
   join(
-    global.core?.appPath() || process.env[process.platform == "win32" ? "USERPROFILE" : "HOME"],
-    configurationFileName,
-  );
+    global.core?.appPath() || process.env[process.platform == 'win32' ? 'USERPROFILE' : 'HOME'],
+    configurationFileName
+  )
 
 export const updateAppConfiguration = (configuration: AppConfiguration): Promise<void> => {
-  const configurationFile = getConfigurationFilePath();
-  console.debug("updateAppConfiguration, configurationFile: ", configurationFile);
+  const configurationFile = getConfigurationFilePath()
+  console.debug('updateAppConfiguration, configurationFile: ', configurationFile)
 
-  fs.writeFileSync(configurationFile, JSON.stringify(configuration));
-  return Promise.resolve();
-};
+  fs.writeFileSync(configurationFile, JSON.stringify(configuration))
+  return Promise.resolve()
+}
 
 /**
  * Utility function to get server log path
@@ -60,13 +62,13 @@ export const updateAppConfiguration = (configuration: AppConfiguration): Promise
  * @returns {string} The log path.
  */
 export const getServerLogPath = (): string => {
-  const appConfigurations = getAppConfigurations();
-  const logFolderPath = join(appConfigurations.data_folder, "logs");
+  const appConfigurations = getAppConfigurations()
+  const logFolderPath = join(appConfigurations.data_folder, 'logs')
   if (!fs.existsSync(logFolderPath)) {
-    fs.mkdirSync(logFolderPath, { recursive: true });
+    fs.mkdirSync(logFolderPath, { recursive: true })
   }
-  return join(logFolderPath, "server.log");
-};
+  return join(logFolderPath, 'server.log')
+}
 
 /**
  * Utility function to get app log path
@@ -74,13 +76,13 @@ export const getServerLogPath = (): string => {
  * @returns {string} The log path.
  */
 export const getAppLogPath = (): string => {
-  const appConfigurations = getAppConfigurations();
-  const logFolderPath = join(appConfigurations.data_folder, "logs");
+  const appConfigurations = getAppConfigurations()
+  const logFolderPath = join(appConfigurations.data_folder, 'logs')
   if (!fs.existsSync(logFolderPath)) {
-    fs.mkdirSync(logFolderPath, { recursive: true });
+    fs.mkdirSync(logFolderPath, { recursive: true })
  }
-  return join(logFolderPath, "app.log");
-};
+  return join(logFolderPath, 'app.log')
+}
 
 /**
  * Utility function to get data folder path
@@ -88,9 +90,9 @@ export const getAppLogPath = (): string => {
  * @returns {string} The data folder path.
  */
 export const getJanDataFolderPath = (): string => {
-  const appConfigurations = getAppConfigurations();
-  return appConfigurations.data_folder;
-};
+  const appConfigurations = getAppConfigurations()
+  return appConfigurations.data_folder
+}
 
 /**
  * Utility function to get extension path
@@ -98,6 +100,70 @@ export const getJanDataFolderPath = (): string => {
  * @returns {string} The extensions path.
 */
 export const getJanExtensionsPath = (): string => {
-  const appConfigurations = getAppConfigurations();
-  return join(appConfigurations.data_folder, "extensions");
-};
+  const appConfigurations = getAppConfigurations()
+  return join(appConfigurations.data_folder, 'extensions')
+}
+
+/**
+ * Utility function to get the physical CPU count
+ *
+ * @returns {number} The physical cpu count.
+ */
+export const physicalCpuCount = async (): Promise<number> => {
+  const platform = os.platform()
+  if (platform === 'linux') {
+    const output = await exec('lscpu -p | egrep -v "^#" | sort -u -t, -k 2,4 | wc -l')
+    return parseInt(output.trim(), 10)
+  } else if (platform === 'darwin') {
+    const output = await exec('sysctl -n hw.physicalcpu_max')
+    return parseInt(output.trim(), 10)
+  } else if (platform === 'win32') {
+    const output = await exec('WMIC CPU Get NumberOfCores')
+    return output
+      .split(os.EOL)
+      .map((line: string) => parseInt(line))
+      .filter((value: number) => !isNaN(value))
+      .reduce((sum: number, number: number) => sum + number, 1)
+  } else {
+    const cores = os.cpus().filter((cpu: any, index: number) => {
+      const hasHyperthreading = cpu.model.includes('Intel')
+      const isOdd = index % 2 === 1
+      return !hasHyperthreading || isOdd
+    })
+    return cores.length
+  }
+}
+
+const exec = async (command: string): Promise<string> => {
+  return new Promise((resolve, reject) => {
+    childProcess.exec(command, { encoding: 'utf8' }, (error, stdout) => {
+      if (error) {
+        reject(error)
+      } else {
+        resolve(stdout)
+      }
+    })
+  })
+}
+
+export const getSystemResourceInfo = async (): Promise<SystemResourceInfo> => {
+  const cpu = await physicalCpuCount()
+  const message = `[NITRO]::CPU informations - ${cpu}`
+  log(message)
+  logServer(message)
+
+  return {
+    numCpuPhysicalCore: cpu,
+    memAvailable: 0, // TODO: this should not be 0
+  }
+}
+
+export const getEngineConfiguration = async (engineId: string) => {
+  if (engineId !== 'openai') {
+    return undefined
+  }
+  const directoryPath = join(getJanDataFolderPath(), 'engines')
+  const filePath = join(directoryPath, `${engineId}.json`)
+  const data = fs.readFileSync(filePath, 'utf-8')
+  return JSON.parse(data)
+}
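getEngineConfiguration resolves <data folder>/engines/openai.json and returns its parsed contents. Only api_key is consumed in builder.ts above; a hedged sketch of such a file (the key value is illustrative, and any other fields would be assumptions):

    {
      "api_key": "sk-..."
    }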
@@ -6,3 +6,4 @@ export * from './inference'
 export * from './monitoring'
 export * from './file'
 export * from './config'
+export * from './miscellaneous'
core/src/types/miscellaneous/index.ts (new file, 2 lines)
@@ -0,0 +1,2 @@
+export * from './systemResourceInfo'
+export * from './promptTemplate'
core/src/types/miscellaneous/promptTemplate.ts (new file, 6 lines)
@@ -0,0 +1,6 @@
+export type PromptTemplate = {
+  system_prompt?: string
+  ai_prompt?: string
+  user_prompt?: string
+  error?: string
+}
core/src/types/miscellaneous/systemResourceInfo.ts (new file, 4 lines)
@@ -0,0 +1,4 @@
+export type SystemResourceInfo = {
+  numCpuPhysicalCore: number
+  memAvailable: number
+}
@@ -123,6 +123,7 @@ export type ModelSettingParams = {
   user_prompt?: string
   llama_model_path?: string
   mmproj?: string
+  cont_batching?: boolean
 }
 
 /**
@@ -2,22 +2,6 @@ declare const NODE: string;
 declare const INFERENCE_URL: string;
 declare const TROUBLESHOOTING_URL: string;
 
-/**
- * The parameters for the initModel function.
- * @property settings - The settings for the machine learning model.
- * @property settings.ctx_len - The context length.
- * @property settings.ngl - The number of generated tokens.
- * @property settings.cont_batching - Whether to use continuous batching.
- * @property settings.embedding - Whether to use embedding.
- */
-interface EngineSettings {
-  ctx_len: number;
-  ngl: number;
-  cpu_threads: number;
-  cont_batching: boolean;
-  embedding: boolean;
-}
-
 /**
  * The response from the initModel function.
  * @property error - An error message if the model fails to load.
@@ -26,8 +10,3 @@ interface ModelOperationResponse {
   error?: any;
   modelFile?: string;
 }
-
-interface ResourcesInfo {
-  numCpuPhysicalCore: number;
-  memAvailable: number;
-}
@@ -24,6 +24,7 @@ import {
   MessageEvent,
   ModelEvent,
   InferenceEvent,
+  ModelSettingParams,
 } from "@janhq/core";
 import { requestInference } from "./helpers/sse";
 import { ulid } from "ulid";
@@ -45,7 +46,7 @@ export default class JanInferenceNitroExtension extends InferenceExtension {
 
   private _currentModel: Model | undefined;
 
-  private _engineSettings: EngineSettings = {
+  private _engineSettings: ModelSettingParams = {
     ctx_len: 2048,
     ngl: 100,
     cpu_threads: 1,
@@ -3,11 +3,19 @@ import path from "path";
 import { ChildProcessWithoutNullStreams, spawn } from "child_process";
 import tcpPortUsed from "tcp-port-used";
 import fetchRT from "fetch-retry";
-import { log, getJanDataFolderPath } from "@janhq/core/node";
+import {
+  log,
+  getJanDataFolderPath,
+  getSystemResourceInfo,
+} from "@janhq/core/node";
 import { getNitroProcessInfo, updateNvidiaInfo } from "./nvidia";
-import { Model, InferenceEngine, ModelSettingParams } from "@janhq/core";
+import {
+  Model,
+  InferenceEngine,
+  ModelSettingParams,
+  PromptTemplate,
+} from "@janhq/core";
 import { executableNitroFile } from "./execute";
-import { physicalCpuCount } from "./utils";
 
 // Polyfill fetch with retry
 const fetchRetry = fetchRT(fetch);
@@ -20,16 +28,6 @@ interface ModelInitOptions {
   model: Model;
 }
 
-/**
- * The response object of Prompt Template parsing.
- */
-interface PromptTemplate {
-  system_prompt?: string;
-  ai_prompt?: string;
-  user_prompt?: string;
-  error?: string;
-}
-
 /**
  * Model setting args for Nitro model load.
  */
@@ -78,7 +76,7 @@ function stopModel(): Promise<void> {
 * TODO: Should pass absolute of the model file instead of just the name - So we can modurize the module.ts to npm package
 */
 async function runModel(
-  wrapper: ModelInitOptions,
+  wrapper: ModelInitOptions
 ): Promise<ModelOperationResponse | void> {
   if (wrapper.model.engine !== InferenceEngine.nitro) {
     // Not a nitro model
@@ -96,7 +94,7 @@ async function runModel(
   const ggufBinFile = files.find(
     (file) =>
       file === path.basename(currentModelFile) ||
-      file.toLowerCase().includes(SUPPORTED_MODEL_FORMAT),
+      file.toLowerCase().includes(SUPPORTED_MODEL_FORMAT)
   );
 
   if (!ggufBinFile) return Promise.reject("No GGUF model file found");
@@ -106,7 +104,7 @@ async function runModel(
   if (wrapper.model.engine !== InferenceEngine.nitro) {
     return Promise.reject("Not a nitro model");
   } else {
-    const nitroResourceProbe = await getResourcesInfo();
+    const nitroResourceProbe = await getSystemResourceInfo();
     // Convert settings.prompt_template to system_prompt, user_prompt, ai_prompt
     if (wrapper.model.settings.prompt_template) {
       const promptTemplate = wrapper.model.settings.prompt_template;
@@ -191,10 +189,10 @@ function promptTemplateConverter(promptTemplate: string): PromptTemplate {
   const system_prompt = promptTemplate.substring(0, systemIndex);
   const user_prompt = promptTemplate.substring(
     systemIndex + systemMarker.length,
-    promptIndex,
+    promptIndex
   );
   const ai_prompt = promptTemplate.substring(
-    promptIndex + promptMarker.length,
+    promptIndex + promptMarker.length
   );
 
   // Return the split parts
@@ -204,7 +202,7 @@ function promptTemplateConverter(promptTemplate: string): PromptTemplate {
   const promptIndex = promptTemplate.indexOf(promptMarker);
   const user_prompt = promptTemplate.substring(0, promptIndex);
   const ai_prompt = promptTemplate.substring(
-    promptIndex + promptMarker.length,
+    promptIndex + promptMarker.length
   );
 
   // Return the split parts
@@ -233,8 +231,8 @@ function loadLLMModel(settings: any): Promise<Response> {
     .then((res) => {
       log(
         `[NITRO]::Debug: Load model success with response ${JSON.stringify(
-          res,
-        )}`,
+          res
+        )}`
       );
       return Promise.resolve(res);
     })
@@ -263,8 +261,8 @@ async function validateModelStatus(): Promise<void> {
   }).then(async (res: Response) => {
     log(
       `[NITRO]::Debug: Validate model state success with response ${JSON.stringify(
-        res,
-      )}`,
+        res
+      )}`
     );
     // If the response is OK, check model_loaded status.
     if (res.ok) {
@@ -315,7 +313,7 @@ function spawnNitroProcess(): Promise<any> {
   const args: string[] = ["1", LOCAL_HOST, PORT.toString()];
   // Execute the binary
   log(
-    `[NITRO]::Debug: Spawn nitro at path: ${executableOptions.executablePath}, and args: ${args}`,
+    `[NITRO]::Debug: Spawn nitro at path: ${executableOptions.executablePath}, and args: ${args}`
   );
   subprocess = spawn(
     executableOptions.executablePath,
@@ -326,7 +324,7 @@ function spawnNitroProcess(): Promise<any> {
       ...process.env,
       CUDA_VISIBLE_DEVICES: executableOptions.cudaVisibleDevices,
     },
-  },
+  }
   );
 
   // Handle subprocess output
@@ -351,22 +349,6 @@ function spawnNitroProcess(): Promise<any> {
   });
 }
 
-/**
- * Get the system resources information
- * TODO: Move to Core so that it can be reused
- */
-function getResourcesInfo(): Promise<ResourcesInfo> {
-  return new Promise(async (resolve) => {
-    const cpu = await physicalCpuCount();
-    log(`[NITRO]::CPU informations - ${cpu}`);
-    const response: ResourcesInfo = {
-      numCpuPhysicalCore: cpu,
-      memAvailable: 0,
-    };
-    resolve(response);
-  });
-}
-
 /**
  * Every module should have a dispose function
  * This will be called when the extension is unloaded and should clean up any resources
@@ -1,56 +0,0 @@
-import os from "os";
-import childProcess from "child_process";
-
-function exec(command: string): Promise<string> {
-  return new Promise((resolve, reject) => {
-    childProcess.exec(command, { encoding: "utf8" }, (error, stdout) => {
-      if (error) {
-        reject(error);
-      } else {
-        resolve(stdout);
-      }
-    });
-  });
-}
-
-let amount: number;
-const platform = os.platform();
-
-export async function physicalCpuCount(): Promise<number> {
-  return new Promise((resolve, reject) => {
-    if (platform === "linux") {
-      exec('lscpu -p | egrep -v "^#" | sort -u -t, -k 2,4 | wc -l')
-        .then((output) => {
-          amount = parseInt(output.trim(), 10);
-          resolve(amount);
-        })
-        .catch(reject);
-    } else if (platform === "darwin") {
-      exec("sysctl -n hw.physicalcpu_max")
-        .then((output) => {
-          amount = parseInt(output.trim(), 10);
-          resolve(amount);
-        })
-        .catch(reject);
-    } else if (platform === "win32") {
-      exec("WMIC CPU Get NumberOfCores")
-        .then((output) => {
-          amount = output
-            .split(os.EOL)
-            .map((line: string) => parseInt(line))
-            .filter((value: number) => !isNaN(value))
-            .reduce((sum: number, number: number) => sum + number, 1);
-          resolve(amount);
-        })
-        .catch(reject);
-    } else {
-      const cores = os.cpus().filter((cpu: any, index: number) => {
-        const hasHyperthreading = cpu.model.includes("Intel");
-        const isOdd = index % 2 === 1;
-        return !hasHyperthreading || isOdd;
-      });
-      amount = cores.length;
-      resolve(amount);
-    }
-  });
-}
@@ -26,6 +26,8 @@
     "dotenv": "^16.3.1",
     "fastify": "^4.24.3",
     "request": "^2.88.2",
+    "fetch-retry": "^5.0.6",
+    "tcp-port-used": "^1.0.2",
     "request-progress": "^3.0.0"
   },
   "devDependencies": {
@@ -35,6 +37,7 @@
     "@typescript-eslint/parser": "^6.7.3",
     "eslint-plugin-react": "^7.33.2",
     "run-script-os": "^1.1.6",
+    "@types/tcp-port-used": "^1.0.4",
     "typescript": "^5.2.2"
   }
 }
@@ -7,7 +7,7 @@ import {
   ThreadState,
   Model,
 } from '@janhq/core'
-import { atom, useAtom, useAtomValue, useSetAtom } from 'jotai'
+import { atom, useAtomValue, useSetAtom } from 'jotai'
 
 import { fileUploadAtom } from '@/containers/Providers/Jotai'
 
@@ -48,7 +48,8 @@ export const useCreateNewThread = () => {
   const createNewThread = useSetAtom(createNewThreadAtom)
   const setActiveThreadId = useSetAtom(setActiveThreadIdAtom)
   const updateThread = useSetAtom(updateThreadAtom)
-  const [fileUpload, setFileUpload] = useAtom(fileUploadAtom)
+  const setFileUpload = useSetAtom(fileUploadAtom)
+
   const { deleteThread } = useDeleteThread()
 
   const requestCreateNewThread = async (
@@ -1,5 +1,3 @@
-import { useEffect } from 'react'
-
 import {
   InferenceEvent,
   ExtensionTypeEnum,
@@ -1,7 +1,6 @@
 /* eslint-disable @typescript-eslint/no-explicit-any */
 'use client'
 
-import React, { useEffect, useState } from 'react'
+import React, { useCallback, useEffect, useState } from 'react'
 
 import ScrollToBottom from 'react-scroll-to-bottom'
 
@@ -81,14 +80,17 @@ const LocalServerScreen = () => {
   const [firstTimeVisitAPIServer, setFirstTimeVisitAPIServer] =
     useState<boolean>(false)
 
-  const handleChangePort = (value: any) => {
-    if (Number(value) <= 0 || Number(value) >= 65536) {
-      setErrorRangePort(true)
-    } else {
-      setErrorRangePort(false)
-    }
-    setPort(value)
-  }
+  const handleChangePort = useCallback(
+    (value: string) => {
+      if (Number(value) <= 0 || Number(value) >= 65536) {
+        setErrorRangePort(true)
+      } else {
+        setErrorRangePort(false)
+      }
+      setPort(value)
+    },
+    [setPort]
+  )
 
   useEffect(() => {
     if (localStorage.getItem(FIRST_TIME_VISIT_API_SERVER) == null) {
@@ -98,7 +100,7 @@ const LocalServerScreen = () => {
 
   useEffect(() => {
     handleChangePort(port)
-  }, [])
+  }, [handleChangePort, port])
 
   return (
     <div className="flex h-full w-full">
@@ -1,6 +1,4 @@
-import React, { useCallback, useEffect, useState } from 'react'
-
-import { fs, AppConfiguration, joinPath, getUserHomePath } from '@janhq/core'
+import React, { useCallback, useState } from 'react'
 
 import {
   Modal,