feat: add start/stop model via http api (#1862)

Signed-off-by: nam <namnh0122@gmail.com>

Parent: 4b8b13b5d3
Commit: 4116aaa98a
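
This change exposes model lifecycle control over Jan's local HTTP API: a new PUT /models/:modelId/start route that spawns the Nitro subprocess and loads the model's GGUF file, a matching PUT /models/:modelId/stop route, shared Nitro constants in core/src/node/api/common/consts.ts, and getSystemResourceInfo/getEngineConfiguration helpers consolidated into the shared node utils module.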

@@ -2,7 +2,8 @@ import fs from 'fs'
 import { JanApiRouteConfiguration, RouteConfiguration } from './configuration'
 import { join } from 'path'
 import { ContentType, MessageStatus, Model, ThreadMessage } from './../../../index'
-import { getJanDataFolderPath } from '../../utils'
+import { getEngineConfiguration, getJanDataFolderPath } from '../../utils'
+import { DEFAULT_CHAT_COMPLETION_URL } from './consts'
 
 export const getBuilder = async (configuration: RouteConfiguration) => {
   const directoryPath = join(getJanDataFolderPath(), configuration.dirName)
@@ -309,7 +310,7 @@ export const chatCompletions = async (request: any, reply: any) => {
   const engineConfiguration = await getEngineConfiguration(requestedModel.engine)
 
   let apiKey: string | undefined = undefined
-  let apiUrl: string = 'http://127.0.0.1:3928/inferences/llamacpp/chat_completion' // default nitro url
+  let apiUrl: string = DEFAULT_CHAT_COMPLETION_URL
 
   if (engineConfiguration) {
     apiKey = engineConfiguration.api_key
@@ -320,7 +321,7 @@ export const chatCompletions = async (request: any, reply: any) => {
     'Content-Type': 'text/event-stream',
     'Cache-Control': 'no-cache',
     'Connection': 'keep-alive',
-    "Access-Control-Allow-Origin": "*"
+    'Access-Control-Allow-Origin': '*',
   })
 
   const headers: Record<string, any> = {
@@ -346,13 +347,3 @@ export const chatCompletions = async (request: any, reply: any) => {
     response.body.pipe(reply.raw)
   }
 }
-
-const getEngineConfiguration = async (engineId: string) => {
-  if (engineId !== 'openai') {
-    return undefined
-  }
-  const directoryPath = join(getJanDataFolderPath(), 'engines')
-  const filePath = join(directoryPath, `${engineId}.json`)
-  const data = await fs.readFileSync(filePath, 'utf-8')
-  return JSON.parse(data)
-}

core/src/node/api/common/consts.ts (new file)
@@ -0,0 +1,19 @@
+// The PORT to use for the Nitro subprocess
+export const NITRO_DEFAULT_PORT = 3928
+
+// The HOST address to use for the Nitro subprocess
+export const LOCAL_HOST = '127.0.0.1'
+
+export const SUPPORTED_MODEL_FORMAT = '.gguf'
+
+// The URL for the Nitro subprocess
+const NITRO_HTTP_SERVER_URL = `http://${LOCAL_HOST}:${NITRO_DEFAULT_PORT}`
+// The URL for the Nitro subprocess to load a model
+export const NITRO_HTTP_LOAD_MODEL_URL = `${NITRO_HTTP_SERVER_URL}/inferences/llamacpp/loadmodel`
+// The URL for the Nitro subprocess to validate a model
+export const NITRO_HTTP_VALIDATE_MODEL_URL = `${NITRO_HTTP_SERVER_URL}/inferences/llamacpp/modelstatus`
+
+// The URL for the Nitro subprocess to kill itself
+export const NITRO_HTTP_KILL_URL = `${NITRO_HTTP_SERVER_URL}/processmanager/destroy`
+
+export const DEFAULT_CHAT_COMPLETION_URL = `http://${LOCAL_HOST}:${NITRO_DEFAULT_PORT}/inferences/llamacpp/chat_completion` // default nitro url
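
With the defaults above, the derived endpoints resolve to http://127.0.0.1:3928/inferences/llamacpp/loadmodel, http://127.0.0.1:3928/inferences/llamacpp/modelstatus, and http://127.0.0.1:3928/processmanager/destroy, while DEFAULT_CHAT_COMPLETION_URL resolves to the same literal URL that chatCompletions previously hard-coded.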

core/src/node/api/common/startStopModel.ts (new file)
@@ -0,0 +1,351 @@
+import fs from 'fs'
+import { join } from 'path'
+import { getJanDataFolderPath, getJanExtensionsPath, getSystemResourceInfo } from '../../utils'
+import { logServer } from '../../log'
+import { ChildProcessWithoutNullStreams, spawn } from 'child_process'
+import { Model, ModelSettingParams, PromptTemplate } from '../../../types'
+import {
+  LOCAL_HOST,
+  NITRO_DEFAULT_PORT,
+  NITRO_HTTP_KILL_URL,
+  NITRO_HTTP_LOAD_MODEL_URL,
+  NITRO_HTTP_VALIDATE_MODEL_URL,
+  SUPPORTED_MODEL_FORMAT,
+} from './consts'
+
+// The subprocess instance for Nitro
+let subprocess: ChildProcessWithoutNullStreams | undefined = undefined
+
+// TODO: move this to core type
+interface NitroModelSettings extends ModelSettingParams {
+  llama_model_path: string
+  cpu_threads: number
+}
+
+export const startModel = async (modelId: string, settingParams?: ModelSettingParams) => {
+  try {
+    await runModel(modelId, settingParams)
+
+    return {
+      message: `Model ${modelId} started`,
+    }
+  } catch (e) {
+    return {
+      error: e,
+    }
+  }
+}
+
+const runModel = async (modelId: string, settingParams?: ModelSettingParams): Promise<void> => {
+  const janDataFolderPath = getJanDataFolderPath()
+  const modelFolderFullPath = join(janDataFolderPath, 'models', modelId)
+
+  if (!fs.existsSync(modelFolderFullPath)) {
+    throw `Model not found: ${modelId}`
+  }
+
+  const files: string[] = fs.readdirSync(modelFolderFullPath)
+
+  // Look for GGUF model file
+  const ggufBinFile = files.find((file) => file.toLowerCase().includes(SUPPORTED_MODEL_FORMAT))
+
+  const modelMetadataPath = join(modelFolderFullPath, 'model.json')
+  const modelMetadata: Model = JSON.parse(fs.readFileSync(modelMetadataPath, 'utf-8'))
+
+  if (!ggufBinFile) {
+    throw 'No GGUF model file found'
+  }
+  const modelBinaryPath = join(modelFolderFullPath, ggufBinFile)
+
+  const nitroResourceProbe = await getSystemResourceInfo()
+  const nitroModelSettings: NitroModelSettings = {
+    ...modelMetadata.settings,
+    ...settingParams,
+    llama_model_path: modelBinaryPath,
+    // This is critical and requires real CPU physical core count (or performance core)
+    cpu_threads: Math.max(1, nitroResourceProbe.numCpuPhysicalCore),
+    ...(modelMetadata.settings.mmproj && {
+      mmproj: join(modelFolderFullPath, modelMetadata.settings.mmproj),
+    }),
+  }
+
+  logServer(`[NITRO]::Debug: Nitro model settings: ${JSON.stringify(nitroModelSettings)}`)
+
+  // Convert settings.prompt_template to system_prompt, user_prompt, ai_prompt
+  if (modelMetadata.settings.prompt_template) {
+    const promptTemplate = modelMetadata.settings.prompt_template
+    const prompt = promptTemplateConverter(promptTemplate)
+    if (prompt?.error) {
+      return Promise.reject(prompt.error)
+    }
+    nitroModelSettings.system_prompt = prompt.system_prompt
+    nitroModelSettings.user_prompt = prompt.user_prompt
+    nitroModelSettings.ai_prompt = prompt.ai_prompt
+  }
+
+  await runNitroAndLoadModel(modelId, nitroModelSettings)
+}
+
+// TODO: move to util
+const promptTemplateConverter = (promptTemplate: string): PromptTemplate => {
+  // Split the string using the markers
+  const systemMarker = '{system_message}'
+  const promptMarker = '{prompt}'
+
+  if (promptTemplate.includes(systemMarker) && promptTemplate.includes(promptMarker)) {
+    // Find the indices of the markers
+    const systemIndex = promptTemplate.indexOf(systemMarker)
+    const promptIndex = promptTemplate.indexOf(promptMarker)
+
+    // Extract the parts of the string
+    const system_prompt = promptTemplate.substring(0, systemIndex)
+    const user_prompt = promptTemplate.substring(systemIndex + systemMarker.length, promptIndex)
+    const ai_prompt = promptTemplate.substring(promptIndex + promptMarker.length)
+
+    // Return the split parts
+    return { system_prompt, user_prompt, ai_prompt }
+  } else if (promptTemplate.includes(promptMarker)) {
+    // Extract the parts of the string for the case where only promptMarker is present
+    const promptIndex = promptTemplate.indexOf(promptMarker)
+    const user_prompt = promptTemplate.substring(0, promptIndex)
+    const ai_prompt = promptTemplate.substring(promptIndex + promptMarker.length)

+    // Return the split parts
+    return { user_prompt, ai_prompt }
+  }
+
+  // Return an error if none of the conditions are met
+  return { error: 'Cannot split prompt template' }
+}
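
To make the split concrete: a template with both markers divides into the text before {system_message}, the text between the markers, and the text after {prompt}. For an illustrative template (not taken from this commit):

  promptTemplateConverter('SYSTEM: {system_message}\nUSER: {prompt}\nASSISTANT:')
  // => {
  //   system_prompt: 'SYSTEM: ',
  //   user_prompt:   '\nUSER: ',
  //   ai_prompt:     '\nASSISTANT:',
  // }
  // A template containing only {prompt} yields just user_prompt and ai_prompt.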
+
+const runNitroAndLoadModel = async (modelId: string, modelSettings: NitroModelSettings) => {
+  // Make sure the Nitro port is free before spawning a fresh subprocess
+  const tcpPortUsed = require('tcp-port-used')
+
+  await stopModel(modelId)
+  await tcpPortUsed.waitUntilFree(NITRO_DEFAULT_PORT, 300, 5000)
+
+  /**
+   * There is a problem with the Windows process manager.
+   * We should wait for a while to make sure the port is free and the subprocess is killed.
+   * The tested threshold is 500ms.
+   **/
+  if (process.platform === 'win32') {
+    await new Promise((resolve) => setTimeout(resolve, 500))
+  }
+
+  await spawnNitroProcess()
+  await loadLLMModel(modelSettings)
+  await validateModelStatus()
+}
+
+const spawnNitroProcess = async (): Promise<void> => {
+  logServer(`[NITRO]::Debug: Spawning Nitro subprocess...`)
+
+  let binaryFolder = join(
+    getJanExtensionsPath(),
+    '@janhq',
+    'inference-nitro-extension',
+    'dist',
+    'bin'
+  )
+
+  let executableOptions = executableNitroFile()
+  const tcpPortUsed = require('tcp-port-used')
+
+  const args: string[] = ['1', LOCAL_HOST, NITRO_DEFAULT_PORT.toString()]
+  // Execute the binary
+  logServer(
+    `[NITRO]::Debug: Spawn nitro at path: ${executableOptions.executablePath}, and args: ${args}`
+  )
+  subprocess = spawn(
+    executableOptions.executablePath,
+    ['1', LOCAL_HOST, NITRO_DEFAULT_PORT.toString()],
+    {
+      cwd: binaryFolder,
+      env: {
+        ...process.env,
+        CUDA_VISIBLE_DEVICES: executableOptions.cudaVisibleDevices,
+      },
+    }
+  )
+
+  // Handle subprocess output
+  subprocess.stdout.on('data', (data: any) => {
+    logServer(`[NITRO]::Debug: ${data}`)
+  })
+
+  subprocess.stderr.on('data', (data: any) => {
+    logServer(`[NITRO]::Error: ${data}`)
+  })
+
+  subprocess.on('close', (code: any) => {
+    logServer(`[NITRO]::Debug: Nitro exited with code: ${code}`)
+    subprocess = undefined
+  })
+
+  tcpPortUsed.waitUntilUsed(NITRO_DEFAULT_PORT, 300, 30000).then(() => {
+    logServer(`[NITRO]::Debug: Nitro is ready`)
+  })
+}
+
+type NitroExecutableOptions = {
+  executablePath: string
+  cudaVisibleDevices: string
+}
+
+const executableNitroFile = (): NitroExecutableOptions => {
+  const nvidiaInfoFilePath = join(getJanDataFolderPath(), 'settings', 'settings.json')
+  let binaryFolder = join(
+    getJanExtensionsPath(),
+    '@janhq',
+    'inference-nitro-extension',
+    'dist',
+    'bin'
+  )
+
+  let cudaVisibleDevices = ''
+  let binaryName = 'nitro'
+  /**
+   * The binary folder is different for each platform.
+   */
+  if (process.platform === 'win32') {
+    /**
+     * For Windows: win-cpu, win-cuda-11-7, win-cuda-12-0
+     */
+    let nvidiaInfo = JSON.parse(fs.readFileSync(nvidiaInfoFilePath, 'utf-8'))
+    if (nvidiaInfo['run_mode'] === 'cpu') {
+      binaryFolder = join(binaryFolder, 'win-cpu')
+    } else {
+      if (nvidiaInfo['cuda'].version === '12') {
+        binaryFolder = join(binaryFolder, 'win-cuda-12-0')
+      } else {
+        binaryFolder = join(binaryFolder, 'win-cuda-11-7')
+      }
+      cudaVisibleDevices = nvidiaInfo['gpu_highest_vram']
+    }
+    binaryName = 'nitro.exe'
+  } else if (process.platform === 'darwin') {
+    /**
+     * For macOS: mac-arm64 (Apple Silicon), mac-x64 (Intel)
+     */
+    if (process.arch === 'arm64') {
+      binaryFolder = join(binaryFolder, 'mac-arm64')
+    } else {
+      binaryFolder = join(binaryFolder, 'mac-x64')
+    }
+  } else {
+    /**
+     * For Linux: linux-cpu, linux-cuda-11-7, linux-cuda-12-0
+     */
+    let nvidiaInfo = JSON.parse(fs.readFileSync(nvidiaInfoFilePath, 'utf-8'))
+    if (nvidiaInfo['run_mode'] === 'cpu') {
+      binaryFolder = join(binaryFolder, 'linux-cpu')
+    } else {
+      if (nvidiaInfo['cuda'].version === '12') {
+        binaryFolder = join(binaryFolder, 'linux-cuda-12-0')
+      } else {
+        binaryFolder = join(binaryFolder, 'linux-cuda-11-7')
+      }
+      cudaVisibleDevices = nvidiaInfo['gpu_highest_vram']
+    }
+  }
+
+  return {
+    executablePath: join(binaryFolder, binaryName),
+    cudaVisibleDevices,
+  }
+}
+
+const validateModelStatus = async (): Promise<void> => {
+  // Send a GET request to the validation URL.
+  // Retry the request up to 5 times if it fails, with a delay of 500 milliseconds between retries.
+  const fetchRT = require('fetch-retry')
+  const fetchRetry = fetchRT(fetch)
+
+  return fetchRetry(NITRO_HTTP_VALIDATE_MODEL_URL, {
+    method: 'GET',
+    headers: {
+      'Content-Type': 'application/json',
+    },
+    retries: 5,
+    retryDelay: 500,
+  }).then(async (res: Response) => {
+    logServer(`[NITRO]::Debug: Validate model state success with response ${JSON.stringify(res)}`)
+    // If the response is OK, check the model_loaded status.
+    if (res.ok) {
+      const body = await res.json()
+      // If the model is loaded, resolve; otherwise reject so the caller reports failure.
+      if (body.model_loaded) {
+        return Promise.resolve()
+      }
+    }
+    return Promise.reject('Validate model status failed')
+  })
+}
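
The check above keys off a single field of the status payload; inferred from the body.model_loaded test (not from Nitro's API documentation), a healthy instance is assumed to answer roughly:

  // GET /inferences/llamacpp/modelstatus  ->  { "model_loaded": true, ... }
  // A non-2xx response or a falsy model_loaded rejects with 'Validate model status failed'.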
+
+const loadLLMModel = async (settings: NitroModelSettings): Promise<Response> => {
+  logServer(`[NITRO]::Debug: Loading model with params ${JSON.stringify(settings)}`)
+  const fetchRT = require('fetch-retry')
+  const fetchRetry = fetchRT(fetch)
+
+  return fetchRetry(NITRO_HTTP_LOAD_MODEL_URL, {
+    method: 'POST',
+    headers: {
+      'Content-Type': 'application/json',
+    },
+    body: JSON.stringify(settings),
+    retries: 3,
+    retryDelay: 500,
+  })
+    .then((res: any) => {
+      logServer(`[NITRO]::Debug: Load model success with response ${JSON.stringify(res)}`)
+      return Promise.resolve(res)
+    })
+    .catch((err: any) => {
+      logServer(`[NITRO]::Error: Load model failed with error ${err}`)
+      return Promise.reject()
+    })
+}
+
+/**
+ * Stop model and kill nitro process.
+ */
+export const stopModel = async (_modelId: string) => {
+  if (!subprocess) {
+    return {
+      error: "Model isn't running",
+    }
+  }
+  return new Promise((resolve, reject) => {
+    const controller = new AbortController()
+    setTimeout(() => {
+      controller.abort()
+      reject({
+        error: 'Failed to stop model: Timedout',
+      })
+    }, 5000)
+    const tcpPortUsed = require('tcp-port-used')
+    logServer(`[NITRO]::Debug: Request to kill Nitro`)
+
+    fetch(NITRO_HTTP_KILL_URL, {
+      method: 'DELETE',
+      signal: controller.signal,
+    })
+      .then(() => {
+        subprocess?.kill()
+        subprocess = undefined
+      })
+      .catch(() => {
+        // don't need to do anything, we still kill the subprocess
+      })
+      .then(() => tcpPortUsed.waitUntilFree(NITRO_DEFAULT_PORT, 300, 5000))
+      .then(() => logServer(`[NITRO]::Debug: Nitro process is terminated`))
+      .then(() =>
+        resolve({
+          message: 'Model stopped',
+        })
+      )
+  })
+}
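
Because the module keeps a single subprocess handle, only one Nitro instance runs at a time, which is why stopModel ignores its _modelId argument. A minimal in-process lifecycle sketch (the model id is hypothetical and must match a folder under the Jan data folder's models directory):

  import { startModel, stopModel } from './startStopModel'

  const res = await startModel('tinyllama-1.1b')
  // => { message: 'Model tinyllama-1.1b started' } on success, or { error: ... }
  await stopModel('tinyllama-1.1b') // kills the singleton Nitro subprocess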

@@ -10,6 +10,8 @@ import {
 } from '../common/builder'
 
 import { JanApiRouteConfiguration } from '../common/configuration'
+import { startModel, stopModel } from '../common/startStopModel'
+import { ModelSettingParams } from '../../../types'
 
 export const commonRouter = async (app: HttpServer) => {
   // Common Routes
@@ -17,19 +19,33 @@ export const commonRouter = async (app: HttpServer) => {
     app.get(`/${key}`, async (_request) => getBuilder(JanApiRouteConfiguration[key]))
 
     app.get(`/${key}/:id`, async (request: any) =>
-      retrieveBuilder(JanApiRouteConfiguration[key], request.params.id),
+      retrieveBuilder(JanApiRouteConfiguration[key], request.params.id)
     )
 
     app.delete(`/${key}/:id`, async (request: any) =>
-      deleteBuilder(JanApiRouteConfiguration[key], request.params.id),
+      deleteBuilder(JanApiRouteConfiguration[key], request.params.id)
    )
   })
 
   // Download Model Routes
   app.get(`/models/download/:modelId`, async (request: any) =>
-    downloadModel(request.params.modelId, { ignoreSSL: request.query.ignoreSSL === 'true', proxy: request.query.proxy }),
+    downloadModel(request.params.modelId, {
+      ignoreSSL: request.query.ignoreSSL === 'true',
+      proxy: request.query.proxy,
+    })
   )
 
+  app.put(`/models/:modelId/start`, async (request: any) => {
+    let settingParams: ModelSettingParams | undefined = undefined
+    if (Object.keys(request.body).length !== 0) {
+      settingParams = JSON.parse(request.body) as ModelSettingParams
+    }
+
+    return startModel(request.params.modelId, settingParams)
+  })
+
+  app.put(`/models/:modelId/stop`, async (request: any) => stopModel(request.params.modelId))
+
   // Chat Completion Routes
   app.post(`/chat/completions`, async (request: any, reply: any) => chatCompletions(request, reply))
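
A sketch of calling the new routes from a TypeScript client (the base URL is a placeholder for wherever the Jan API server listens, and the model id is hypothetical; since the start handler runs the raw body through JSON.parse, overrides should arrive as a JSON string):

  const base = 'http://localhost:1337' // placeholder host/port

  // Start a model, optionally overriding ModelSettingParams fields
  await fetch(`${base}/models/tinyllama-1.1b/start`, {
    method: 'PUT',
    headers: { 'Content-Type': 'application/json' },
    body: JSON.stringify({ ctx_len: 2048, ngl: 100 }),
  })

  // Stop it again; no body is required
  await fetch(`${base}/models/tinyllama-1.1b/stop`, { method: 'PUT' })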

@@ -1,16 +1,18 @@
-import { AppConfiguration } from "../../types";
-import { join } from "path";
-import fs from "fs";
-import os from "os";
+import { AppConfiguration, SystemResourceInfo } from '../../types'
+import { join } from 'path'
+import fs from 'fs'
+import os from 'os'
+import { log, logServer } from '../log'
+import childProcess from 'child_process'
 
 // TODO: move this to core
-const configurationFileName = "settings.json";
+const configurationFileName = 'settings.json'
 
 // TODO: do no specify app name in framework module
-const defaultJanDataFolder = join(os.homedir(), "jan");
+const defaultJanDataFolder = join(os.homedir(), 'jan')
 const defaultAppConfig: AppConfiguration = {
   data_folder: defaultJanDataFolder,
-};
+}
 
 /**
  * Getting App Configurations.
@@ -20,39 +22,39 @@ const defaultAppConfig: AppConfiguration = {
 export const getAppConfigurations = (): AppConfiguration => {
   // Retrieve Application Support folder path
   // Fallback to user home directory if not found
-  const configurationFile = getConfigurationFilePath();
+  const configurationFile = getConfigurationFilePath()
 
   if (!fs.existsSync(configurationFile)) {
     // create default app config if we don't have one
-    console.debug(`App config not found, creating default config at ${configurationFile}`);
-    fs.writeFileSync(configurationFile, JSON.stringify(defaultAppConfig));
-    return defaultAppConfig;
+    console.debug(`App config not found, creating default config at ${configurationFile}`)
+    fs.writeFileSync(configurationFile, JSON.stringify(defaultAppConfig))
+    return defaultAppConfig
   }
 
   try {
     const appConfigurations: AppConfiguration = JSON.parse(
-      fs.readFileSync(configurationFile, "utf-8"),
-    );
-    return appConfigurations;
+      fs.readFileSync(configurationFile, 'utf-8')
+    )
+    return appConfigurations
   } catch (err) {
-    console.error(`Failed to read app config, return default config instead! Err: ${err}`);
-    return defaultAppConfig;
+    console.error(`Failed to read app config, return default config instead! Err: ${err}`)
+    return defaultAppConfig
   }
-};
+}
 
 const getConfigurationFilePath = () =>
   join(
-    global.core?.appPath() || process.env[process.platform == "win32" ? "USERPROFILE" : "HOME"],
-    configurationFileName,
-  );
+    global.core?.appPath() || process.env[process.platform == 'win32' ? 'USERPROFILE' : 'HOME'],
+    configurationFileName
+  )
 
 export const updateAppConfiguration = (configuration: AppConfiguration): Promise<void> => {
-  const configurationFile = getConfigurationFilePath();
-  console.debug("updateAppConfiguration, configurationFile: ", configurationFile);
+  const configurationFile = getConfigurationFilePath()
+  console.debug('updateAppConfiguration, configurationFile: ', configurationFile)
 
-  fs.writeFileSync(configurationFile, JSON.stringify(configuration));
-  return Promise.resolve();
-};
+  fs.writeFileSync(configurationFile, JSON.stringify(configuration))
+  return Promise.resolve()
+}
 
 /**
  * Utility function to get server log path
@@ -60,13 +62,13 @@ export const updateAppConfiguration = (configuration: AppConfiguration): Promise
  * @returns {string} The log path.
  */
 export const getServerLogPath = (): string => {
-  const appConfigurations = getAppConfigurations();
-  const logFolderPath = join(appConfigurations.data_folder, "logs");
+  const appConfigurations = getAppConfigurations()
+  const logFolderPath = join(appConfigurations.data_folder, 'logs')
   if (!fs.existsSync(logFolderPath)) {
-    fs.mkdirSync(logFolderPath, { recursive: true });
+    fs.mkdirSync(logFolderPath, { recursive: true })
   }
-  return join(logFolderPath, "server.log");
-};
+  return join(logFolderPath, 'server.log')
+}
 
 /**
  * Utility function to get app log path
@@ -74,13 +76,13 @@ export const getServerLogPath = (): string => {
  * @returns {string} The log path.
  */
 export const getAppLogPath = (): string => {
-  const appConfigurations = getAppConfigurations();
-  const logFolderPath = join(appConfigurations.data_folder, "logs");
+  const appConfigurations = getAppConfigurations()
+  const logFolderPath = join(appConfigurations.data_folder, 'logs')
   if (!fs.existsSync(logFolderPath)) {
-    fs.mkdirSync(logFolderPath, { recursive: true });
+    fs.mkdirSync(logFolderPath, { recursive: true })
   }
-  return join(logFolderPath, "app.log");
-};
+  return join(logFolderPath, 'app.log')
+}
 
 /**
  * Utility function to get data folder path
@@ -88,9 +90,9 @@ export const getAppLogPath = (): string => {
  * @returns {string} The data folder path.
  */
 export const getJanDataFolderPath = (): string => {
-  const appConfigurations = getAppConfigurations();
-  return appConfigurations.data_folder;
-};
+  const appConfigurations = getAppConfigurations()
+  return appConfigurations.data_folder
+}
 
 /**
  * Utility function to get extension path
@@ -98,6 +100,70 @@ export const getJanDataFolderPath = (): string => {
  * @returns {string} The extensions path.
  */
 export const getJanExtensionsPath = (): string => {
-  const appConfigurations = getAppConfigurations();
-  return join(appConfigurations.data_folder, "extensions");
-};
+  const appConfigurations = getAppConfigurations()
+  return join(appConfigurations.data_folder, 'extensions')
+}
+
+/**
+ * Utility function to get the physical cpu count
+ *
+ * @returns {number} The physical cpu count.
+ */
+export const physicalCpuCount = async (): Promise<number> => {
+  const platform = os.platform()
+  if (platform === 'linux') {
+    const output = await exec('lscpu -p | egrep -v "^#" | sort -u -t, -k 2,4 | wc -l')
+    return parseInt(output.trim(), 10)
+  } else if (platform === 'darwin') {
+    const output = await exec('sysctl -n hw.physicalcpu_max')
+    return parseInt(output.trim(), 10)
+  } else if (platform === 'win32') {
+    const output = await exec('WMIC CPU Get NumberOfCores')
+    return output
+      .split(os.EOL)
+      .map((line: string) => parseInt(line))
+      .filter((value: number) => !isNaN(value))
+      .reduce((sum: number, number: number) => sum + number, 1)
+  } else {
+    const cores = os.cpus().filter((cpu: any, index: number) => {
+      const hasHyperthreading = cpu.model.includes('Intel')
+      const isOdd = index % 2 === 1
+      return !hasHyperthreading || isOdd
+    })
+    return cores.length
+  }
+}
+
+const exec = async (command: string): Promise<string> => {
+  return new Promise((resolve, reject) => {
+    childProcess.exec(command, { encoding: 'utf8' }, (error, stdout) => {
+      if (error) {
+        reject(error)
+      } else {
+        resolve(stdout)
+      }
+    })
+  })
+}
+
+export const getSystemResourceInfo = async (): Promise<SystemResourceInfo> => {
+  const cpu = await physicalCpuCount()
+  const message = `[NITRO]::CPU informations - ${cpu}`
+  log(message)
+  logServer(message)
+
+  return {
+    numCpuPhysicalCore: cpu,
+    memAvailable: 0, // TODO: this should not be 0
+  }
+}
+
+export const getEngineConfiguration = async (engineId: string) => {
+  if (engineId !== 'openai') {
+    return undefined
+  }
+  const directoryPath = join(getJanDataFolderPath(), 'engines')
+  const filePath = join(directoryPath, `${engineId}.json`)
+  const data = fs.readFileSync(filePath, 'utf-8')
+  return JSON.parse(data)
+}

@@ -6,3 +6,4 @@ export * from './inference'
 export * from './monitoring'
 export * from './file'
 export * from './config'
+export * from './miscellaneous'

core/src/types/miscellaneous/index.ts (new file)
@@ -0,0 +1,2 @@
+export * from './systemResourceInfo'
+export * from './promptTemplate'

core/src/types/miscellaneous/promptTemplate.ts (new file)
@@ -0,0 +1,6 @@
+export type PromptTemplate = {
+  system_prompt?: string
+  ai_prompt?: string
+  user_prompt?: string
+  error?: string
+}

core/src/types/miscellaneous/systemResourceInfo.ts (new file)
@@ -0,0 +1,4 @@
+export type SystemResourceInfo = {
+  numCpuPhysicalCore: number
+  memAvailable: number
+}

@@ -123,6 +123,7 @@ export type ModelSettingParams = {
   user_prompt?: string
   llama_model_path?: string
   mmproj?: string
+  cont_batching?: boolean
 }
 
 /**

@@ -2,22 +2,6 @@ declare const NODE: string;
 declare const INFERENCE_URL: string;
 declare const TROUBLESHOOTING_URL: string;
 
-/**
- * The parameters for the initModel function.
- * @property settings - The settings for the machine learning model.
- * @property settings.ctx_len - The context length.
- * @property settings.ngl - The number of generated tokens.
- * @property settings.cont_batching - Whether to use continuous batching.
- * @property settings.embedding - Whether to use embedding.
- */
-interface EngineSettings {
-  ctx_len: number;
-  ngl: number;
-  cpu_threads: number;
-  cont_batching: boolean;
-  embedding: boolean;
-}
-
 /**
  * The response from the initModel function.
  * @property error - An error message if the model fails to load.
@@ -26,8 +10,3 @@ interface ModelOperationResponse {
   error?: any;
   modelFile?: string;
 }
-
-interface ResourcesInfo {
-  numCpuPhysicalCore: number;
-  memAvailable: number;
-}

@@ -24,6 +24,7 @@ import {
   MessageEvent,
   ModelEvent,
   InferenceEvent,
+  ModelSettingParams,
 } from "@janhq/core";
 import { requestInference } from "./helpers/sse";
 import { ulid } from "ulid";
@@ -45,7 +46,7 @@ export default class JanInferenceNitroExtension extends InferenceExtension {
 
   private _currentModel: Model | undefined;
 
-  private _engineSettings: EngineSettings = {
+  private _engineSettings: ModelSettingParams = {
     ctx_len: 2048,
     ngl: 100,
     cpu_threads: 1,

@@ -3,11 +3,19 @@ import path from "path";
 import { ChildProcessWithoutNullStreams, spawn } from "child_process";
 import tcpPortUsed from "tcp-port-used";
 import fetchRT from "fetch-retry";
-import { log, getJanDataFolderPath } from "@janhq/core/node";
+import {
+  log,
+  getJanDataFolderPath,
+  getSystemResourceInfo,
+} from "@janhq/core/node";
 import { getNitroProcessInfo, updateNvidiaInfo } from "./nvidia";
-import { Model, InferenceEngine, ModelSettingParams } from "@janhq/core";
+import {
+  Model,
+  InferenceEngine,
+  ModelSettingParams,
+  PromptTemplate,
+} from "@janhq/core";
 import { executableNitroFile } from "./execute";
-import { physicalCpuCount } from "./utils";
 
 // Polyfill fetch with retry
 const fetchRetry = fetchRT(fetch);
@@ -20,16 +28,6 @@ interface ModelInitOptions {
   model: Model;
 }
 
-/**
- * The response object of Prompt Template parsing.
- */
-interface PromptTemplate {
-  system_prompt?: string;
-  ai_prompt?: string;
-  user_prompt?: string;
-  error?: string;
-}
-
 /**
  * Model setting args for Nitro model load.
  */
@@ -78,7 +76,7 @@ function stopModel(): Promise<void> {
  * TODO: Should pass absolute of the model file instead of just the name - So we can modurize the module.ts to npm package
  */
 async function runModel(
-  wrapper: ModelInitOptions,
+  wrapper: ModelInitOptions
 ): Promise<ModelOperationResponse | void> {
   if (wrapper.model.engine !== InferenceEngine.nitro) {
     // Not a nitro model
@@ -96,7 +94,7 @@ async function runModel(
   const ggufBinFile = files.find(
     (file) =>
       file === path.basename(currentModelFile) ||
-      file.toLowerCase().includes(SUPPORTED_MODEL_FORMAT),
+      file.toLowerCase().includes(SUPPORTED_MODEL_FORMAT)
   );
 
   if (!ggufBinFile) return Promise.reject("No GGUF model file found");
@@ -106,7 +104,7 @@ async function runModel(
   if (wrapper.model.engine !== InferenceEngine.nitro) {
     return Promise.reject("Not a nitro model");
   } else {
-    const nitroResourceProbe = await getResourcesInfo();
+    const nitroResourceProbe = await getSystemResourceInfo();
     // Convert settings.prompt_template to system_prompt, user_prompt, ai_prompt
     if (wrapper.model.settings.prompt_template) {
       const promptTemplate = wrapper.model.settings.prompt_template;
@@ -191,10 +189,10 @@ function promptTemplateConverter(promptTemplate: string): PromptTemplate {
   const system_prompt = promptTemplate.substring(0, systemIndex);
   const user_prompt = promptTemplate.substring(
     systemIndex + systemMarker.length,
-    promptIndex,
+    promptIndex
   );
   const ai_prompt = promptTemplate.substring(
-    promptIndex + promptMarker.length,
+    promptIndex + promptMarker.length
   );
 
   // Return the split parts
@@ -204,7 +202,7 @@ function promptTemplateConverter(promptTemplate: string): PromptTemplate {
   const promptIndex = promptTemplate.indexOf(promptMarker);
   const user_prompt = promptTemplate.substring(0, promptIndex);
   const ai_prompt = promptTemplate.substring(
-    promptIndex + promptMarker.length,
+    promptIndex + promptMarker.length
   );
 
   // Return the split parts
@@ -233,8 +231,8 @@ function loadLLMModel(settings: any): Promise<Response> {
     .then((res) => {
       log(
         `[NITRO]::Debug: Load model success with response ${JSON.stringify(
-          res,
-        )}`,
+          res
+        )}`
       );
       return Promise.resolve(res);
     })
@@ -263,8 +261,8 @@ async function validateModelStatus(): Promise<void> {
   }).then(async (res: Response) => {
     log(
       `[NITRO]::Debug: Validate model state success with response ${JSON.stringify(
-        res,
-      )}`,
+        res
+      )}`
     );
     // If the response is OK, check model_loaded status.
     if (res.ok) {
@@ -315,7 +313,7 @@ function spawnNitroProcess(): Promise<any> {
   const args: string[] = ["1", LOCAL_HOST, PORT.toString()];
   // Execute the binary
   log(
-    `[NITRO]::Debug: Spawn nitro at path: ${executableOptions.executablePath}, and args: ${args}`,
+    `[NITRO]::Debug: Spawn nitro at path: ${executableOptions.executablePath}, and args: ${args}`
   );
   subprocess = spawn(
     executableOptions.executablePath,
@@ -326,7 +324,7 @@ function spawnNitroProcess(): Promise<any> {
       ...process.env,
       CUDA_VISIBLE_DEVICES: executableOptions.cudaVisibleDevices,
     },
-  },
+  }
   );
 
   // Handle subprocess output
@@ -351,22 +349,6 @@ function spawnNitroProcess(): Promise<any> {
   });
 }
 
-/**
- * Get the system resources information
- * TODO: Move to Core so that it can be reused
- */
-function getResourcesInfo(): Promise<ResourcesInfo> {
-  return new Promise(async (resolve) => {
-    const cpu = await physicalCpuCount();
-    log(`[NITRO]::CPU informations - ${cpu}`);
-    const response: ResourcesInfo = {
-      numCpuPhysicalCore: cpu,
-      memAvailable: 0,
-    };
-    resolve(response);
-  });
-}
-
 /**
  * Every module should have a dispose function
  * This will be called when the extension is unloaded and should clean up any resources

(deleted file)
@@ -1,56 +0,0 @@
-import os from "os";
-import childProcess from "child_process";
-
-function exec(command: string): Promise<string> {
-  return new Promise((resolve, reject) => {
-    childProcess.exec(command, { encoding: "utf8" }, (error, stdout) => {
-      if (error) {
-        reject(error);
-      } else {
-        resolve(stdout);
-      }
-    });
-  });
-}
-
-let amount: number;
-const platform = os.platform();
-
-export async function physicalCpuCount(): Promise<number> {
-  return new Promise((resolve, reject) => {
-    if (platform === "linux") {
-      exec('lscpu -p | egrep -v "^#" | sort -u -t, -k 2,4 | wc -l')
-        .then((output) => {
-          amount = parseInt(output.trim(), 10);
-          resolve(amount);
-        })
-        .catch(reject);
-    } else if (platform === "darwin") {
-      exec("sysctl -n hw.physicalcpu_max")
-        .then((output) => {
-          amount = parseInt(output.trim(), 10);
-          resolve(amount);
-        })
-        .catch(reject);
-    } else if (platform === "win32") {
-      exec("WMIC CPU Get NumberOfCores")
-        .then((output) => {
-          amount = output
-            .split(os.EOL)
-            .map((line: string) => parseInt(line))
-            .filter((value: number) => !isNaN(value))
-            .reduce((sum: number, number: number) => sum + number, 1);
-          resolve(amount);
-        })
-        .catch(reject);
-    } else {
-      const cores = os.cpus().filter((cpu: any, index: number) => {
-        const hasHyperthreading = cpu.model.includes("Intel");
-        const isOdd = index % 2 === 1;
-        return !hasHyperthreading || isOdd;
-      });
-      amount = cores.length;
-      resolve(amount);
-    }
-  });
-}

@@ -26,6 +26,8 @@
     "dotenv": "^16.3.1",
     "fastify": "^4.24.3",
     "request": "^2.88.2",
+    "fetch-retry": "^5.0.6",
+    "tcp-port-used": "^1.0.2",
     "request-progress": "^3.0.0"
   },
   "devDependencies": {
@@ -35,6 +37,7 @@
     "@typescript-eslint/parser": "^6.7.3",
     "eslint-plugin-react": "^7.33.2",
     "run-script-os": "^1.1.6",
+    "@types/tcp-port-used": "^1.0.4",
     "typescript": "^5.2.2"
   }
 }

@@ -7,7 +7,7 @@ import {
   ThreadState,
   Model,
 } from '@janhq/core'
-import { atom, useAtom, useAtomValue, useSetAtom } from 'jotai'
+import { atom, useAtomValue, useSetAtom } from 'jotai'
 
 import { fileUploadAtom } from '@/containers/Providers/Jotai'
 
@@ -48,7 +48,8 @@ export const useCreateNewThread = () => {
   const createNewThread = useSetAtom(createNewThreadAtom)
   const setActiveThreadId = useSetAtom(setActiveThreadIdAtom)
   const updateThread = useSetAtom(updateThreadAtom)
-  const [fileUpload, setFileUpload] = useAtom(fileUploadAtom)
+  const setFileUpload = useSetAtom(fileUploadAtom)
   const { deleteThread } = useDeleteThread()
 
   const requestCreateNewThread = async (

@@ -1,5 +1,3 @@
-import { useEffect } from 'react'
-
 import {
   InferenceEvent,
   ExtensionTypeEnum,

@@ -1,7 +1,6 @@
-/* eslint-disable @typescript-eslint/no-explicit-any */
 'use client'
 
-import React, { useEffect, useState } from 'react'
+import React, { useCallback, useEffect, useState } from 'react'
 
 import ScrollToBottom from 'react-scroll-to-bottom'
 
@@ -81,14 +80,17 @@ const LocalServerScreen = () => {
   const [firstTimeVisitAPIServer, setFirstTimeVisitAPIServer] =
     useState<boolean>(false)
 
-  const handleChangePort = (value: any) => {
-    if (Number(value) <= 0 || Number(value) >= 65536) {
-      setErrorRangePort(true)
-    } else {
-      setErrorRangePort(false)
-    }
-    setPort(value)
-  }
+  const handleChangePort = useCallback(
+    (value: string) => {
+      if (Number(value) <= 0 || Number(value) >= 65536) {
+        setErrorRangePort(true)
+      } else {
+        setErrorRangePort(false)
+      }
+      setPort(value)
+    },
+    [setPort]
+  )
 
   useEffect(() => {
     if (localStorage.getItem(FIRST_TIME_VISIT_API_SERVER) == null) {
@@ -98,7 +100,7 @@ const LocalServerScreen = () => {
 
   useEffect(() => {
     handleChangePort(port)
-  }, [])
+  }, [handleChangePort, port])
 
   return (
     <div className="flex h-full w-full">

@@ -1,6 +1,4 @@
-import React, { useCallback, useEffect, useState } from 'react'
+import React, { useCallback, useState } from 'react'
 
-import { fs, AppConfiguration, joinPath, getUserHomePath } from '@janhq/core'
-
 import {
   Modal,