feat: add start/stop model via http api (#1862)

Signed-off-by: nam <namnh0122@gmail.com>

Parent: 4b8b13b5d3
Commit: 4116aaa98a
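
This change exposes model lifecycle control over Jan's local HTTP API: a new PUT /models/:modelId/start route that spawns the Nitro subprocess and loads the model's GGUF file, a matching PUT /models/:modelId/stop route, shared Nitro constants in core/src/node/api/common/consts.ts, and getSystemResourceInfo/getEngineConfiguration helpers consolidated into the shared node utils module.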

@@ -2,7 +2,8 @@ import fs from 'fs'
 import { JanApiRouteConfiguration, RouteConfiguration } from './configuration'
 import { join } from 'path'
 import { ContentType, MessageStatus, Model, ThreadMessage } from './../../../index'
-import { getJanDataFolderPath } from '../../utils'
+import { getEngineConfiguration, getJanDataFolderPath } from '../../utils'
+import { DEFAULT_CHAT_COMPLETION_URL } from './consts'
 
 export const getBuilder = async (configuration: RouteConfiguration) => {
   const directoryPath = join(getJanDataFolderPath(), configuration.dirName)
@@ -309,7 +310,7 @@ export const chatCompletions = async (request: any, reply: any) => {
   const engineConfiguration = await getEngineConfiguration(requestedModel.engine)
 
   let apiKey: string | undefined = undefined
-  let apiUrl: string = 'http://127.0.0.1:3928/inferences/llamacpp/chat_completion' // default nitro url
+  let apiUrl: string = DEFAULT_CHAT_COMPLETION_URL
 
   if (engineConfiguration) {
     apiKey = engineConfiguration.api_key
@@ -320,7 +321,7 @@ export const chatCompletions = async (request: any, reply: any) => {
     'Content-Type': 'text/event-stream',
     'Cache-Control': 'no-cache',
     'Connection': 'keep-alive',
-    "Access-Control-Allow-Origin": "*"
+    'Access-Control-Allow-Origin': '*',
   })
 
   const headers: Record<string, any> = {
@@ -346,13 +347,3 @@ export const chatCompletions = async (request: any, reply: any) => {
     response.body.pipe(reply.raw)
   }
 }
-
-const getEngineConfiguration = async (engineId: string) => {
-  if (engineId !== 'openai') {
-    return undefined
-  }
-  const directoryPath = join(getJanDataFolderPath(), 'engines')
-  const filePath = join(directoryPath, `${engineId}.json`)
-  const data = await fs.readFileSync(filePath, 'utf-8')
-  return JSON.parse(data)
-}

core/src/node/api/common/consts.ts (new file)
@@ -0,0 +1,19 @@
+// The PORT to use for the Nitro subprocess
+export const NITRO_DEFAULT_PORT = 3928
+
+// The HOST address to use for the Nitro subprocess
+export const LOCAL_HOST = '127.0.0.1'
+
+export const SUPPORTED_MODEL_FORMAT = '.gguf'
+
+// The URL for the Nitro subprocess
+const NITRO_HTTP_SERVER_URL = `http://${LOCAL_HOST}:${NITRO_DEFAULT_PORT}`
+// The URL for the Nitro subprocess to load a model
+export const NITRO_HTTP_LOAD_MODEL_URL = `${NITRO_HTTP_SERVER_URL}/inferences/llamacpp/loadmodel`
+// The URL for the Nitro subprocess to validate a model
+export const NITRO_HTTP_VALIDATE_MODEL_URL = `${NITRO_HTTP_SERVER_URL}/inferences/llamacpp/modelstatus`
+
+// The URL for the Nitro subprocess to kill itself
+export const NITRO_HTTP_KILL_URL = `${NITRO_HTTP_SERVER_URL}/processmanager/destroy`
+
+export const DEFAULT_CHAT_COMPLETION_URL = `http://${LOCAL_HOST}:${NITRO_DEFAULT_PORT}/inferences/llamacpp/chat_completion` // default nitro url
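
With the defaults above, the derived endpoints resolve to http://127.0.0.1:3928/inferences/llamacpp/loadmodel, http://127.0.0.1:3928/inferences/llamacpp/modelstatus, and http://127.0.0.1:3928/processmanager/destroy, while DEFAULT_CHAT_COMPLETION_URL resolves to the same literal URL that chatCompletions previously hard-coded.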

core/src/node/api/common/startStopModel.ts (new file)
@@ -0,0 +1,351 @@
+import fs from 'fs'
+import { join } from 'path'
+import { getJanDataFolderPath, getJanExtensionsPath, getSystemResourceInfo } from '../../utils'
+import { logServer } from '../../log'
+import { ChildProcessWithoutNullStreams, spawn } from 'child_process'
+import { Model, ModelSettingParams, PromptTemplate } from '../../../types'
+import {
+  LOCAL_HOST,
+  NITRO_DEFAULT_PORT,
+  NITRO_HTTP_KILL_URL,
+  NITRO_HTTP_LOAD_MODEL_URL,
+  NITRO_HTTP_VALIDATE_MODEL_URL,
+  SUPPORTED_MODEL_FORMAT,
+} from './consts'
+
+// The subprocess instance for Nitro
+let subprocess: ChildProcessWithoutNullStreams | undefined = undefined
+
+// TODO: move this to core type
+interface NitroModelSettings extends ModelSettingParams {
+  llama_model_path: string
+  cpu_threads: number
+}
+
+export const startModel = async (modelId: string, settingParams?: ModelSettingParams) => {
+  try {
+    await runModel(modelId, settingParams)
+
+    return {
+      message: `Model ${modelId} started`,
+    }
+  } catch (e) {
+    return {
+      error: e,
+    }
+  }
+}
+
+const runModel = async (modelId: string, settingParams?: ModelSettingParams): Promise<void> => {
+  const janDataFolderPath = getJanDataFolderPath()
+  const modelFolderFullPath = join(janDataFolderPath, 'models', modelId)
+
+  if (!fs.existsSync(modelFolderFullPath)) {
+    throw `Model not found: ${modelId}`
+  }
+
+  const files: string[] = fs.readdirSync(modelFolderFullPath)
+
+  // Look for GGUF model file
+  const ggufBinFile = files.find((file) => file.toLowerCase().includes(SUPPORTED_MODEL_FORMAT))
+
+  const modelMetadataPath = join(modelFolderFullPath, 'model.json')
+  const modelMetadata: Model = JSON.parse(fs.readFileSync(modelMetadataPath, 'utf-8'))
+
+  if (!ggufBinFile) {
+    throw 'No GGUF model file found'
+  }
+  const modelBinaryPath = join(modelFolderFullPath, ggufBinFile)
+
+  const nitroResourceProbe = await getSystemResourceInfo()
+  const nitroModelSettings: NitroModelSettings = {
+    ...modelMetadata.settings,
+    ...settingParams,
+    llama_model_path: modelBinaryPath,
+    // This is critical and requires real CPU physical core count (or performance core)
+    cpu_threads: Math.max(1, nitroResourceProbe.numCpuPhysicalCore),
+    ...(modelMetadata.settings.mmproj && {
+      mmproj: join(modelFolderFullPath, modelMetadata.settings.mmproj),
+    }),
+  }
+
+  logServer(`[NITRO]::Debug: Nitro model settings: ${JSON.stringify(nitroModelSettings)}`)
+
+  // Convert settings.prompt_template to system_prompt, user_prompt, ai_prompt
+  if (modelMetadata.settings.prompt_template) {
+    const promptTemplate = modelMetadata.settings.prompt_template
+    const prompt = promptTemplateConverter(promptTemplate)
+    if (prompt?.error) {
+      return Promise.reject(prompt.error)
+    }
+    nitroModelSettings.system_prompt = prompt.system_prompt
+    nitroModelSettings.user_prompt = prompt.user_prompt
+    nitroModelSettings.ai_prompt = prompt.ai_prompt
+  }
+
+  await runNitroAndLoadModel(modelId, nitroModelSettings)
+}
+
+// TODO: move to util
+const promptTemplateConverter = (promptTemplate: string): PromptTemplate => {
+  // Split the string using the markers
+  const systemMarker = '{system_message}'
+  const promptMarker = '{prompt}'
+
+  if (promptTemplate.includes(systemMarker) && promptTemplate.includes(promptMarker)) {
+    // Find the indices of the markers
+    const systemIndex = promptTemplate.indexOf(systemMarker)
+    const promptIndex = promptTemplate.indexOf(promptMarker)
+
+    // Extract the parts of the string
+    const system_prompt = promptTemplate.substring(0, systemIndex)
+    const user_prompt = promptTemplate.substring(systemIndex + systemMarker.length, promptIndex)
+    const ai_prompt = promptTemplate.substring(promptIndex + promptMarker.length)
+
+    // Return the split parts
+    return { system_prompt, user_prompt, ai_prompt }
+  } else if (promptTemplate.includes(promptMarker)) {
+    // Extract the parts of the string for the case where only promptMarker is present
+    const promptIndex = promptTemplate.indexOf(promptMarker)
+    const user_prompt = promptTemplate.substring(0, promptIndex)
+    const ai_prompt = promptTemplate.substring(promptIndex + promptMarker.length)

+    // Return the split parts
+    return { user_prompt, ai_prompt }
+  }
+
+  // Return an error if none of the conditions are met
+  return { error: 'Cannot split prompt template' }
+}
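
To make the split concrete: a template with both markers divides into the text before {system_message}, the text between the markers, and the text after {prompt}. For an illustrative template (not taken from this commit):

  promptTemplateConverter('SYSTEM: {system_message}\nUSER: {prompt}\nASSISTANT:')
  // => {
  //   system_prompt: 'SYSTEM: ',
  //   user_prompt:   '\nUSER: ',
  //   ai_prompt:     '\nASSISTANT:',
  // }
  // A template containing only {prompt} yields just user_prompt and ai_prompt.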
+
+const runNitroAndLoadModel = async (modelId: string, modelSettings: NitroModelSettings) => {
+  // Make sure the Nitro port is free before spawning a fresh subprocess
+  const tcpPortUsed = require('tcp-port-used')
+
+  await stopModel(modelId)
+  await tcpPortUsed.waitUntilFree(NITRO_DEFAULT_PORT, 300, 5000)
+
+  /**
+   * There is a problem with the Windows process manager.
+   * We should wait for a while to make sure the port is free and the subprocess is killed.
+   * The tested threshold is 500ms.
+   **/
+  if (process.platform === 'win32') {
+    await new Promise((resolve) => setTimeout(resolve, 500))
+  }
+
+  await spawnNitroProcess()
+  await loadLLMModel(modelSettings)
+  await validateModelStatus()
+}
+
+const spawnNitroProcess = async (): Promise<void> => {
+  logServer(`[NITRO]::Debug: Spawning Nitro subprocess...`)
+
+  let binaryFolder = join(
+    getJanExtensionsPath(),
+    '@janhq',
+    'inference-nitro-extension',
+    'dist',
+    'bin'
+  )
+
+  let executableOptions = executableNitroFile()
+  const tcpPortUsed = require('tcp-port-used')
+
+  const args: string[] = ['1', LOCAL_HOST, NITRO_DEFAULT_PORT.toString()]
+  // Execute the binary
+  logServer(
+    `[NITRO]::Debug: Spawn nitro at path: ${executableOptions.executablePath}, and args: ${args}`
+  )
+  subprocess = spawn(
+    executableOptions.executablePath,
+    ['1', LOCAL_HOST, NITRO_DEFAULT_PORT.toString()],
+    {
+      cwd: binaryFolder,
+      env: {
+        ...process.env,
+        CUDA_VISIBLE_DEVICES: executableOptions.cudaVisibleDevices,
+      },
+    }
+  )
+
+  // Handle subprocess output
+  subprocess.stdout.on('data', (data: any) => {
+    logServer(`[NITRO]::Debug: ${data}`)
+  })
+
+  subprocess.stderr.on('data', (data: any) => {
+    logServer(`[NITRO]::Error: ${data}`)
+  })
+
+  subprocess.on('close', (code: any) => {
+    logServer(`[NITRO]::Debug: Nitro exited with code: ${code}`)
+    subprocess = undefined
+  })
+
+  tcpPortUsed.waitUntilUsed(NITRO_DEFAULT_PORT, 300, 30000).then(() => {
+    logServer(`[NITRO]::Debug: Nitro is ready`)
+  })
+}
+
+type NitroExecutableOptions = {
+  executablePath: string
+  cudaVisibleDevices: string
+}
+
+const executableNitroFile = (): NitroExecutableOptions => {
+  const nvidiaInfoFilePath = join(getJanDataFolderPath(), 'settings', 'settings.json')
+  let binaryFolder = join(
+    getJanExtensionsPath(),
+    '@janhq',
+    'inference-nitro-extension',
+    'dist',
+    'bin'
+  )
+
+  let cudaVisibleDevices = ''
+  let binaryName = 'nitro'
+  /**
+   * The binary folder is different for each platform.
+   */
+  if (process.platform === 'win32') {
+    /**
+     * For Windows: win-cpu, win-cuda-11-7, win-cuda-12-0
+     */
+    let nvidiaInfo = JSON.parse(fs.readFileSync(nvidiaInfoFilePath, 'utf-8'))
+    if (nvidiaInfo['run_mode'] === 'cpu') {
+      binaryFolder = join(binaryFolder, 'win-cpu')
+    } else {
+      if (nvidiaInfo['cuda'].version === '12') {
+        binaryFolder = join(binaryFolder, 'win-cuda-12-0')
+      } else {
+        binaryFolder = join(binaryFolder, 'win-cuda-11-7')
+      }
+      cudaVisibleDevices = nvidiaInfo['gpu_highest_vram']
+    }
+    binaryName = 'nitro.exe'
+  } else if (process.platform === 'darwin') {
+    /**
+     * For macOS: mac-arm64 (Apple Silicon), mac-x64 (Intel)
+     */
+    if (process.arch === 'arm64') {
+      binaryFolder = join(binaryFolder, 'mac-arm64')
+    } else {
+      binaryFolder = join(binaryFolder, 'mac-x64')
+    }
+  } else {
+    /**
+     * For Linux: linux-cpu, linux-cuda-11-7, linux-cuda-12-0
+     */
+    let nvidiaInfo = JSON.parse(fs.readFileSync(nvidiaInfoFilePath, 'utf-8'))
+    if (nvidiaInfo['run_mode'] === 'cpu') {
+      binaryFolder = join(binaryFolder, 'linux-cpu')
+    } else {
+      if (nvidiaInfo['cuda'].version === '12') {
+        binaryFolder = join(binaryFolder, 'linux-cuda-12-0')
+      } else {
+        binaryFolder = join(binaryFolder, 'linux-cuda-11-7')
+      }
+      cudaVisibleDevices = nvidiaInfo['gpu_highest_vram']
+    }
+  }
+
+  return {
+    executablePath: join(binaryFolder, binaryName),
+    cudaVisibleDevices,
+  }
+}
+
+const validateModelStatus = async (): Promise<void> => {
+  // Send a GET request to the validation URL.
+  // Retry the request up to 5 times if it fails, with a delay of 500 milliseconds between retries.
+  const fetchRT = require('fetch-retry')
+  const fetchRetry = fetchRT(fetch)
+
+  return fetchRetry(NITRO_HTTP_VALIDATE_MODEL_URL, {
+    method: 'GET',
+    headers: {
+      'Content-Type': 'application/json',
+    },
+    retries: 5,
+    retryDelay: 500,
+  }).then(async (res: Response) => {
+    logServer(`[NITRO]::Debug: Validate model state success with response ${JSON.stringify(res)}`)
+    // If the response is OK, check the model_loaded status.
+    if (res.ok) {
+      const body = await res.json()
+      // If the model is loaded, resolve; otherwise reject so the caller reports failure.
+      if (body.model_loaded) {
+        return Promise.resolve()
+      }
+    }
+    return Promise.reject('Validate model status failed')
+  })
+}
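
The check above keys off a single field of the status payload; inferred from the body.model_loaded test (not from Nitro's API documentation), a healthy instance is assumed to answer roughly:

  // GET /inferences/llamacpp/modelstatus  ->  { "model_loaded": true, ... }
  // A non-2xx response or a falsy model_loaded rejects with 'Validate model status failed'.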
+
+const loadLLMModel = async (settings: NitroModelSettings): Promise<Response> => {
+  logServer(`[NITRO]::Debug: Loading model with params ${JSON.stringify(settings)}`)
+  const fetchRT = require('fetch-retry')
+  const fetchRetry = fetchRT(fetch)
+
+  return fetchRetry(NITRO_HTTP_LOAD_MODEL_URL, {
+    method: 'POST',
+    headers: {
+      'Content-Type': 'application/json',
+    },
+    body: JSON.stringify(settings),
+    retries: 3,
+    retryDelay: 500,
+  })
+    .then((res: any) => {
+      logServer(`[NITRO]::Debug: Load model success with response ${JSON.stringify(res)}`)
+      return Promise.resolve(res)
+    })
+    .catch((err: any) => {
+      logServer(`[NITRO]::Error: Load model failed with error ${err}`)
+      return Promise.reject()
+    })
+}
+
+/**
+ * Stop model and kill nitro process.
+ */
+export const stopModel = async (_modelId: string) => {
+  if (!subprocess) {
+    return {
+      error: "Model isn't running",
+    }
+  }
+  return new Promise((resolve, reject) => {
+    const controller = new AbortController()
+    setTimeout(() => {
+      controller.abort()
+      reject({
+        error: 'Failed to stop model: Timedout',
+      })
+    }, 5000)
+    const tcpPortUsed = require('tcp-port-used')
+    logServer(`[NITRO]::Debug: Request to kill Nitro`)
+
+    fetch(NITRO_HTTP_KILL_URL, {
+      method: 'DELETE',
+      signal: controller.signal,
+    })
+      .then(() => {
+        subprocess?.kill()
+        subprocess = undefined
+      })
+      .catch(() => {
+        // don't need to do anything, we still kill the subprocess
+      })
+      .then(() => tcpPortUsed.waitUntilFree(NITRO_DEFAULT_PORT, 300, 5000))
+      .then(() => logServer(`[NITRO]::Debug: Nitro process is terminated`))
+      .then(() =>
+        resolve({
+          message: 'Model stopped',
+        })
+      )
+  })
+}
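
Because the module keeps a single subprocess handle, only one Nitro instance runs at a time, which is why stopModel ignores its _modelId argument. A minimal in-process lifecycle sketch (the model id is hypothetical and must match a folder under the Jan data folder's models directory):

  import { startModel, stopModel } from './startStopModel'

  const res = await startModel('tinyllama-1.1b')
  // => { message: 'Model tinyllama-1.1b started' } on success, or { error: ... }
  await stopModel('tinyllama-1.1b') // kills the singleton Nitro subprocess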

@@ -10,6 +10,8 @@ import {
 } from '../common/builder'
 
 import { JanApiRouteConfiguration } from '../common/configuration'
+import { startModel, stopModel } from '../common/startStopModel'
+import { ModelSettingParams } from '../../../types'
 
 export const commonRouter = async (app: HttpServer) => {
   // Common Routes
@@ -17,19 +19,33 @@ export const commonRouter = async (app: HttpServer) => {
     app.get(`/${key}`, async (_request) => getBuilder(JanApiRouteConfiguration[key]))
 
     app.get(`/${key}/:id`, async (request: any) =>
-      retrieveBuilder(JanApiRouteConfiguration[key], request.params.id),
+      retrieveBuilder(JanApiRouteConfiguration[key], request.params.id)
     )
 
     app.delete(`/${key}/:id`, async (request: any) =>
-      deleteBuilder(JanApiRouteConfiguration[key], request.params.id),
+      deleteBuilder(JanApiRouteConfiguration[key], request.params.id)
    )
   })
 
   // Download Model Routes
   app.get(`/models/download/:modelId`, async (request: any) =>
-    downloadModel(request.params.modelId, { ignoreSSL: request.query.ignoreSSL === 'true', proxy: request.query.proxy }),
+    downloadModel(request.params.modelId, {
+      ignoreSSL: request.query.ignoreSSL === 'true',
+      proxy: request.query.proxy,
+    })
   )
 
+  app.put(`/models/:modelId/start`, async (request: any) => {
+    let settingParams: ModelSettingParams | undefined = undefined
+    if (Object.keys(request.body).length !== 0) {
+      settingParams = JSON.parse(request.body) as ModelSettingParams
+    }
+
+    return startModel(request.params.modelId, settingParams)
+  })
+
+  app.put(`/models/:modelId/stop`, async (request: any) => stopModel(request.params.modelId))
+
   // Chat Completion Routes
   app.post(`/chat/completions`, async (request: any, reply: any) => chatCompletions(request, reply))
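
A sketch of calling the new routes from a TypeScript client (the base URL is a placeholder for wherever the Jan API server listens, and the model id is hypothetical; since the start handler runs the raw body through JSON.parse, overrides should arrive as a JSON string):

  const base = 'http://localhost:1337' // placeholder host/port

  // Start a model, optionally overriding ModelSettingParams fields
  await fetch(`${base}/models/tinyllama-1.1b/start`, {
    method: 'PUT',
    headers: { 'Content-Type': 'application/json' },
    body: JSON.stringify({ ctx_len: 2048, ngl: 100 }),
  })

  // Stop it again; no body is required
  await fetch(`${base}/models/tinyllama-1.1b/stop`, { method: 'PUT' })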

@@ -1,16 +1,18 @@
-import { AppConfiguration } from "../../types";
-import { join } from "path";
-import fs from "fs";
-import os from "os";
+import { AppConfiguration, SystemResourceInfo } from '../../types'
+import { join } from 'path'
+import fs from 'fs'
+import os from 'os'
+import { log, logServer } from '../log'
+import childProcess from 'child_process'
 
 // TODO: move this to core
-const configurationFileName = "settings.json";
+const configurationFileName = 'settings.json'
 
 // TODO: do no specify app name in framework module
-const defaultJanDataFolder = join(os.homedir(), "jan");
+const defaultJanDataFolder = join(os.homedir(), 'jan')
 const defaultAppConfig: AppConfiguration = {
   data_folder: defaultJanDataFolder,
-};
+}
 
 /**
  * Getting App Configurations.
@@ -20,39 +22,39 @@ const defaultAppConfig: AppConfiguration = {
 export const getAppConfigurations = (): AppConfiguration => {
   // Retrieve Application Support folder path
   // Fallback to user home directory if not found
-  const configurationFile = getConfigurationFilePath();
+  const configurationFile = getConfigurationFilePath()
 
   if (!fs.existsSync(configurationFile)) {
     // create default app config if we don't have one
-    console.debug(`App config not found, creating default config at ${configurationFile}`);
-    fs.writeFileSync(configurationFile, JSON.stringify(defaultAppConfig));
-    return defaultAppConfig;
+    console.debug(`App config not found, creating default config at ${configurationFile}`)
+    fs.writeFileSync(configurationFile, JSON.stringify(defaultAppConfig))
+    return defaultAppConfig
   }
 
   try {
     const appConfigurations: AppConfiguration = JSON.parse(
-      fs.readFileSync(configurationFile, "utf-8"),
-    );
-    return appConfigurations;
+      fs.readFileSync(configurationFile, 'utf-8')
+    )
+    return appConfigurations
   } catch (err) {
-    console.error(`Failed to read app config, return default config instead! Err: ${err}`);
-    return defaultAppConfig;
+    console.error(`Failed to read app config, return default config instead! Err: ${err}`)
+    return defaultAppConfig
   }
-};
+}
 
 const getConfigurationFilePath = () =>
   join(
-    global.core?.appPath() || process.env[process.platform == "win32" ? "USERPROFILE" : "HOME"],
-    configurationFileName,
-  );
+    global.core?.appPath() || process.env[process.platform == 'win32' ? 'USERPROFILE' : 'HOME'],
+    configurationFileName
+  )
 
 export const updateAppConfiguration = (configuration: AppConfiguration): Promise<void> => {
-  const configurationFile = getConfigurationFilePath();
-  console.debug("updateAppConfiguration, configurationFile: ", configurationFile);
+  const configurationFile = getConfigurationFilePath()
+  console.debug('updateAppConfiguration, configurationFile: ', configurationFile)
 
-  fs.writeFileSync(configurationFile, JSON.stringify(configuration));
-  return Promise.resolve();
-};
+  fs.writeFileSync(configurationFile, JSON.stringify(configuration))
+  return Promise.resolve()
+}
 
 /**
  * Utility function to get server log path
@@ -60,13 +62,13 @@ export const updateAppConfiguration = (configuration: AppConfiguration): Promise
  * @returns {string} The log path.
  */
 export const getServerLogPath = (): string => {
-  const appConfigurations = getAppConfigurations();
-  const logFolderPath = join(appConfigurations.data_folder, "logs");
+  const appConfigurations = getAppConfigurations()
+  const logFolderPath = join(appConfigurations.data_folder, 'logs')
   if (!fs.existsSync(logFolderPath)) {
-    fs.mkdirSync(logFolderPath, { recursive: true });
+    fs.mkdirSync(logFolderPath, { recursive: true })
   }
-  return join(logFolderPath, "server.log");
-};
+  return join(logFolderPath, 'server.log')
+}
 
 /**
  * Utility function to get app log path
@@ -74,13 +76,13 @@ export const getServerLogPath = (): string => {
  * @returns {string} The log path.
  */
 export const getAppLogPath = (): string => {
-  const appConfigurations = getAppConfigurations();
-  const logFolderPath = join(appConfigurations.data_folder, "logs");
+  const appConfigurations = getAppConfigurations()
+  const logFolderPath = join(appConfigurations.data_folder, 'logs')
   if (!fs.existsSync(logFolderPath)) {
-    fs.mkdirSync(logFolderPath, { recursive: true });
+    fs.mkdirSync(logFolderPath, { recursive: true })
   }
-  return join(logFolderPath, "app.log");
-};
+  return join(logFolderPath, 'app.log')
+}
 
 /**
  * Utility function to get data folder path
@@ -88,9 +90,9 @@ export const getAppLogPath = (): string => {
  * @returns {string} The data folder path.
  */
 export const getJanDataFolderPath = (): string => {
-  const appConfigurations = getAppConfigurations();
-  return appConfigurations.data_folder;
-};
+  const appConfigurations = getAppConfigurations()
+  return appConfigurations.data_folder
+}
 
 /**
  * Utility function to get extension path
@@ -98,6 +100,70 @@ export const getJanDataFolderPath = (): string => {
  * @returns {string} The extensions path.
  */
 export const getJanExtensionsPath = (): string => {
-  const appConfigurations = getAppConfigurations();
-  return join(appConfigurations.data_folder, "extensions");
-};
+  const appConfigurations = getAppConfigurations()
+  return join(appConfigurations.data_folder, 'extensions')
+}
+
+/**
+ * Utility function to get the physical cpu count
+ *
+ * @returns {number} The physical cpu count.
+ */
+export const physicalCpuCount = async (): Promise<number> => {
+  const platform = os.platform()
+  if (platform === 'linux') {
+    const output = await exec('lscpu -p | egrep -v "^#" | sort -u -t, -k 2,4 | wc -l')
+    return parseInt(output.trim(), 10)
+  } else if (platform === 'darwin') {
+    const output = await exec('sysctl -n hw.physicalcpu_max')
+    return parseInt(output.trim(), 10)
+  } else if (platform === 'win32') {
+    const output = await exec('WMIC CPU Get NumberOfCores')
+    return output
+      .split(os.EOL)
+      .map((line: string) => parseInt(line))
+      .filter((value: number) => !isNaN(value))
+      .reduce((sum: number, number: number) => sum + number, 1)
+  } else {
+    const cores = os.cpus().filter((cpu: any, index: number) => {
+      const hasHyperthreading = cpu.model.includes('Intel')
+      const isOdd = index % 2 === 1
+      return !hasHyperthreading || isOdd
+    })
+    return cores.length
+  }
+}
+
+const exec = async (command: string): Promise<string> => {
+  return new Promise((resolve, reject) => {
+    childProcess.exec(command, { encoding: 'utf8' }, (error, stdout) => {
+      if (error) {
+        reject(error)
+      } else {
+        resolve(stdout)
+      }
+    })
+  })
+}
+
+export const getSystemResourceInfo = async (): Promise<SystemResourceInfo> => {
+  const cpu = await physicalCpuCount()
+  const message = `[NITRO]::CPU informations - ${cpu}`
+  log(message)
+  logServer(message)
+
+  return {
+    numCpuPhysicalCore: cpu,
+    memAvailable: 0, // TODO: this should not be 0
+  }
+}
+
+export const getEngineConfiguration = async (engineId: string) => {
+  if (engineId !== 'openai') {
+    return undefined
+  }
+  const directoryPath = join(getJanDataFolderPath(), 'engines')
+  const filePath = join(directoryPath, `${engineId}.json`)
+  const data = fs.readFileSync(filePath, 'utf-8')
+  return JSON.parse(data)
+}

@@ -6,3 +6,4 @@ export * from './inference'
 export * from './monitoring'
 export * from './file'
 export * from './config'
+export * from './miscellaneous'

core/src/types/miscellaneous/index.ts (new file)
@@ -0,0 +1,2 @@
+export * from './systemResourceInfo'
+export * from './promptTemplate'

core/src/types/miscellaneous/promptTemplate.ts (new file)
@@ -0,0 +1,6 @@
+export type PromptTemplate = {
+  system_prompt?: string
+  ai_prompt?: string
+  user_prompt?: string
+  error?: string
+}

core/src/types/miscellaneous/systemResourceInfo.ts (new file)
@@ -0,0 +1,4 @@
+export type SystemResourceInfo = {
+  numCpuPhysicalCore: number
+  memAvailable: number
+}

@@ -123,6 +123,7 @@ export type ModelSettingParams = {
   user_prompt?: string
   llama_model_path?: string
   mmproj?: string
+  cont_batching?: boolean
 }
 
 /**

@@ -2,22 +2,6 @@ declare const NODE: string;
 declare const INFERENCE_URL: string;
 declare const TROUBLESHOOTING_URL: string;
 
-/**
- * The parameters for the initModel function.
- * @property settings - The settings for the machine learning model.
- * @property settings.ctx_len - The context length.
- * @property settings.ngl - The number of generated tokens.
- * @property settings.cont_batching - Whether to use continuous batching.
- * @property settings.embedding - Whether to use embedding.
- */
-interface EngineSettings {
-  ctx_len: number;
-  ngl: number;
-  cpu_threads: number;
-  cont_batching: boolean;
-  embedding: boolean;
-}
-
 /**
  * The response from the initModel function.
  * @property error - An error message if the model fails to load.
@@ -26,8 +10,3 @@ interface ModelOperationResponse {
   error?: any;
   modelFile?: string;
 }
-
-interface ResourcesInfo {
-  numCpuPhysicalCore: number;
-  memAvailable: number;
-}

@@ -24,6 +24,7 @@ import {
   MessageEvent,
   ModelEvent,
   InferenceEvent,
+  ModelSettingParams,
 } from "@janhq/core";
 import { requestInference } from "./helpers/sse";
 import { ulid } from "ulid";
@@ -45,7 +46,7 @@ export default class JanInferenceNitroExtension extends InferenceExtension {
 
   private _currentModel: Model | undefined;
 
-  private _engineSettings: EngineSettings = {
+  private _engineSettings: ModelSettingParams = {
     ctx_len: 2048,
     ngl: 100,
     cpu_threads: 1,

@@ -3,11 +3,19 @@ import path from "path";
 import { ChildProcessWithoutNullStreams, spawn } from "child_process";
 import tcpPortUsed from "tcp-port-used";
 import fetchRT from "fetch-retry";
-import { log, getJanDataFolderPath } from "@janhq/core/node";
+import {
+  log,
+  getJanDataFolderPath,
+  getSystemResourceInfo,
+} from "@janhq/core/node";
 import { getNitroProcessInfo, updateNvidiaInfo } from "./nvidia";
-import { Model, InferenceEngine, ModelSettingParams } from "@janhq/core";
+import {
+  Model,
+  InferenceEngine,
+  ModelSettingParams,
+  PromptTemplate,
+} from "@janhq/core";
 import { executableNitroFile } from "./execute";
-import { physicalCpuCount } from "./utils";
 
 // Polyfill fetch with retry
 const fetchRetry = fetchRT(fetch);
@@ -20,16 +28,6 @@ interface ModelInitOptions {
   model: Model;
 }
 
-/**
- * The response object of Prompt Template parsing.
- */
-interface PromptTemplate {
-  system_prompt?: string;
-  ai_prompt?: string;
-  user_prompt?: string;
-  error?: string;
-}
-
 /**
  * Model setting args for Nitro model load.
  */
@@ -78,7 +76,7 @@ function stopModel(): Promise<void> {
  * TODO: Should pass absolute of the model file instead of just the name - So we can modurize the module.ts to npm package
  */
 async function runModel(
-  wrapper: ModelInitOptions,
+  wrapper: ModelInitOptions
 ): Promise<ModelOperationResponse | void> {
   if (wrapper.model.engine !== InferenceEngine.nitro) {
     // Not a nitro model
@@ -96,7 +94,7 @@ async function runModel(
   const ggufBinFile = files.find(
     (file) =>
       file === path.basename(currentModelFile) ||
-      file.toLowerCase().includes(SUPPORTED_MODEL_FORMAT),
+      file.toLowerCase().includes(SUPPORTED_MODEL_FORMAT)
   );
 
   if (!ggufBinFile) return Promise.reject("No GGUF model file found");
@@ -106,7 +104,7 @@ async function runModel(
   if (wrapper.model.engine !== InferenceEngine.nitro) {
     return Promise.reject("Not a nitro model");
   } else {
-    const nitroResourceProbe = await getResourcesInfo();
+    const nitroResourceProbe = await getSystemResourceInfo();
     // Convert settings.prompt_template to system_prompt, user_prompt, ai_prompt
     if (wrapper.model.settings.prompt_template) {
       const promptTemplate = wrapper.model.settings.prompt_template;
@@ -191,10 +189,10 @@ function promptTemplateConverter(promptTemplate: string): PromptTemplate {
   const system_prompt = promptTemplate.substring(0, systemIndex);
   const user_prompt = promptTemplate.substring(
     systemIndex + systemMarker.length,
-    promptIndex,
+    promptIndex
   );
   const ai_prompt = promptTemplate.substring(
-    promptIndex + promptMarker.length,
+    promptIndex + promptMarker.length
   );
 
   // Return the split parts
@@ -204,7 +202,7 @@ function promptTemplateConverter(promptTemplate: string): PromptTemplate {
   const promptIndex = promptTemplate.indexOf(promptMarker);
   const user_prompt = promptTemplate.substring(0, promptIndex);
   const ai_prompt = promptTemplate.substring(
-    promptIndex + promptMarker.length,
+    promptIndex + promptMarker.length
   );
 
   // Return the split parts
@@ -233,8 +231,8 @@ function loadLLMModel(settings: any): Promise<Response> {
     .then((res) => {
       log(
         `[NITRO]::Debug: Load model success with response ${JSON.stringify(
-          res,
-        )}`,
+          res
+        )}`
       );
       return Promise.resolve(res);
     })
@@ -263,8 +261,8 @@ async function validateModelStatus(): Promise<void> {
   }).then(async (res: Response) => {
     log(
       `[NITRO]::Debug: Validate model state success with response ${JSON.stringify(
-        res,
-      )}`,
+        res
+      )}`
     );
     // If the response is OK, check model_loaded status.
     if (res.ok) {
@@ -315,7 +313,7 @@ function spawnNitroProcess(): Promise<any> {
   const args: string[] = ["1", LOCAL_HOST, PORT.toString()];
   // Execute the binary
   log(
-    `[NITRO]::Debug: Spawn nitro at path: ${executableOptions.executablePath}, and args: ${args}`,
+    `[NITRO]::Debug: Spawn nitro at path: ${executableOptions.executablePath}, and args: ${args}`
   );
   subprocess = spawn(
     executableOptions.executablePath,
@@ -326,7 +324,7 @@ function spawnNitroProcess(): Promise<any> {
       ...process.env,
       CUDA_VISIBLE_DEVICES: executableOptions.cudaVisibleDevices,
     },
-  },
+  }
   );
 
   // Handle subprocess output
@@ -351,22 +349,6 @@ function spawnNitroProcess(): Promise<any> {
   });
 }
 
-/**
- * Get the system resources information
- * TODO: Move to Core so that it can be reused
- */
-function getResourcesInfo(): Promise<ResourcesInfo> {
-  return new Promise(async (resolve) => {
-    const cpu = await physicalCpuCount();
-    log(`[NITRO]::CPU informations - ${cpu}`);
-    const response: ResourcesInfo = {
-      numCpuPhysicalCore: cpu,
-      memAvailable: 0,
-    };
-    resolve(response);
-  });
-}
-
 /**
  * Every module should have a dispose function
  * This will be called when the extension is unloaded and should clean up any resources

(deleted file)
@@ -1,56 +0,0 @@
-import os from "os";
-import childProcess from "child_process";
-
-function exec(command: string): Promise<string> {
-  return new Promise((resolve, reject) => {
-    childProcess.exec(command, { encoding: "utf8" }, (error, stdout) => {
-      if (error) {
-        reject(error);
-      } else {
-        resolve(stdout);
-      }
-    });
-  });
-}
-
-let amount: number;
-const platform = os.platform();
-
-export async function physicalCpuCount(): Promise<number> {
-  return new Promise((resolve, reject) => {
-    if (platform === "linux") {
-      exec('lscpu -p | egrep -v "^#" | sort -u -t, -k 2,4 | wc -l')
-        .then((output) => {
-          amount = parseInt(output.trim(), 10);
-          resolve(amount);
-        })
-        .catch(reject);
-    } else if (platform === "darwin") {
-      exec("sysctl -n hw.physicalcpu_max")
-        .then((output) => {
-          amount = parseInt(output.trim(), 10);
-          resolve(amount);
-        })
-        .catch(reject);
-    } else if (platform === "win32") {
-      exec("WMIC CPU Get NumberOfCores")
-        .then((output) => {
-          amount = output
-            .split(os.EOL)
-            .map((line: string) => parseInt(line))
-            .filter((value: number) => !isNaN(value))
-            .reduce((sum: number, number: number) => sum + number, 1);
-          resolve(amount);
-        })
-        .catch(reject);
-    } else {
-      const cores = os.cpus().filter((cpu: any, index: number) => {
-        const hasHyperthreading = cpu.model.includes("Intel");
-        const isOdd = index % 2 === 1;
-        return !hasHyperthreading || isOdd;
-      });
-      amount = cores.length;
-      resolve(amount);
-    }
-  });
-}

@@ -26,6 +26,8 @@
     "dotenv": "^16.3.1",
     "fastify": "^4.24.3",
     "request": "^2.88.2",
+    "fetch-retry": "^5.0.6",
+    "tcp-port-used": "^1.0.2",
     "request-progress": "^3.0.0"
   },
   "devDependencies": {
@@ -35,6 +37,7 @@
     "@typescript-eslint/parser": "^6.7.3",
     "eslint-plugin-react": "^7.33.2",
     "run-script-os": "^1.1.6",
+    "@types/tcp-port-used": "^1.0.4",
     "typescript": "^5.2.2"
   }
 }

@@ -7,7 +7,7 @@ import {
   ThreadState,
   Model,
 } from '@janhq/core'
-import { atom, useAtom, useAtomValue, useSetAtom } from 'jotai'
+import { atom, useAtomValue, useSetAtom } from 'jotai'
 
 import { fileUploadAtom } from '@/containers/Providers/Jotai'
 
@@ -48,7 +48,8 @@ export const useCreateNewThread = () => {
   const createNewThread = useSetAtom(createNewThreadAtom)
   const setActiveThreadId = useSetAtom(setActiveThreadIdAtom)
   const updateThread = useSetAtom(updateThreadAtom)
-  const [fileUpload, setFileUpload] = useAtom(fileUploadAtom)
+  const setFileUpload = useSetAtom(fileUploadAtom)
   const { deleteThread } = useDeleteThread()
 
   const requestCreateNewThread = async (

@@ -1,5 +1,3 @@
-import { useEffect } from 'react'
-
 import {
   InferenceEvent,
   ExtensionTypeEnum,

@@ -1,7 +1,6 @@
-/* eslint-disable @typescript-eslint/no-explicit-any */
 'use client'
 
-import React, { useEffect, useState } from 'react'
+import React, { useCallback, useEffect, useState } from 'react'
 
 import ScrollToBottom from 'react-scroll-to-bottom'
 
@@ -81,14 +80,17 @@ const LocalServerScreen = () => {
   const [firstTimeVisitAPIServer, setFirstTimeVisitAPIServer] =
     useState<boolean>(false)
 
-  const handleChangePort = (value: any) => {
-    if (Number(value) <= 0 || Number(value) >= 65536) {
-      setErrorRangePort(true)
-    } else {
-      setErrorRangePort(false)
-    }
-    setPort(value)
-  }
+  const handleChangePort = useCallback(
+    (value: string) => {
+      if (Number(value) <= 0 || Number(value) >= 65536) {
+        setErrorRangePort(true)
+      } else {
+        setErrorRangePort(false)
+      }
+      setPort(value)
+    },
+    [setPort]
+  )
 
   useEffect(() => {
     if (localStorage.getItem(FIRST_TIME_VISIT_API_SERVER) == null) {
@@ -98,7 +100,7 @@ const LocalServerScreen = () => {
 
   useEffect(() => {
     handleChangePort(port)
-  }, [])
+  }, [handleChangePort, port])
 
   return (
     <div className="flex h-full w-full">

@@ -1,6 +1,4 @@
-import React, { useCallback, useEffect, useState } from 'react'
+import React, { useCallback, useState } from 'react'
 
-import { fs, AppConfiguration, joinPath, getUserHomePath } from '@janhq/core'
-
 import {
   Modal,