From 981675f3658dc8aea087c70fed08ff508271d57d Mon Sep 17 00:00:00 2001
From: Louis
Date: Tue, 22 Oct 2024 16:03:07 +0700
Subject: [PATCH] chore: API server works with cortex.cpp

---
Review notes (ignored by git am; not part of the commit message):
* startModel/stopModel: fetch() defaults to GET and refuses a request that
  carries a body, so both calls now send an explicit POST with a JSON
  Content-Type. The diffstat and the second startStopModel.ts hunk header are
  adjusted for the extra lines; the post-image blob id on that file's index
  line predates this change.
* Line structure was restored from a whitespace-collapsed copy. Blank context
  lines were placed so that every hunk matches its line counts; indentation
  is the usual 2-space style and should be checked against the pre-image.
* The removed runModel line reads "Promise =>" here; its type argument looks
  like it was lost in extraction. Confirm against the pre-image before applying.
* consts.ts comments still say "Nitro" although the port and URL now target
  cortex.cpp; left as in the original commit.

 core/src/node/api/restful/helper/builder.ts   |  2 +-
 core/src/node/api/restful/helper/consts.ts    | 14 +---
 .../node/api/restful/helper/startStopModel.ts | 79 ++++---------------
 3 files changed, 18 insertions(+), 77 deletions(-)

diff --git a/core/src/node/api/restful/helper/builder.ts b/core/src/node/api/restful/helper/builder.ts
index db2000d69..da33808dc 100644
--- a/core/src/node/api/restful/helper/builder.ts
+++ b/core/src/node/api/restful/helper/builder.ts
@@ -343,7 +343,7 @@ export const chatCompletions = async (request: any, reply: any) => {
 
   // add engine for new cortex cpp engine
   if (requestedModel.engine === 'nitro') {
-    request.body.engine = 'cortex.llamacpp'
+    request.body.engine = 'llama-cpp'
   }
 
   const fetch = require('node-fetch')
diff --git a/core/src/node/api/restful/helper/consts.ts b/core/src/node/api/restful/helper/consts.ts
index 8d8f8e341..0f57bb5ff 100644
--- a/core/src/node/api/restful/helper/consts.ts
+++ b/core/src/node/api/restful/helper/consts.ts
@@ -1,19 +1,9 @@
 // The PORT to use for the Nitro subprocess
-export const NITRO_DEFAULT_PORT = 3928
+export const CORTEX_DEFAULT_PORT = 39291
 
 // The HOST address to use for the Nitro subprocess
 export const LOCAL_HOST = '127.0.0.1'
 
 export const SUPPORTED_MODEL_FORMAT = '.gguf'
 
-// The URL for the Nitro subprocess
-const NITRO_HTTP_SERVER_URL = `http://${LOCAL_HOST}:${NITRO_DEFAULT_PORT}`
-// The URL for the Nitro subprocess to load a model
-export const NITRO_HTTP_LOAD_MODEL_URL = `${NITRO_HTTP_SERVER_URL}/inferences/server/loadmodel`
-// The URL for the Nitro subprocess to validate a model
-export const NITRO_HTTP_VALIDATE_MODEL_URL = `${NITRO_HTTP_SERVER_URL}/inferences/server/modelstatus`
-
-// The URL for the Nitro subprocess to kill itself
-export const NITRO_HTTP_KILL_URL = `${NITRO_HTTP_SERVER_URL}/processmanager/destroy`
-
-export const DEFAULT_CHAT_COMPLETION_URL = `http://${LOCAL_HOST}:${NITRO_DEFAULT_PORT}/inferences/server/chat_completion` // default nitro url
+export const DEFAULT_CHAT_COMPLETION_URL = `http://${LOCAL_HOST}:${CORTEX_DEFAULT_PORT}/v1/chat/completions` // default nitro url
diff --git a/core/src/node/api/restful/helper/startStopModel.ts b/core/src/node/api/restful/helper/startStopModel.ts
index d1a23dca9..857567612 100644
--- a/core/src/node/api/restful/helper/startStopModel.ts
+++ b/core/src/node/api/restful/helper/startStopModel.ts
@@ -1,6 +1,5 @@
-import { join } from 'path'
-import { getJanDataFolderPath, getJanExtensionsPath, log } from '../../../helper'
 import { ModelSettingParams } from '../../../../types'
+import { CORTEX_DEFAULT_PORT, LOCAL_HOST } from './consts'
 
 /**
  * Start a model
@@ -9,70 +8,22 @@ import { ModelSettingParams } from '../../../../types'
  * @returns
  */
 export const startModel = async (modelId: string, settingParams?: ModelSettingParams) => {
-  try {
-    await runModel(modelId, settingParams)
-
-    return {
-      message: `Model ${modelId} started`,
-    }
-  } catch (e) {
-    return {
-      error: e,
-    }
-  }
+  return fetch(`http://${LOCAL_HOST}:${CORTEX_DEFAULT_PORT}/v1/models/start`, {
+    // fetch defaults to GET, which cannot carry a body; the JSON payload needs POST
+    method: 'POST',
+    headers: { 'Content-Type': 'application/json' },
+    body: JSON.stringify({ model: modelId, ...settingParams }),
+  })
 }
 
-/**
- * Run a model using installed cortex extension
- * @param model
- * @param settingParams
- */
-const runModel = async (model: string, settingParams?: ModelSettingParams): Promise => {
-  const janDataFolderPath = getJanDataFolderPath()
-  const modelFolder = join(janDataFolderPath, 'models', model)
-  let module = join(
-    getJanExtensionsPath(),
-    '@janhq',
-    'inference-cortex-extension',
-    'dist',
-    'node',
-    'index.cjs'
-  )
-  // Just reuse the cortex extension implementation, don't duplicate then lost of sync
-  return import(module).then((extension) =>
-    extension
-      .loadModel(
-        {
-          modelFolder,
-          model,
-        },
-        settingParams
-      )
-      .then(() => log(`[SERVER]::Debug: Model is loaded`))
-      .then({
-        message: 'Model started',
-      })
-  )
-}
 /*
- * Stop model and kill nitro process.
+ * Stop model.
  */
-export const stopModel = async (_modelId: string) => {
-  let module = join(
-    getJanExtensionsPath(),
-    '@janhq',
-    'inference-cortex-extension',
-    'dist',
-    'node',
-    'index.cjs'
-  )
-  // Just reuse the cortex extension implementation, don't duplicate then lost of sync
-  return import(module).then((extension) =>
-    extension
-      .unloadModel()
-      .then(() => log(`[SERVER]::Debug: Model is unloaded`))
-      .then({
-        message: 'Model stopped',
-      })
-  )
+export const stopModel = async (modelId: string) => {
+  return fetch(`http://${LOCAL_HOST}:${CORTEX_DEFAULT_PORT}/v1/models/stop`, {
+    // fetch defaults to GET, which cannot carry a body; the JSON payload needs POST
+    method: 'POST',
+    headers: { 'Content-Type': 'application/json' },
+    body: JSON.stringify({ model: modelId }),
+  })
 }