chore: API server works with cortex.cpp

Louis 2024-10-22 16:03:07 +07:00
parent 8ccbb56f95
commit 981675f365
3 changed files with 12 additions and 77 deletions

View File

@@ -343,7 +343,7 @@ export const chatCompletions = async (request: any, reply: any) => {
   // add engine for new cortex cpp engine
   if (requestedModel.engine === 'nitro') {
-    request.body.engine = 'cortex.llamacpp'
+    request.body.engine = 'llama-cpp'
   }
 
   const fetch = require('node-fetch')
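Note (not part of the commit): a minimal sketch of how a handler might forward a chat-completions request to cortex.cpp after this rename. The helper name and hardcoded URL are assumptions; the port and path come from the constants changed in the next file.

import fetch from 'node-fetch'

// Hypothetical forwarding helper; URL mirrors DEFAULT_CHAT_COMPLETION_URL below.
const CHAT_COMPLETION_URL = 'http://127.0.0.1:39291/v1/chat/completions'

export async function proxyChatCompletion(body: Record<string, any>) {
  // Legacy 'nitro' models are now served by cortex.cpp's llama-cpp engine.
  if (body.engine === 'nitro') body.engine = 'llama-cpp'
  const response = await fetch(CHAT_COMPLETION_URL, {
    method: 'POST',
    headers: { 'Content-Type': 'application/json' },
    body: JSON.stringify(body),
  })
  return response.json()
}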

View File

@@ -1,19 +1,9 @@
-// The PORT to use for the Nitro subprocess
-export const NITRO_DEFAULT_PORT = 3928
+export const CORTEX_DEFAULT_PORT = 39291
 
 // The HOST address to use for the Nitro subprocess
 export const LOCAL_HOST = '127.0.0.1'
 
 export const SUPPORTED_MODEL_FORMAT = '.gguf'
 
-// The URL for the Nitro subprocess
-const NITRO_HTTP_SERVER_URL = `http://${LOCAL_HOST}:${NITRO_DEFAULT_PORT}`
-// The URL for the Nitro subprocess to load a model
-export const NITRO_HTTP_LOAD_MODEL_URL = `${NITRO_HTTP_SERVER_URL}/inferences/server/loadmodel`
-// The URL for the Nitro subprocess to validate a model
-export const NITRO_HTTP_VALIDATE_MODEL_URL = `${NITRO_HTTP_SERVER_URL}/inferences/server/modelstatus`
-// The URL for the Nitro subprocess to kill itself
-export const NITRO_HTTP_KILL_URL = `${NITRO_HTTP_SERVER_URL}/processmanager/destroy`
-export const DEFAULT_CHAT_COMPLETION_URL = `http://${LOCAL_HOST}:${NITRO_DEFAULT_PORT}/inferences/server/chat_completion` // default nitro url
+export const DEFAULT_CHAT_COMPLETION_URL = `http://${LOCAL_HOST}:${CORTEX_DEFAULT_PORT}/v1/chat/completions` // default nitro url
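For reference, a quick check (assumed usage, not in the commit) of what the new constants resolve to:

import { LOCAL_HOST, CORTEX_DEFAULT_PORT, DEFAULT_CHAT_COMPLETION_URL } from './consts'

// Prints http://127.0.0.1:39291/v1/chat/completions
console.log(DEFAULT_CHAT_COMPLETION_URL)
// The model lifecycle endpoints in the next file share the same host and port.
console.log(`http://${LOCAL_HOST}:${CORTEX_DEFAULT_PORT}/v1/models/start`)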

View File

@@ -1,6 +1,5 @@
-import { join } from 'path'
-import { getJanDataFolderPath, getJanExtensionsPath, log } from '../../../helper'
 import { ModelSettingParams } from '../../../../types'
+import { CORTEX_DEFAULT_PORT, LOCAL_HOST } from './consts'
 
 /**
  * Start a model
@@ -9,70 +8,16 @@ import { ModelSettingParams } from '../../../../types'
  * @returns
  */
 export const startModel = async (modelId: string, settingParams?: ModelSettingParams) => {
-  try {
-    await runModel(modelId, settingParams)
-    return {
-      message: `Model ${modelId} started`,
-    }
-  } catch (e) {
-    return {
-      error: e,
-    }
-  }
+  return fetch(`http://${LOCAL_HOST}:${CORTEX_DEFAULT_PORT}/v1/models/start`, {
+    body: JSON.stringify({ model: modelId, ...settingParams }),
+  })
 }
-/**
- * Run a model using installed cortex extension
- * @param model
- * @param settingParams
- */
-const runModel = async (model: string, settingParams?: ModelSettingParams): Promise<void> => {
-  const janDataFolderPath = getJanDataFolderPath()
-  const modelFolder = join(janDataFolderPath, 'models', model)
-  let module = join(
-    getJanExtensionsPath(),
-    '@janhq',
-    'inference-cortex-extension',
-    'dist',
-    'node',
-    'index.cjs'
-  )
-  // Just reuse the cortex extension implementation, don't duplicate then lost of sync
-  return import(module).then((extension) =>
-    extension
-      .loadModel(
-        {
-          modelFolder,
-          model,
-        },
-        settingParams
-      )
-      .then(() => log(`[SERVER]::Debug: Model is loaded`))
-      .then({
-        message: 'Model started',
-      })
-  )
-}
 /*
- * Stop model and kill nitro process.
+ * Stop model.
  */
-export const stopModel = async (_modelId: string) => {
-  let module = join(
-    getJanExtensionsPath(),
-    '@janhq',
-    'inference-cortex-extension',
-    'dist',
-    'node',
-    'index.cjs'
-  )
-  // Just reuse the cortex extension implementation, don't duplicate then lost of sync
-  return import(module).then((extension) =>
-    extension
-      .unloadModel()
-      .then(() => log(`[SERVER]::Debug: Model is unloaded`))
-      .then({
-        message: 'Model stopped',
-      })
-  )
+export const stopModel = async (modelId: string) => {
+  return fetch(`http://${LOCAL_HOST}:${CORTEX_DEFAULT_PORT}/v1/models/stop`, {
+    body: JSON.stringify({ model: modelId }),
+  })
 }
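For context, a hedged sketch (not part of the commit) of calling cortex.cpp's model lifecycle endpoints directly. The committed fetch calls pass only a body, so the HTTP method and headers below are assumptions about what a JSON POST to these endpoints would typically need:

// Assumed base URL, built from LOCAL_HOST and CORTEX_DEFAULT_PORT above.
const CORTEX_MODELS_URL = 'http://127.0.0.1:39291/v1/models'

// Hypothetical helper: start a model, then stop it again.
async function cycleModel(modelId: string) {
  await fetch(`${CORTEX_MODELS_URL}/start`, {
    method: 'POST',
    headers: { 'Content-Type': 'application/json' },
    body: JSON.stringify({ model: modelId }),
  })
  await fetch(`${CORTEX_MODELS_URL}/stop`, {
    method: 'POST',
    headers: { 'Content-Type': 'application/json' },
    body: JSON.stringify({ model: modelId }),
  })
}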