chore: API server works with cortex.cpp
This commit is contained in:
parent
8ccbb56f95
commit
981675f365
@ -343,7 +343,7 @@ export const chatCompletions = async (request: any, reply: any) => {
|
||||
|
||||
// add engine for new cortex cpp engine
|
||||
if (requestedModel.engine === 'nitro') {
|
||||
request.body.engine = 'cortex.llamacpp'
|
||||
request.body.engine = 'llama-cpp'
|
||||
}
|
||||
|
||||
const fetch = require('node-fetch')
|
||||
|
||||
@ -1,19 +1,9 @@
|
||||
// The PORT to use for the Nitro subprocess
|
||||
export const NITRO_DEFAULT_PORT = 3928
|
||||
export const CORTEX_DEFAULT_PORT = 39291
|
||||
|
||||
// The HOST address to use for the Nitro subprocess
|
||||
export const LOCAL_HOST = '127.0.0.1'
|
||||
|
||||
export const SUPPORTED_MODEL_FORMAT = '.gguf'
|
||||
|
||||
// The URL for the Nitro subprocess
|
||||
const NITRO_HTTP_SERVER_URL = `http://${LOCAL_HOST}:${NITRO_DEFAULT_PORT}`
|
||||
// The URL for the Nitro subprocess to load a model
|
||||
export const NITRO_HTTP_LOAD_MODEL_URL = `${NITRO_HTTP_SERVER_URL}/inferences/server/loadmodel`
|
||||
// The URL for the Nitro subprocess to validate a model
|
||||
export const NITRO_HTTP_VALIDATE_MODEL_URL = `${NITRO_HTTP_SERVER_URL}/inferences/server/modelstatus`
|
||||
|
||||
// The URL for the Nitro subprocess to kill itself
|
||||
export const NITRO_HTTP_KILL_URL = `${NITRO_HTTP_SERVER_URL}/processmanager/destroy`
|
||||
|
||||
export const DEFAULT_CHAT_COMPLETION_URL = `http://${LOCAL_HOST}:${NITRO_DEFAULT_PORT}/inferences/server/chat_completion` // default nitro url
|
||||
export const DEFAULT_CHAT_COMPLETION_URL = `http://${LOCAL_HOST}:${CORTEX_DEFAULT_PORT}/v1/chat/completions` // default cortex url
|
||||
|
||||
@ -1,6 +1,5 @@
|
||||
import { join } from 'path'
|
||||
import { getJanDataFolderPath, getJanExtensionsPath, log } from '../../../helper'
|
||||
import { ModelSettingParams } from '../../../../types'
|
||||
import { CORTEX_DEFAULT_PORT, LOCAL_HOST } from './consts'
|
||||
|
||||
/**
|
||||
* Start a model
|
||||
@ -9,70 +8,16 @@ import { ModelSettingParams } from '../../../../types'
|
||||
* @returns
|
||||
*/
|
||||
/**
 * Ask the local Cortex server to start a model.
 *
 * Sends a JSON payload containing `model` plus any provided setting
 * parameters to `/v1/models/start` on `LOCAL_HOST:CORTEX_DEFAULT_PORT`.
 *
 * @param modelId - Identifier of the model to start.
 * @param settingParams - Optional settings spread into the request payload.
 * @returns The promise produced by `fetch` for the start request.
 */
export const startModel = async (modelId: string, settingParams?: ModelSettingParams) => {
  return fetch(`http://${LOCAL_HOST}:${CORTEX_DEFAULT_PORT}/v1/models/start`, {
    // fetch defaults to GET, and a GET request is not allowed to carry a body,
    // so the method has to be stated explicitly.
    method: 'POST',
    headers: { 'Content-Type': 'application/json' },
    body: JSON.stringify({ model: modelId, ...settingParams }),
  })
}
|
||||
|
||||
/**
|
||||
* Run a model using installed cortex extension
|
||||
* @param model
|
||||
* @param settingParams
|
||||
*/
|
||||
const runModel = async (model: string, settingParams?: ModelSettingParams): Promise<void> => {
|
||||
const janDataFolderPath = getJanDataFolderPath()
|
||||
const modelFolder = join(janDataFolderPath, 'models', model)
|
||||
let module = join(
|
||||
getJanExtensionsPath(),
|
||||
'@janhq',
|
||||
'inference-cortex-extension',
|
||||
'dist',
|
||||
'node',
|
||||
'index.cjs'
|
||||
)
|
||||
// Reuse the cortex extension implementation instead of duplicating it, so the two cannot drift out of sync
|
||||
return import(module).then((extension) =>
|
||||
extension
|
||||
.loadModel(
|
||||
{
|
||||
modelFolder,
|
||||
model,
|
||||
},
|
||||
settingParams
|
||||
)
|
||||
.then(() => log(`[SERVER]::Debug: Model is loaded`))
|
||||
.then({
|
||||
message: 'Model started',
|
||||
})
|
||||
)
|
||||
}
|
||||
/*
|
||||
* Stop model and kill nitro process.
|
||||
* Stop model.
|
||||
*/
|
||||
export const stopModel = async (_modelId: string) => {
|
||||
let module = join(
|
||||
getJanExtensionsPath(),
|
||||
'@janhq',
|
||||
'inference-cortex-extension',
|
||||
'dist',
|
||||
'node',
|
||||
'index.cjs'
|
||||
)
|
||||
// Reuse the cortex extension implementation instead of duplicating it, so the two cannot drift out of sync
|
||||
return import(module).then((extension) =>
|
||||
extension
|
||||
.unloadModel()
|
||||
.then(() => log(`[SERVER]::Debug: Model is unloaded`))
|
||||
.then({
|
||||
message: 'Model stopped',
|
||||
})
|
||||
)
|
||||
/**
 * Ask the local Cortex server to stop a model.
 *
 * Sends `{ model: modelId }` as JSON to `/v1/models/stop` on
 * `LOCAL_HOST:CORTEX_DEFAULT_PORT`.
 *
 * @param modelId - Identifier of the model to stop.
 * @returns The promise produced by `fetch` for the stop request.
 */
export const stopModel = async (modelId: string) => {
  return fetch(`http://${LOCAL_HOST}:${CORTEX_DEFAULT_PORT}/v1/models/stop`, {
    // fetch defaults to GET, and a GET request is not allowed to carry a body,
    // so the method has to be stated explicitly.
    method: 'POST',
    headers: { 'Content-Type': 'application/json' },
    body: JSON.stringify({ model: modelId }),
  })
}
|
||||
|
||||
Loading…
x
Reference in New Issue
Block a user