chore: API server works with cortex.cpp
parent 8ccbb56f95
commit 981675f365
@@ -343,7 +343,7 @@ export const chatCompletions = async (request: any, reply: any) => {
 
   // add engine for new cortex cpp engine
   if (requestedModel.engine === 'nitro') {
-    request.body.engine = 'cortex.llamacpp'
+    request.body.engine = 'llama-cpp'
  }
 
   const fetch = require('node-fetch')
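
Note on the change above: requests that still carry Jan's legacy 'nitro' engine name are rewritten to the engine identifier cortex.cpp expects ('llama-cpp' instead of the earlier 'cortex.llamacpp'). Below is a minimal sketch of that mapping as a standalone helper; the helper name and alias table are illustrative only and not part of this commit.

```typescript
// Illustrative sketch only: maps legacy engine names onto cortex.cpp engine names.
const ENGINE_ALIASES: Record<string, string> = {
  nitro: 'llama-cpp',
}

export const normalizeEngine = (engine?: string): string | undefined =>
  engine && ENGINE_ALIASES[engine] ? ENGINE_ALIASES[engine] : engine

// Usage, mirroring the handler above (sketch):
//   request.body.engine = normalizeEngine(requestedModel.engine)
```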
@@ -1,19 +1,9 @@
 // The PORT to use for the Nitro subprocess
-export const NITRO_DEFAULT_PORT = 3928
+export const CORTEX_DEFAULT_PORT = 39291
 
 // The HOST address to use for the Nitro subprocess
 export const LOCAL_HOST = '127.0.0.1'
 
 export const SUPPORTED_MODEL_FORMAT = '.gguf'
 
-// The URL for the Nitro subprocess
-const NITRO_HTTP_SERVER_URL = `http://${LOCAL_HOST}:${NITRO_DEFAULT_PORT}`
-// The URL for the Nitro subprocess to load a model
-export const NITRO_HTTP_LOAD_MODEL_URL = `${NITRO_HTTP_SERVER_URL}/inferences/server/loadmodel`
-// The URL for the Nitro subprocess to validate a model
-export const NITRO_HTTP_VALIDATE_MODEL_URL = `${NITRO_HTTP_SERVER_URL}/inferences/server/modelstatus`
-
-// The URL for the Nitro subprocess to kill itself
-export const NITRO_HTTP_KILL_URL = `${NITRO_HTTP_SERVER_URL}/processmanager/destroy`
-
-export const DEFAULT_CHAT_COMPLETION_URL = `http://${LOCAL_HOST}:${NITRO_DEFAULT_PORT}/inferences/server/chat_completion` // default nitro url
+export const DEFAULT_CHAT_COMPLETION_URL = `http://${LOCAL_HOST}:${CORTEX_DEFAULT_PORT}/v1/chat/completions` // default nitro url
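
With the constants above, the API server now targets a cortex.cpp server on port 39291 and its OpenAI-compatible /v1/chat/completions route rather than the old Nitro endpoints. Below is a minimal sketch of forwarding a chat completion payload to that URL with node-fetch; the function name, POST method, and headers are assumptions for illustration and not code from this commit.

```typescript
import fetch from 'node-fetch'

const LOCAL_HOST = '127.0.0.1'
const CORTEX_DEFAULT_PORT = 39291
const DEFAULT_CHAT_COMPLETION_URL = `http://${LOCAL_HOST}:${CORTEX_DEFAULT_PORT}/v1/chat/completions`

// Forward an OpenAI-style chat completion request body to the local cortex.cpp server.
export const forwardChatCompletion = async (body: Record<string, unknown>) => {
  const response = await fetch(DEFAULT_CHAT_COMPLETION_URL, {
    method: 'POST',
    headers: { 'Content-Type': 'application/json' },
    body: JSON.stringify(body),
  })
  return response.json()
}
```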
@@ -1,6 +1,5 @@
-import { join } from 'path'
-import { getJanDataFolderPath, getJanExtensionsPath, log } from '../../../helper'
 import { ModelSettingParams } from '../../../../types'
+import { CORTEX_DEFAULT_PORT, LOCAL_HOST } from './consts'
 
 /**
  * Start a model
@@ -9,70 +8,16 @@ import { ModelSettingParams } from '../../../../types'
  * @returns
  */
 export const startModel = async (modelId: string, settingParams?: ModelSettingParams) => {
-  try {
-    await runModel(modelId, settingParams)
-
-    return {
-      message: `Model ${modelId} started`,
-    }
-  } catch (e) {
-    return {
-      error: e,
-    }
-  }
-}
-
-/**
- * Run a model using installed cortex extension
- * @param model
- * @param settingParams
- */
-const runModel = async (model: string, settingParams?: ModelSettingParams): Promise<void> => {
-  const janDataFolderPath = getJanDataFolderPath()
-  const modelFolder = join(janDataFolderPath, 'models', model)
-  let module = join(
-    getJanExtensionsPath(),
-    '@janhq',
-    'inference-cortex-extension',
-    'dist',
-    'node',
-    'index.cjs'
-  )
-  // Just reuse the cortex extension implementation, don't duplicate then lost of sync
-  return import(module).then((extension) =>
-    extension
-      .loadModel(
-        {
-          modelFolder,
-          model,
-        },
-        settingParams
-      )
-      .then(() => log(`[SERVER]::Debug: Model is loaded`))
-      .then({
-        message: 'Model started',
-      })
-  )
+  return fetch(`http://${LOCAL_HOST}:${CORTEX_DEFAULT_PORT}/v1/models/start`, {
+    body: JSON.stringify({ model: modelId, ...settingParams }),
+  })
 }
 
 /*
- * Stop model and kill nitro process.
+ * Stop model.
  */
-export const stopModel = async (_modelId: string) => {
-  let module = join(
-    getJanExtensionsPath(),
-    '@janhq',
-    'inference-cortex-extension',
-    'dist',
-    'node',
-    'index.cjs'
-  )
-  // Just reuse the cortex extension implementation, don't duplicate then lost of sync
-  return import(module).then((extension) =>
-    extension
-      .unloadModel()
-      .then(() => log(`[SERVER]::Debug: Model is unloaded`))
-      .then({
-        message: 'Model stopped',
-      })
-  )
+export const stopModel = async (modelId: string) => {
+  return fetch(`http://${LOCAL_HOST}:${CORTEX_DEFAULT_PORT}/v1/models/stop`, {
+    body: JSON.stringify({ model: modelId }),
+  })
 }
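
startModel and stopModel now delegate the model lifecycle to cortex.cpp's /v1/models/start and /v1/models/stop routes instead of loading the inference-cortex-extension bundle in-process. Below is a rough usage sketch for the rewritten helpers; the import path, model id, and ctx_len setting are hypothetical, and it assumes a cortex.cpp server is already listening on CORTEX_DEFAULT_PORT (39291).

```typescript
// Hypothetical caller for the rewritten helpers; not part of this commit.
import { startModel, stopModel } from './startStopModel'

const demo = async () => {
  // Ask cortex.cpp to load the model via /v1/models/start (see the diff above)
  await startModel('llama3-8b-instruct', { ctx_len: 2048 })

  // ... serve /v1/chat/completions traffic while the model is loaded ...

  // Unload it again via /v1/models/stop
  await stopModel('llama3-8b-instruct')
}

demo().catch(console.error)
```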