chore: proxies Jan APIs to cortex.cpp

This commit is contained in:
Louis 2024-11-08 17:59:09 +07:00
parent 731d42863c
commit 60235a4078
No known key found for this signature in database
GPG Key ID: 44FA9F4D33C37DE2
3 changed files with 50 additions and 47 deletions

View File

@ -10,6 +10,7 @@ import {
getMessages, getMessages,
retrieveMessage, retrieveMessage,
updateThread, updateThread,
getModels,
} from './helper/builder' } from './helper/builder'
import { JanApiRouteConfiguration } from './helper/configuration' import { JanApiRouteConfiguration } from './helper/configuration'
@ -26,9 +27,12 @@ export const commonRouter = async (app: HttpServer) => {
// Common Routes // Common Routes
// Read & Delete :: Threads | Models | Assistants // Read & Delete :: Threads | Models | Assistants
Object.keys(JanApiRouteConfiguration).forEach((key) => { Object.keys(JanApiRouteConfiguration).forEach((key) => {
app.get(`/${key}`, async (_request) => app.get(`/${key}`, async (_req, _res) => {
getBuilder(JanApiRouteConfiguration[key]).then(normalizeData) if (key === 'models') {
) return getModels(_req, _res)
}
return getBuilder(JanApiRouteConfiguration[key]).then(normalizeData)
})
app.get(`/${key}/:id`, async (request: any) => app.get(`/${key}/:id`, async (request: any) =>
retrieveBuilder(JanApiRouteConfiguration[key], request.params.id) retrieveBuilder(JanApiRouteConfiguration[key], request.params.id)

View File

@ -10,9 +10,9 @@ import {
} from 'fs' } from 'fs'
import { JanApiRouteConfiguration, RouteConfiguration } from './configuration' import { JanApiRouteConfiguration, RouteConfiguration } from './configuration'
import { join } from 'path' import { join } from 'path'
import { ContentType, MessageStatus, Model, ThreadMessage } from '../../../../types' import { ContentType, InferenceEngine, MessageStatus, ThreadMessage } from '../../../../types'
import { getEngineConfiguration, getJanDataFolderPath } from '../../../helper' import { getJanDataFolderPath } from '../../../helper'
import { DEFAULT_CHAT_COMPLETION_URL } from './consts' import { CORTEX_API_URL } from './consts'
// TODO: Refactor these // TODO: Refactor these
export const getBuilder = async (configuration: RouteConfiguration) => { export const getBuilder = async (configuration: RouteConfiguration) => {
@ -297,57 +297,56 @@ export const downloadModel = async (
} }
} }
export const chatCompletions = async (request: any, reply: any) => { /**
const modelList = await getBuilder(JanApiRouteConfiguration.models) * Proxy /models to cortex
const modelId = request.body.model * @param request
* @param reply
const matchedModels = modelList.filter((model: Model) => model.id === modelId) */
if (matchedModels.length === 0) { export const getModels = async (request: any, reply: any) => {
const error = { const fetch = require('node-fetch')
error: {
message: `The model ${request.body.model} does not exist`,
type: 'invalid_request_error',
param: null,
code: 'model_not_found',
},
}
reply.code(404).send(error)
return
}
const requestedModel = matchedModels[0]
const engineConfiguration = await getEngineConfiguration(requestedModel.engine)
let apiKey: string | undefined = undefined
let apiUrl: string = DEFAULT_CHAT_COMPLETION_URL
if (engineConfiguration) {
apiKey = engineConfiguration.api_key
apiUrl = engineConfiguration.full_url ?? DEFAULT_CHAT_COMPLETION_URL
}
const headers: Record<string, any> = { const headers: Record<string, any> = {
'Content-Type': 'application/json', 'Content-Type': 'application/json',
} }
if (apiKey) { const response = await fetch(`${CORTEX_API_URL}/models`, {
headers['Authorization'] = `Bearer ${apiKey}` method: 'GET',
headers['api-key'] = apiKey headers: headers,
} body: JSON.stringify(request.body),
})
if (requestedModel.engine === 'openai' && request.body.stop) { if (response.status !== 200) {
// openai only allows max 4 stop words // Forward the error response to client via reply
request.body.stop = request.body.stop.slice(0, 4) const responseBody = await response.text()
const responseHeaders = Object.fromEntries(response.headers)
reply.code(response.status).headers(responseHeaders).send(responseBody)
} else {
reply.raw.writeHead(200, {
'Content-Type': 'application/json',
'Cache-Control': 'no-cache',
'Connection': 'keep-alive',
'Access-Control-Allow-Origin': '*',
})
response.body.pipe(reply.raw)
}
}
/**
* Proxy chat completions
* @param request
* @param reply
*/
export const chatCompletions = async (request: any, reply: any) => {
const headers: Record<string, any> = {
'Content-Type': 'application/json',
} }
// add engine for new cortex cpp engine // add engine for new cortex cpp engine
if (requestedModel.engine === 'nitro') { if (request.body.engine === InferenceEngine.nitro) {
request.body.engine = 'llama-cpp' request.body.engine = InferenceEngine.cortex_llamacpp
} }
const fetch = require('node-fetch') const fetch = require('node-fetch')
const response = await fetch(apiUrl, { const response = await fetch(`${CORTEX_API_URL}/chat/completions`, {
method: 'POST', method: 'POST',
headers: headers, headers: headers,
body: JSON.stringify(request.body), body: JSON.stringify(request.body),

View File

@ -6,4 +6,4 @@ export const LOCAL_HOST = '127.0.0.1'
export const SUPPORTED_MODEL_FORMAT = '.gguf' export const SUPPORTED_MODEL_FORMAT = '.gguf'
export const DEFAULT_CHAT_COMPLETION_URL = `http://${LOCAL_HOST}:${CORTEX_DEFAULT_PORT}/v1/chat/completions` // default nitro url export const CORTEX_API_URL = `http://${LOCAL_HOST}:${CORTEX_DEFAULT_PORT}/v1` // default cortex.cpp API base URL