chore: proxies Jan APIs to cortex.cpp

This commit is contained in:
Louis 2024-11-08 17:59:09 +07:00
parent 731d42863c
commit 60235a4078
No known key found for this signature in database
GPG Key ID: 44FA9F4D33C37DE2
3 changed files with 50 additions and 47 deletions

View File

@ -10,6 +10,7 @@ import {
getMessages, getMessages,
retrieveMessage, retrieveMessage,
updateThread, updateThread,
getModels,
} from './helper/builder' } from './helper/builder'
import { JanApiRouteConfiguration } from './helper/configuration' import { JanApiRouteConfiguration } from './helper/configuration'
@ -26,9 +27,12 @@ export const commonRouter = async (app: HttpServer) => {
// Common Routes // Common Routes
// Read & Delete :: Threads | Models | Assistants // Read & Delete :: Threads | Models | Assistants
Object.keys(JanApiRouteConfiguration).forEach((key) => { Object.keys(JanApiRouteConfiguration).forEach((key) => {
app.get(`/${key}`, async (_request) => app.get(`/${key}`, async (_req, _res) => {
getBuilder(JanApiRouteConfiguration[key]).then(normalizeData) if (key === 'models') {
) return getModels(_req, _res)
}
return getBuilder(JanApiRouteConfiguration[key]).then(normalizeData)
})
app.get(`/${key}/:id`, async (request: any) => app.get(`/${key}/:id`, async (request: any) =>
retrieveBuilder(JanApiRouteConfiguration[key], request.params.id) retrieveBuilder(JanApiRouteConfiguration[key], request.params.id)

View File

@ -10,9 +10,9 @@ import {
} from 'fs' } from 'fs'
import { JanApiRouteConfiguration, RouteConfiguration } from './configuration' import { JanApiRouteConfiguration, RouteConfiguration } from './configuration'
import { join } from 'path' import { join } from 'path'
import { ContentType, MessageStatus, Model, ThreadMessage } from '../../../../types' import { ContentType, InferenceEngine, MessageStatus, ThreadMessage } from '../../../../types'
import { getEngineConfiguration, getJanDataFolderPath } from '../../../helper' import { getJanDataFolderPath } from '../../../helper'
import { DEFAULT_CHAT_COMPLETION_URL } from './consts' import { CORTEX_API_URL } from './consts'
// TODO: Refactor these // TODO: Refactor these
export const getBuilder = async (configuration: RouteConfiguration) => { export const getBuilder = async (configuration: RouteConfiguration) => {
@ -297,57 +297,56 @@ export const downloadModel = async (
} }
} }
export const chatCompletions = async (request: any, reply: any) => { /**
const modelList = await getBuilder(JanApiRouteConfiguration.models) * Proxy /models to cortex
const modelId = request.body.model * @param request
* @param reply
const matchedModels = modelList.filter((model: Model) => model.id === modelId) */
if (matchedModels.length === 0) { export const getModels = async (request: any, reply: any) => {
const error = { const fetch = require('node-fetch')
error: {
message: `The model ${request.body.model} does not exist`,
type: 'invalid_request_error',
param: null,
code: 'model_not_found',
},
}
reply.code(404).send(error)
return
}
const requestedModel = matchedModels[0]
const engineConfiguration = await getEngineConfiguration(requestedModel.engine)
let apiKey: string | undefined = undefined
let apiUrl: string = DEFAULT_CHAT_COMPLETION_URL
if (engineConfiguration) {
apiKey = engineConfiguration.api_key
apiUrl = engineConfiguration.full_url ?? DEFAULT_CHAT_COMPLETION_URL
}
const headers: Record<string, any> = { const headers: Record<string, any> = {
'Content-Type': 'application/json', 'Content-Type': 'application/json',
} }
if (apiKey) { const response = await fetch(`${CORTEX_API_URL}/models`, {
headers['Authorization'] = `Bearer ${apiKey}` method: 'GET',
headers['api-key'] = apiKey headers: headers,
} body: JSON.stringify(request.body),
})
if (requestedModel.engine === 'openai' && request.body.stop) { if (response.status !== 200) {
// openai only allows max 4 stop words // Forward the error response to client via reply
request.body.stop = request.body.stop.slice(0, 4) const responseBody = await response.text()
const responseHeaders = Object.fromEntries(response.headers)
reply.code(response.status).headers(responseHeaders).send(responseBody)
} else {
reply.raw.writeHead(200, {
'Content-Type': 'application/json',
'Cache-Control': 'no-cache',
'Connection': 'keep-alive',
'Access-Control-Allow-Origin': '*',
})
response.body.pipe(reply.raw)
}
}
/**
* Proxy chat completions
* @param request
* @param reply
*/
export const chatCompletions = async (request: any, reply: any) => {
const headers: Record<string, any> = {
'Content-Type': 'application/json',
} }
// add engine for new cortex cpp engine // add engine for new cortex cpp engine
if (requestedModel.engine === 'nitro') { if (request.body.engine === InferenceEngine.nitro) {
request.body.engine = 'llama-cpp' request.body.engine = InferenceEngine.cortex_llamacpp
} }
const fetch = require('node-fetch') const fetch = require('node-fetch')
const response = await fetch(apiUrl, { const response = await fetch(`${CORTEX_API_URL}/chat/completions`, {
method: 'POST', method: 'POST',
headers: headers, headers: headers,
body: JSON.stringify(request.body), body: JSON.stringify(request.body),

View File

@ -6,4 +6,4 @@ export const LOCAL_HOST = '127.0.0.1'
export const SUPPORTED_MODEL_FORMAT = '.gguf' export const SUPPORTED_MODEL_FORMAT = '.gguf'
export const DEFAULT_CHAT_COMPLETION_URL = `http://${LOCAL_HOST}:${CORTEX_DEFAULT_PORT}/v1/chat/completions` // default nitro url export const CORTEX_API_URL = `http://${LOCAL_HOST}:${CORTEX_DEFAULT_PORT}/v1` // default cortex.cpp API base URL