From 981675f3658dc8aea087c70fed08ff508271d57d Mon Sep 17 00:00:00 2001
From: Louis <louis@jan.ai>
Date: Tue, 22 Oct 2024 16:03:07 +0700
Subject: [PATCH] chore: API server works with cortex.cpp

---
 core/src/node/api/restful/helper/builder.ts   |  2 +-
 core/src/node/api/restful/helper/consts.ts    | 14 +---
 .../node/api/restful/helper/startStopModel.ts | 73 +++----------------
 3 files changed, 12 insertions(+), 77 deletions(-)
diff --git a/core/src/node/api/restful/helper/builder.ts b/core/src/node/api/restful/helper/builder.ts
index db2000d69..da33808dc 100644
--- a/core/src/node/api/restful/helper/builder.ts
+++ b/core/src/node/api/restful/helper/builder.ts
@@ -343,7 +343,7 @@ export const chatCompletions = async (request: any, reply: any) => {
 
   // add engine for new cortex cpp engine
   if (requestedModel.engine === 'nitro') {
-    request.body.engine = 'cortex.llamacpp'
+    request.body.engine = 'llama-cpp'
   }
 
   const fetch = require('node-fetch')
diff --git a/core/src/node/api/restful/helper/consts.ts b/core/src/node/api/restful/helper/consts.ts
index 8d8f8e341..0f57bb5ff 100644
--- a/core/src/node/api/restful/helper/consts.ts
+++ b/core/src/node/api/restful/helper/consts.ts
@@ -1,19 +1,9 @@
 // The PORT to use for the Nitro subprocess
-export const NITRO_DEFAULT_PORT = 3928
+export const CORTEX_DEFAULT_PORT = 39291
 
 // The HOST address to use for the Nitro subprocess
 export const LOCAL_HOST = '127.0.0.1'
 
 export const SUPPORTED_MODEL_FORMAT = '.gguf'
 
-// The URL for the Nitro subprocess
-const NITRO_HTTP_SERVER_URL = `http://${LOCAL_HOST}:${NITRO_DEFAULT_PORT}`
-// The URL for the Nitro subprocess to load a model
-export const NITRO_HTTP_LOAD_MODEL_URL = `${NITRO_HTTP_SERVER_URL}/inferences/server/loadmodel`
-// The URL for the Nitro subprocess to validate a model
-export const NITRO_HTTP_VALIDATE_MODEL_URL = `${NITRO_HTTP_SERVER_URL}/inferences/server/modelstatus`
-
-// The URL for the Nitro subprocess to kill itself
-export const NITRO_HTTP_KILL_URL = `${NITRO_HTTP_SERVER_URL}/processmanager/destroy`
-
-export const DEFAULT_CHAT_COMPLETION_URL = `http://${LOCAL_HOST}:${NITRO_DEFAULT_PORT}/inferences/server/chat_completion` // default nitro url
+export const DEFAULT_CHAT_COMPLETION_URL = `http://${LOCAL_HOST}:${CORTEX_DEFAULT_PORT}/v1/chat/completions` // default nitro url
diff --git a/core/src/node/api/restful/helper/startStopModel.ts b/core/src/node/api/restful/helper/startStopModel.ts
index d1a23dca9..857567612 100644
--- a/core/src/node/api/restful/helper/startStopModel.ts
+++ b/core/src/node/api/restful/helper/startStopModel.ts
@@ -1,6 +1,5 @@
-import { join } from 'path'
-import { getJanDataFolderPath, getJanExtensionsPath, log } from '../../../helper'
 import { ModelSettingParams } from '../../../../types'
+import { CORTEX_DEFAULT_PORT, LOCAL_HOST } from './consts'
 
 /**
  * Start a model
@@ -9,70 +8,16 @@ import { ModelSettingParams } from '../../../../types'
  * @returns
  */
 export const startModel = async (modelId: string, settingParams?: ModelSettingParams) => {
-  try {
-    await runModel(modelId, settingParams)
-
-    return {
-      message: `Model ${modelId} started`,
-    }
-  } catch (e) {
-    return {
-      error: e,
-    }
-  }
+  return fetch(`http://${LOCAL_HOST}:${CORTEX_DEFAULT_PORT}/v1/models/start`, {
+    body: JSON.stringify({ model: modelId, ...settingParams }),
+  })
 }
 
-/**
- * Run a model using installed cortex extension
- * @param model
- * @param settingParams
- */
-const runModel = async (model: string, settingParams?: ModelSettingParams): Promise<void> => {
-  const janDataFolderPath = getJanDataFolderPath()
-  const modelFolder = join(janDataFolderPath, 'models', model)
-  let module = join(
-    getJanExtensionsPath(),
-    '@janhq',
-    'inference-cortex-extension',
-    'dist',
-    'node',
-    'index.cjs'
-  )
-  // Just reuse the cortex extension implementation, don't duplicate then lost of sync
-  return import(module).then((extension) =>
-    extension
-      .loadModel(
-        {
-          modelFolder,
-          model,
-        },
-        settingParams
-      )
-      .then(() => log(`[SERVER]::Debug: Model is loaded`))
-      .then({
-        message: 'Model started',
-      })
-  )
-}
 /*
- * Stop model and kill nitro process.
+ * Stop model.
  */
-export const stopModel = async (_modelId: string) => {
-  let module = join(
-    getJanExtensionsPath(),
-    '@janhq',
-    'inference-cortex-extension',
-    'dist',
-    'node',
-    'index.cjs'
-  )
-  // Just reuse the cortex extension implementation, don't duplicate then lost of sync
-  return import(module).then((extension) =>
-    extension
-      .unloadModel()
-      .then(() => log(`[SERVER]::Debug: Model is unloaded`))
-      .then({
-        message: 'Model stopped',
-      })
-  )
+export const stopModel = async (modelId: string) => {
+  return fetch(`http://${LOCAL_HOST}:${CORTEX_DEFAULT_PORT}/v1/models/stop`, {
+    body: JSON.stringify({ model: modelId }),
+  })
 }