fix: model path backward compatible (#2018)

Louis 2024-02-14 23:04:46 +07:00 committed by GitHub
parent f2e31874e1
commit f0fd2c5a2a
12 changed files with 185 additions and 209 deletions

View File

@@ -58,6 +58,7 @@ export class Downloader implements Processor {
const currentDownloadState = DownloadManager.instance.downloadProgressMap[modelId]
const downloadState: DownloadState = {
...currentDownloadState,
error: error.message,
downloadState: 'error',
}
if (currentDownloadState) {

View File

@@ -1,8 +0,0 @@
{
"semi": false,
"singleQuote": true,
"quoteProps": "consistent",
"trailingComma": "es5",
"endOfLine": "auto",
"plugins": ["prettier-plugin-tailwindcss"]
}

View File

@@ -1,8 +0,0 @@
{
"semi": false,
"singleQuote": true,
"quoteProps": "consistent",
"trailingComma": "es5",
"endOfLine": "auto",
"plugins": ["prettier-plugin-tailwindcss"]
}

View File

@@ -26,6 +26,7 @@ import {
ModelEvent,
InferenceEvent,
ModelSettingParams,
getJanDataFolderPath,
} from "@janhq/core";
import { requestInference } from "./helpers/sse";
import { ulid } from "ulid";
@@ -146,11 +147,14 @@ export default class JanInferenceNitroExtension extends InferenceExtension {
private async onModelInit(model: Model) {
if (model.engine !== InferenceEngine.nitro) return;
const modelFullPath = await joinPath(["models", model.id]);
const modelFolder = await joinPath([
await getJanDataFolderPath(),
"models",
model.id,
]);
this._currentModel = model;
const nitroInitResult = await executeOnMain(NODE, "runModel", {
modelFullPath,
modelFolder,
model,
});
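The extension-side change above stops passing the data-folder-relative "models/<id>" string and instead hands the node process an absolute model folder. A minimal sketch of that resolution, assuming the @janhq/core helpers already used in this diff (the helper name below is illustrative, not part of the codebase):

import { getJanDataFolderPath, joinPath } from '@janhq/core'

// Illustration only: build the absolute folder (e.g. ~/jan/models/llama-2)
// that the node-side runModel now receives as `modelFolder`.
async function resolveModelFolder(modelId: string): Promise<string> {
  return joinPath([await getJanDataFolderPath(), 'models', modelId])
}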

View File

@@ -1,55 +1,50 @@
import fs from "fs";
import path from "path";
import { ChildProcessWithoutNullStreams, spawn } from "child_process";
import tcpPortUsed from "tcp-port-used";
import fetchRT from "fetch-retry";
import {
log,
getJanDataFolderPath,
getSystemResourceInfo,
} from "@janhq/core/node";
import { getNitroProcessInfo, updateNvidiaInfo } from "./nvidia";
import fs from 'fs'
import path from 'path'
import { ChildProcessWithoutNullStreams, spawn } from 'child_process'
import tcpPortUsed from 'tcp-port-used'
import fetchRT from 'fetch-retry'
import { log, getSystemResourceInfo } from '@janhq/core/node'
import { getNitroProcessInfo, updateNvidiaInfo } from './nvidia'
import {
Model,
InferenceEngine,
ModelSettingParams,
PromptTemplate,
} from "@janhq/core";
import { executableNitroFile } from "./execute";
} from '@janhq/core'
import { executableNitroFile } from './execute'
// Polyfill fetch with retry
const fetchRetry = fetchRT(fetch);
const fetchRetry = fetchRT(fetch)
/**
* The response object for model init operation.
*/
interface ModelInitOptions {
modelFullPath: string;
model: Model;
modelFolder: string
model: Model
}
// The PORT to use for the Nitro subprocess
const PORT = 3928;
const PORT = 3928
// The HOST address to use for the Nitro subprocess
const LOCAL_HOST = "127.0.0.1";
const LOCAL_HOST = '127.0.0.1'
// The URL for the Nitro subprocess
const NITRO_HTTP_SERVER_URL = `http://${LOCAL_HOST}:${PORT}`;
const NITRO_HTTP_SERVER_URL = `http://${LOCAL_HOST}:${PORT}`
// The URL for the Nitro subprocess to load a model
const NITRO_HTTP_LOAD_MODEL_URL = `${NITRO_HTTP_SERVER_URL}/inferences/llamacpp/loadmodel`;
const NITRO_HTTP_LOAD_MODEL_URL = `${NITRO_HTTP_SERVER_URL}/inferences/llamacpp/loadmodel`
// The URL for the Nitro subprocess to validate a model
const NITRO_HTTP_VALIDATE_MODEL_URL = `${NITRO_HTTP_SERVER_URL}/inferences/llamacpp/modelstatus`;
const NITRO_HTTP_VALIDATE_MODEL_URL = `${NITRO_HTTP_SERVER_URL}/inferences/llamacpp/modelstatus`
// The URL for the Nitro subprocess to kill itself
const NITRO_HTTP_KILL_URL = `${NITRO_HTTP_SERVER_URL}/processmanager/destroy`;
const NITRO_HTTP_KILL_URL = `${NITRO_HTTP_SERVER_URL}/processmanager/destroy`
// The supported model format
// TODO: Should be an array to support more models
const SUPPORTED_MODEL_FORMAT = ".gguf";
const SUPPORTED_MODEL_FORMAT = '.gguf'
// The subprocess instance for Nitro
let subprocess: ChildProcessWithoutNullStreams | undefined = undefined;
// The current model file url
let currentModelFile: string = "";
let subprocess: ChildProcessWithoutNullStreams | undefined = undefined
// The current model settings
let currentSettings: ModelSettingParams | undefined = undefined;
let currentSettings: ModelSettingParams | undefined = undefined
/**
* Stops a Nitro subprocess.
@@ -57,7 +52,7 @@ let currentSettings: ModelSettingParams | undefined = undefined;
* @returns A Promise that resolves when the subprocess is terminated successfully, or rejects with an error message if the subprocess fails to terminate.
*/
function stopModel(): Promise<void> {
return killSubprocess();
return killSubprocess()
}
/**
@@ -67,62 +62,79 @@ function stopModel(): Promise<void> {
* TODO: Should pass the absolute path of the model file instead of just the name - so we can modularize module.ts into an npm package
*/
async function runModel(
wrapper: ModelInitOptions,
wrapper: ModelInitOptions
): Promise<ModelOperationResponse | void> {
if (wrapper.model.engine !== InferenceEngine.nitro) {
// Not a nitro model
return Promise.resolve();
return Promise.resolve()
}
currentModelFile = wrapper.modelFullPath;
const janRoot = await getJanDataFolderPath();
if (!currentModelFile.includes(janRoot)) {
currentModelFile = path.join(janRoot, currentModelFile);
}
const files: string[] = fs.readdirSync(currentModelFile);
// Look for GGUF model file
const ggufBinFile = files.find(
(file) =>
file === path.basename(currentModelFile) ||
file.toLowerCase().includes(SUPPORTED_MODEL_FORMAT),
);
if (!ggufBinFile) return Promise.reject("No GGUF model file found");
currentModelFile = path.join(currentModelFile, ggufBinFile);
if (wrapper.model.engine !== InferenceEngine.nitro) {
return Promise.reject("Not a nitro model");
return Promise.reject('Not a nitro model')
} else {
const nitroResourceProbe = await getSystemResourceInfo();
const nitroResourceProbe = await getSystemResourceInfo()
// Convert settings.prompt_template to system_prompt, user_prompt, ai_prompt
if (wrapper.model.settings.prompt_template) {
const promptTemplate = wrapper.model.settings.prompt_template;
const prompt = promptTemplateConverter(promptTemplate);
const promptTemplate = wrapper.model.settings.prompt_template
const prompt = promptTemplateConverter(promptTemplate)
if (prompt?.error) {
return Promise.reject(prompt.error);
return Promise.reject(prompt.error)
}
wrapper.model.settings.system_prompt = prompt.system_prompt;
wrapper.model.settings.user_prompt = prompt.user_prompt;
wrapper.model.settings.ai_prompt = prompt.ai_prompt;
wrapper.model.settings.system_prompt = prompt.system_prompt
wrapper.model.settings.user_prompt = prompt.user_prompt
wrapper.model.settings.ai_prompt = prompt.ai_prompt
}
const modelFolderPath = path.join(janRoot, "models", wrapper.model.id);
const modelPath = wrapper.model.settings.llama_model_path
? path.join(modelFolderPath, wrapper.model.settings.llama_model_path)
: currentModelFile;
// modelFolder is the absolute path to the running model folder
// e.g. ~/jan/models/llama-2
let modelFolder = wrapper.modelFolder
let llama_model_path = wrapper.model.settings.llama_model_path
// Absolute model path support
if (
wrapper.model?.sources.length &&
wrapper.model.sources.every((e) => fs.existsSync(e.url))
) {
llama_model_path =
wrapper.model.sources.length === 1
? wrapper.model.sources[0].url
: wrapper.model.sources.find((e) =>
e.url.includes(llama_model_path ?? wrapper.model.id)
)?.url
}
if (!llama_model_path || !path.isAbsolute(llama_model_path)) {
// Look for GGUF model file
const modelFiles: string[] = fs.readdirSync(modelFolder)
const ggufBinFile = modelFiles.find(
(file) =>
// 1. Prioritize llama_model_path (predefined)
(llama_model_path && file === llama_model_path) ||
// 2. Prioritize GGUF File (manual import)
file.toLowerCase().includes(SUPPORTED_MODEL_FORMAT) ||
// 3. Fallback Model ID (for backward compatibility)
file === wrapper.model.id
)
if (ggufBinFile) llama_model_path = path.join(modelFolder, ggufBinFile)
}
// Look for absolute source path for single model
if (!llama_model_path) return Promise.reject('No GGUF model file found')
currentSettings = {
...wrapper.model.settings,
llama_model_path: modelPath,
llama_model_path,
// This is critical and requires real CPU physical core count (or performance core)
cpu_threads: Math.max(1, nitroResourceProbe.numCpuPhysicalCore),
...(wrapper.model.settings.mmproj && {
mmproj: path.join(modelFolderPath, wrapper.model.settings.mmproj),
mmproj: path.isAbsolute(wrapper.model.settings.mmproj)
? wrapper.model.settings.mmproj
: path.join(modelFolder, wrapper.model.settings.mmproj),
}),
};
return runNitroAndLoadModel();
}
return runNitroAndLoadModel()
}
}
@@ -142,10 +154,10 @@ async function runNitroAndLoadModel() {
* Should wait for awhile to make sure the port is free and subprocess is killed
* The tested threshold is 500ms
**/
if (process.platform === "win32") {
return new Promise((resolve) => setTimeout(resolve, 500));
if (process.platform === 'win32') {
return new Promise((resolve) => setTimeout(resolve, 500))
} else {
return Promise.resolve();
return Promise.resolve()
}
})
.then(spawnNitroProcess)
@@ -153,9 +165,9 @@ async function runNitroAndLoadModel() {
.then(validateModelStatus)
.catch((err) => {
// TODO: Broadcast error so app could display proper error message
log(`[NITRO]::Error: ${err}`);
return { error: err };
});
log(`[NITRO]::Error: ${err}`)
return { error: err }
})
}
/**
@@ -165,43 +177,43 @@
*/
function promptTemplateConverter(promptTemplate: string): PromptTemplate {
// Split the string using the markers
const systemMarker = "{system_message}";
const promptMarker = "{prompt}";
const systemMarker = '{system_message}'
const promptMarker = '{prompt}'
if (
promptTemplate.includes(systemMarker) &&
promptTemplate.includes(promptMarker)
) {
// Find the indices of the markers
const systemIndex = promptTemplate.indexOf(systemMarker);
const promptIndex = promptTemplate.indexOf(promptMarker);
const systemIndex = promptTemplate.indexOf(systemMarker)
const promptIndex = promptTemplate.indexOf(promptMarker)
// Extract the parts of the string
const system_prompt = promptTemplate.substring(0, systemIndex);
const system_prompt = promptTemplate.substring(0, systemIndex)
const user_prompt = promptTemplate.substring(
systemIndex + systemMarker.length,
promptIndex,
);
promptIndex
)
const ai_prompt = promptTemplate.substring(
promptIndex + promptMarker.length,
);
promptIndex + promptMarker.length
)
// Return the split parts
return { system_prompt, user_prompt, ai_prompt };
return { system_prompt, user_prompt, ai_prompt }
} else if (promptTemplate.includes(promptMarker)) {
// Extract the parts of the string for the case where only promptMarker is present
const promptIndex = promptTemplate.indexOf(promptMarker);
const user_prompt = promptTemplate.substring(0, promptIndex);
const promptIndex = promptTemplate.indexOf(promptMarker)
const user_prompt = promptTemplate.substring(0, promptIndex)
const ai_prompt = promptTemplate.substring(
promptIndex + promptMarker.length,
);
promptIndex + promptMarker.length
)
// Return the split parts
return { user_prompt, ai_prompt };
return { user_prompt, ai_prompt }
}
// Return an error if none of the conditions are met
return { error: "Cannot split prompt template" };
return { error: 'Cannot split prompt template' }
}
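// Usage sketch of promptTemplateConverter above with a made-up template string,
// showing how the two markers split it:
//   promptTemplateConverter('SYSTEM: {system_message} USER: {prompt} ASSISTANT: ')
//   => { system_prompt: 'SYSTEM: ', user_prompt: ' USER: ', ai_prompt: ' ASSISTANT: ' }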
/**
@@ -210,13 +222,13 @@ function promptTemplateConverter(promptTemplate: string): PromptTemplate {
*/
function loadLLMModel(settings: any): Promise<Response> {
if (!settings?.ngl) {
settings.ngl = 100;
settings.ngl = 100
}
log(`[NITRO]::Debug: Loading model with params ${JSON.stringify(settings)}`);
log(`[NITRO]::Debug: Loading model with params ${JSON.stringify(settings)}`)
return fetchRetry(NITRO_HTTP_LOAD_MODEL_URL, {
method: "POST",
method: 'POST',
headers: {
"Content-Type": "application/json",
'Content-Type': 'application/json',
},
body: JSON.stringify(settings),
retries: 3,
@@ -225,15 +237,15 @@ function loadLLMModel(settings: any): Promise<Response> {
.then((res) => {
log(
`[NITRO]::Debug: Load model success with response ${JSON.stringify(
res,
)}`,
);
return Promise.resolve(res);
res
)}`
)
return Promise.resolve(res)
})
.catch((err) => {
log(`[NITRO]::Error: Load model failed with error ${err}`);
return Promise.reject(err);
});
log(`[NITRO]::Error: Load model failed with error ${err}`)
return Promise.reject(err)
})
}
/**
@@ -246,9 +258,9 @@ async function validateModelStatus(): Promise<void> {
// Send a GET request to the validation URL.
// Retry the request up to 3 times if it fails, with a delay of 500 milliseconds between retries.
return fetchRetry(NITRO_HTTP_VALIDATE_MODEL_URL, {
method: "GET",
method: 'GET',
headers: {
"Content-Type": "application/json",
'Content-Type': 'application/json',
},
retries: 5,
retryDelay: 500,
@@ -257,10 +269,10 @@
`[NITRO]::Debug: Validate model state with response ${JSON.stringify(
res.status
)}`
);
)
// If the response is OK, check model_loaded status.
if (res.ok) {
const body = await res.json();
const body = await res.json()
// If the model is loaded, return an empty object.
// Otherwise, return an object with an error message.
if (body.model_loaded) {
@@ -268,17 +280,17 @@ async function validateModelStatus(): Promise<void> {
`[NITRO]::Debug: Validate model state success with response ${JSON.stringify(
body
)}`
);
return Promise.resolve();
)
return Promise.resolve()
}
}
log(
`[NITRO]::Debug: Validate model state failed with response ${JSON.stringify(
res.statusText
)}`
);
return Promise.reject("Validate model status failed");
});
)
return Promise.reject('Validate model status failed')
})
}
/**
@@ -286,21 +298,21 @@ async function validateModelStatus(): Promise<void> {
* @returns A Promise that resolves when the subprocess is terminated successfully, or rejects with an error message if the subprocess fails to terminate.
*/
async function killSubprocess(): Promise<void> {
const controller = new AbortController();
setTimeout(() => controller.abort(), 5000);
log(`[NITRO]::Debug: Request to kill Nitro`);
const controller = new AbortController()
setTimeout(() => controller.abort(), 5000)
log(`[NITRO]::Debug: Request to kill Nitro`)
return fetch(NITRO_HTTP_KILL_URL, {
method: "DELETE",
method: 'DELETE',
signal: controller.signal,
})
.then(() => {
subprocess?.kill();
subprocess = undefined;
subprocess?.kill()
subprocess = undefined
})
.catch(() => {})
.then(() => tcpPortUsed.waitUntilFree(PORT, 300, 5000))
.then(() => log(`[NITRO]::Debug: Nitro process is terminated`));
.then(() => log(`[NITRO]::Debug: Nitro process is terminated`))
}
/**
@@ -308,49 +320,49 @@ async function killSubprocess(): Promise<void> {
* @returns A promise that resolves when the Nitro subprocess is started.
*/
function spawnNitroProcess(): Promise<any> {
log(`[NITRO]::Debug: Spawning Nitro subprocess...`);
log(`[NITRO]::Debug: Spawning Nitro subprocess...`)
return new Promise<void>(async (resolve, reject) => {
let binaryFolder = path.join(__dirname, "..", "bin"); // Current directory by default
let executableOptions = executableNitroFile();
let binaryFolder = path.join(__dirname, '..', 'bin') // Current directory by default
let executableOptions = executableNitroFile()
const args: string[] = ["1", LOCAL_HOST, PORT.toString()];
const args: string[] = ['1', LOCAL_HOST, PORT.toString()]
// Execute the binary
log(
`[NITRO]::Debug: Spawn nitro at path: ${executableOptions.executablePath}, and args: ${args}`,
);
`[NITRO]::Debug: Spawn nitro at path: ${executableOptions.executablePath}, and args: ${args}`
)
subprocess = spawn(
executableOptions.executablePath,
["1", LOCAL_HOST, PORT.toString()],
['1', LOCAL_HOST, PORT.toString()],
{
cwd: binaryFolder,
env: {
...process.env,
CUDA_VISIBLE_DEVICES: executableOptions.cudaVisibleDevices,
},
},
);
}
)
// Handle subprocess output
subprocess.stdout.on("data", (data: any) => {
log(`[NITRO]::Debug: ${data}`);
});
subprocess.stdout.on('data', (data: any) => {
log(`[NITRO]::Debug: ${data}`)
})
subprocess.stderr.on("data", (data: any) => {
log(`[NITRO]::Error: ${data}`);
});
subprocess.stderr.on('data', (data: any) => {
log(`[NITRO]::Error: ${data}`)
})
subprocess.on("close", (code: any) => {
log(`[NITRO]::Debug: Nitro exited with code: ${code}`);
subprocess = undefined;
reject(`child process exited with code ${code}`);
});
subprocess.on('close', (code: any) => {
log(`[NITRO]::Debug: Nitro exited with code: ${code}`)
subprocess = undefined
reject(`child process exited with code ${code}`)
})
tcpPortUsed.waitUntilUsed(PORT, 300, 30000).then(() => {
log(`[NITRO]::Debug: Nitro is ready`);
resolve();
});
});
log(`[NITRO]::Debug: Nitro is ready`)
resolve()
})
})
}
/**
@@ -360,7 +372,7 @@ function spawnNitroProcess(): Promise<any> {
*/
function dispose() {
// clean other registered resources here
killSubprocess();
killSubprocess()
}
export default {
@@ -370,4 +382,4 @@ export default {
dispose,
updateNvidiaInfo,
getCurrentNitroProcessInfo: () => getNitroProcessInfo(subprocess),
};
}
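Taken together, the runModel changes in this file pick llama_model_path with a clear priority. A condensed sketch under the same fs/path/@janhq/core imports used above; the function name is illustrative, and the real code also rejects when nothing is found and resolves mmproj the same way:

import fs from 'fs'
import path from 'path'
import { Model } from '@janhq/core'

function resolveLlamaModelPath(model: Model, modelFolder: string): string | undefined {
  let llama_model_path = model.settings.llama_model_path

  // 1. Absolute source paths that already exist on disk (manually imported models)
  if (model.sources?.length && model.sources.every((s) => fs.existsSync(s.url))) {
    llama_model_path =
      model.sources.length === 1
        ? model.sources[0].url
        : model.sources.find((s) => s.url.includes(llama_model_path ?? model.id))?.url
  }

  // 2. Otherwise scan the model folder: a predefined llama_model_path first,
  //    then any .gguf file, then a file named after the model id (backward compatibility)
  if (!llama_model_path || !path.isAbsolute(llama_model_path)) {
    const ggufBinFile = fs
      .readdirSync(modelFolder)
      .find(
        (file) =>
          (llama_model_path && file === llama_model_path) ||
          file.toLowerCase().includes('.gguf') ||
          file === model.id
      )
    if (ggufBinFile) llama_model_path = path.join(modelFolder, ggufBinFile)
  }

  return llama_model_path
}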

View File

@@ -1,8 +0,0 @@
{
"semi": false,
"singleQuote": true,
"quoteProps": "consistent",
"trailingComma": "es5",
"endOfLine": "auto",
"plugins": ["prettier-plugin-tailwindcss"]
}

View File

@@ -219,15 +219,20 @@ export default class JanModelExtension extends ModelExtension {
async getDownloadedModels(): Promise<Model[]> {
return await this.getModelsMetadata(
async (modelDir: string, model: Model) => {
if (model.engine !== JanModelExtension._offlineInferenceEngine) {
if (model.engine !== JanModelExtension._offlineInferenceEngine)
return true
}
// model binaries (sources) are absolute path & exist
const existFiles = await Promise.all(
model.sources.map((source) => fs.existsSync(source.url))
)
if (existFiles.every((exist) => exist)) return true
return await fs
.readdirSync(await joinPath([JanModelExtension._homeDir, modelDir]))
.then((files: string[]) => {
// or model binary exists in the directory
// model binary name can match model ID or be a .gguf file and not be an incompleted model file
// TODO: Check diff between urls, filenames
// Model binary exists in the directory
// Model binary name can match the model ID or be a .gguf file, and must not be an incomplete model file
return (
files.includes(modelDir) ||
files.filter(
@@ -273,8 +278,19 @@ export default class JanModelExtension extends ModelExtension {
if (await fs.existsSync(jsonPath)) {
// if we have the model.json file, read it
let model = await this.readModelMetadata(jsonPath)
model = typeof model === 'object' ? model : JSON.parse(model)
// This to ensure backward compatibility with `model.json` with `source_url`
if (model['source_url'] != null) {
model['sources'] = [
{
filename: model.id,
url: model['source_url'],
},
]
}
if (selector && !(await selector?.(dirName, model))) {
return
}
@@ -288,31 +304,18 @@
})
const results = await Promise.allSettled(readJsonPromises)
const modelData = results.map((result) => {
if (result.status === 'fulfilled') {
if (result.status === 'fulfilled' && result.value) {
try {
// This to ensure backward compatibility with `model.json` with `source_url`
const tmpModel =
const model =
typeof result.value === 'object'
? result.value
: JSON.parse(result.value)
if (tmpModel['source_url'] != null) {
tmpModel['source'] = [
{
filename: tmpModel.id,
url: tmpModel['source_url'],
},
]
}
return tmpModel as Model
return model as Model
} catch {
console.debug(`Unable to parse model metadata: ${result.value}`)
return undefined
}
} else {
console.error(result.reason)
return undefined
}
return undefined
})
return modelData.filter((e) => !!e)
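On the model-extension side, legacy model.json files that only carry `source_url` are normalized into the newer `sources` array while metadata is read, and a model whose sources are all existing absolute paths is treated as downloaded without scanning its folder. A minimal sketch of the normalization, with an illustrative helper name:

import { Model } from '@janhq/core'

function normalizeLegacySources(model: any): Model {
  // Backward compatibility: a model.json written with `source_url` gains a
  // `sources` entry named after the model id, matching the newer schema.
  if (model['source_url'] != null) {
    model['sources'] = [{ filename: model.id, url: model['source_url'] }]
  }
  return model as Model
}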

View File

@@ -1,5 +0,0 @@
.next/
node_modules/
dist/
*.hbs
*.mdx

View File

@@ -1,8 +0,0 @@
{
"semi": false,
"singleQuote": true,
"quoteProps": "consistent",
"trailingComma": "es5",
"endOfLine": "auto",
"plugins": ["prettier-plugin-tailwindcss"]
}

View File

@@ -1,12 +1,5 @@
/* eslint-disable @typescript-eslint/no-explicit-any */
import {
AppRoute,
CoreRoutes,
DownloadRoute,
ExtensionRoute,
FileManagerRoute,
FileSystemRoute,
} from '@janhq/core'
import { CoreRoutes } from '@janhq/core'
import { safeJsonParse } from '@/utils/json'