feat: Add default value for ngl (#1886)

* fix: Add fallback value for ngl

* fix: Handling type
hiro authored 2024-02-01 19:11:05 +07:00 (committed via GitHub)
parent ae073d2703
commit 11e2a763cb
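
In short, the diff below makes loadLLMModel fall back to ngl = 100 (the number of model layers offloaded to the GPU) whenever the caller does not supply a value, and drops the now-unused local ModelSettingArgs interface in favour of ModelSettingParams. A minimal standalone sketch of the fallback, with an illustrative LoadSettings type and ctx_len value that are not part of the diff:

// Sketch of the fallback applied in loadLLMModel; LoadSettings is a stand-in
// for the settings object the extension actually passes around.
type LoadSettings = { ngl?: number; [key: string]: unknown };

function withDefaultNgl(settings: LoadSettings): LoadSettings {
  // Offload 100 layers to the GPU when the caller omits ngl.
  // Note: a falsy check like this also replaces an explicit ngl of 0.
  if (!settings?.ngl) {
    settings.ngl = 100;
  }
  return settings;
}

// Example: a caller that omits ngl ends up requesting 100 offloaded layers.
const loadSettings = withDefaultNgl({ ctx_len: 2048 });
console.log(loadSettings.ngl); // 100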

@@ -27,15 +27,6 @@ interface ModelInitOptions {
   modelFullPath: string;
   model: Model;
 }
-
-/**
- * Model setting args for Nitro model load.
- */
-interface ModelSettingArgs extends ModelSettingParams {
-  llama_model_path: string;
-  cpu_threads: number;
-}
-
 // The PORT to use for the Nitro subprocess
 const PORT = 3928;
 // The HOST address to use for the Nitro subprocess
@@ -58,7 +49,7 @@ let subprocess: ChildProcessWithoutNullStreams | undefined = undefined;
 // The current model file url
 let currentModelFile: string = "";
 // The current model settings
-let currentSettings: ModelSettingArgs | undefined = undefined;
+let currentSettings: ModelSettingParams | undefined = undefined;
 
 /**
  * Stops a Nitro subprocess.
@@ -76,7 +67,7 @@ function stopModel(): Promise<void> {
  * TODO: Should pass absolute of the model file instead of just the name - So we can modurize the module.ts to npm package
  */
 async function runModel(
-  wrapper: ModelInitOptions,
+  wrapper: ModelInitOptions
 ): Promise<ModelOperationResponse | void> {
   if (wrapper.model.engine !== InferenceEngine.nitro) {
     // Not a nitro model
@@ -94,7 +85,7 @@ async function runModel(
   const ggufBinFile = files.find(
     (file) =>
       file === path.basename(currentModelFile) ||
-      file.toLowerCase().includes(SUPPORTED_MODEL_FORMAT),
+      file.toLowerCase().includes(SUPPORTED_MODEL_FORMAT)
   );
 
   if (!ggufBinFile) return Promise.reject("No GGUF model file found");
@@ -189,10 +180,10 @@ function promptTemplateConverter(promptTemplate: string): PromptTemplate {
     const system_prompt = promptTemplate.substring(0, systemIndex);
     const user_prompt = promptTemplate.substring(
       systemIndex + systemMarker.length,
-      promptIndex,
+      promptIndex
     );
     const ai_prompt = promptTemplate.substring(
-      promptIndex + promptMarker.length,
+      promptIndex + promptMarker.length
     );
 
     // Return the split parts
@@ -202,7 +193,7 @@ function promptTemplateConverter(promptTemplate: string): PromptTemplate {
     const promptIndex = promptTemplate.indexOf(promptMarker);
     const user_prompt = promptTemplate.substring(0, promptIndex);
     const ai_prompt = promptTemplate.substring(
-      promptIndex + promptMarker.length,
+      promptIndex + promptMarker.length
     );
 
     // Return the split parts
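
The two hunks above only drop trailing commas inside promptTemplateConverter; for reference, here is a standalone sketch of the splitting logic they touch. The marker strings are assumptions, since the real systemMarker and promptMarker constants are defined outside this diff:

// Standalone sketch of promptTemplateConverter's splitting logic.
// The marker values below are assumed; the real constants are not in this diff.
const systemMarker = "{system_message}";
const promptMarker = "{prompt}";

function splitPromptTemplate(promptTemplate: string) {
  const systemIndex = promptTemplate.indexOf(systemMarker);
  const promptIndex = promptTemplate.indexOf(promptMarker);

  if (systemIndex !== -1 && promptIndex !== -1) {
    // Both markers present: split into system, user, and ai segments.
    return {
      system_prompt: promptTemplate.substring(0, systemIndex),
      user_prompt: promptTemplate.substring(
        systemIndex + systemMarker.length,
        promptIndex
      ),
      ai_prompt: promptTemplate.substring(promptIndex + promptMarker.length),
    };
  }

  if (promptIndex !== -1) {
    // Only the prompt marker present: no separate system prompt.
    return {
      user_prompt: promptTemplate.substring(0, promptIndex),
      ai_prompt: promptTemplate.substring(promptIndex + promptMarker.length),
    };
  }

  return undefined;
}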
@@ -218,6 +209,9 @@ function promptTemplateConverter(promptTemplate: string): PromptTemplate {
  * @returns A Promise that resolves when the model is loaded successfully, or rejects with an error message if the model is not found or fails to load.
  */
 function loadLLMModel(settings: any): Promise<Response> {
+  if (!settings?.ngl) {
+    settings.ngl = 100;
+  }
   log(`[NITRO]::Debug: Loading model with params ${JSON.stringify(settings)}`);
   return fetchRetry(NITRO_HTTP_LOAD_MODEL_URL, {
     method: "POST",
@@ -231,8 +225,8 @@ function loadLLMModel(settings: any): Promise<Response> {
     .then((res) => {
       log(
         `[NITRO]::Debug: Load model success with response ${JSON.stringify(
-          res,
-        )}`,
+          res
+        )}`
       );
       return Promise.resolve(res);
     })
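
With the fallback in place, a load request that omits ngl reaches Nitro with ngl set to 100. A rough illustration of the resulting request follows; the endpoint path and the body fields other than ngl are assumptions, not values taken from this diff:

// Illustrative request only: the URL and the model path / ctx_len values are
// placeholders; ngl: 100 is the default injected by loadLLMModel.
const NITRO_HTTP_LOAD_MODEL_URL =
  "http://127.0.0.1:3928/inferences/llamacpp/loadmodel"; // assumed endpoint

fetch(NITRO_HTTP_LOAD_MODEL_URL, {
  method: "POST",
  headers: { "Content-Type": "application/json" },
  body: JSON.stringify({
    llama_model_path: "/models/example.gguf", // placeholder path
    ctx_len: 2048, // placeholder value
    ngl: 100, // default injected when the caller omits it
  }),
}).then((res) => console.log(res.ok));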
@@ -261,8 +255,8 @@ async function validateModelStatus(): Promise<void> {
   }).then(async (res: Response) => {
     log(
       `[NITRO]::Debug: Validate model state success with response ${JSON.stringify(
-        res,
-      )}`,
+        res
+      )}`
     );
     // If the response is OK, check model_loaded status.
     if (res.ok) {
@@ -313,7 +307,7 @@ function spawnNitroProcess(): Promise<any> {
     const args: string[] = ["1", LOCAL_HOST, PORT.toString()];
     // Execute the binary
     log(
-      `[NITRO]::Debug: Spawn nitro at path: ${executableOptions.executablePath}, and args: ${args}`,
+      `[NITRO]::Debug: Spawn nitro at path: ${executableOptions.executablePath}, and args: ${args}`
     );
     subprocess = spawn(
       executableOptions.executablePath,
@@ -324,7 +318,7 @@ function spawnNitroProcess(): Promise<any> {
         ...process.env,
         CUDA_VISIBLE_DEVICES: executableOptions.cudaVisibleDevices,
       },
-    },
+    }
     );
 
     // Handle subprocess output
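
The last two hunks only reformat the spawn call; below is a minimal self-contained sketch of the same pattern. The executable path, CUDA device list, and LOCAL_HOST value are placeholders rather than values from the diff:

import { spawn, ChildProcessWithoutNullStreams } from "child_process";

// Placeholders standing in for values resolved elsewhere in the module.
const LOCAL_HOST = "127.0.0.1"; // assumed loopback host
const PORT = 3928;
const executablePath = "/path/to/nitro"; // placeholder
const cudaVisibleDevices = "0"; // placeholder

// Same argument shape as spawnNitroProcess: first arg "1", then host and port,
// with the visible GPUs restricted through the subprocess environment.
const args: string[] = ["1", LOCAL_HOST, PORT.toString()];

const subprocess: ChildProcessWithoutNullStreams = spawn(executablePath, args, {
  env: {
    ...process.env,
    CUDA_VISIBLE_DEVICES: cudaVisibleDevices,
  },
});

subprocess.stdout.on("data", (data) => {
  console.log(`[NITRO]::Debug: ${data}`);
});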