feat: Add default value for ngl (#1886)
* fix: Add fallback value for ngl * fix: Handling type
This commit is contained in:
parent
ae073d2703
commit
11e2a763cb
@ -27,15 +27,6 @@ interface ModelInitOptions {
|
|||||||
modelFullPath: string;
|
modelFullPath: string;
|
||||||
model: Model;
|
model: Model;
|
||||||
}
|
}
|
||||||
|
|
||||||
/**
|
|
||||||
* Model setting args for Nitro model load.
|
|
||||||
*/
|
|
||||||
interface ModelSettingArgs extends ModelSettingParams {
|
|
||||||
llama_model_path: string;
|
|
||||||
cpu_threads: number;
|
|
||||||
}
|
|
||||||
|
|
||||||
// The PORT to use for the Nitro subprocess
|
// The PORT to use for the Nitro subprocess
|
||||||
const PORT = 3928;
|
const PORT = 3928;
|
||||||
// The HOST address to use for the Nitro subprocess
|
// The HOST address to use for the Nitro subprocess
|
||||||
@ -58,7 +49,7 @@ let subprocess: ChildProcessWithoutNullStreams | undefined = undefined;
|
|||||||
// The current model file url
|
// The current model file url
|
||||||
let currentModelFile: string = "";
|
let currentModelFile: string = "";
|
||||||
// The current model settings
|
// The current model settings
|
||||||
let currentSettings: ModelSettingArgs | undefined = undefined;
|
let currentSettings: ModelSettingParams | undefined = undefined;
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* Stops a Nitro subprocess.
|
* Stops a Nitro subprocess.
|
||||||
@ -76,7 +67,7 @@ function stopModel(): Promise<void> {
|
|||||||
* TODO: Should pass the absolute path of the model file instead of just the name, so we can modularize module.ts into an npm package
|
* TODO: Should pass the absolute path of the model file instead of just the name, so we can modularize module.ts into an npm package
|
||||||
*/
|
*/
|
||||||
async function runModel(
|
async function runModel(
|
||||||
wrapper: ModelInitOptions,
|
wrapper: ModelInitOptions
|
||||||
): Promise<ModelOperationResponse | void> {
|
): Promise<ModelOperationResponse | void> {
|
||||||
if (wrapper.model.engine !== InferenceEngine.nitro) {
|
if (wrapper.model.engine !== InferenceEngine.nitro) {
|
||||||
// Not a nitro model
|
// Not a nitro model
|
||||||
@ -94,7 +85,7 @@ async function runModel(
|
|||||||
const ggufBinFile = files.find(
|
const ggufBinFile = files.find(
|
||||||
(file) =>
|
(file) =>
|
||||||
file === path.basename(currentModelFile) ||
|
file === path.basename(currentModelFile) ||
|
||||||
file.toLowerCase().includes(SUPPORTED_MODEL_FORMAT),
|
file.toLowerCase().includes(SUPPORTED_MODEL_FORMAT)
|
||||||
);
|
);
|
||||||
|
|
||||||
if (!ggufBinFile) return Promise.reject("No GGUF model file found");
|
if (!ggufBinFile) return Promise.reject("No GGUF model file found");
|
||||||
@ -189,10 +180,10 @@ function promptTemplateConverter(promptTemplate: string): PromptTemplate {
|
|||||||
const system_prompt = promptTemplate.substring(0, systemIndex);
|
const system_prompt = promptTemplate.substring(0, systemIndex);
|
||||||
const user_prompt = promptTemplate.substring(
|
const user_prompt = promptTemplate.substring(
|
||||||
systemIndex + systemMarker.length,
|
systemIndex + systemMarker.length,
|
||||||
promptIndex,
|
promptIndex
|
||||||
);
|
);
|
||||||
const ai_prompt = promptTemplate.substring(
|
const ai_prompt = promptTemplate.substring(
|
||||||
promptIndex + promptMarker.length,
|
promptIndex + promptMarker.length
|
||||||
);
|
);
|
||||||
|
|
||||||
// Return the split parts
|
// Return the split parts
|
||||||
@ -202,7 +193,7 @@ function promptTemplateConverter(promptTemplate: string): PromptTemplate {
|
|||||||
const promptIndex = promptTemplate.indexOf(promptMarker);
|
const promptIndex = promptTemplate.indexOf(promptMarker);
|
||||||
const user_prompt = promptTemplate.substring(0, promptIndex);
|
const user_prompt = promptTemplate.substring(0, promptIndex);
|
||||||
const ai_prompt = promptTemplate.substring(
|
const ai_prompt = promptTemplate.substring(
|
||||||
promptIndex + promptMarker.length,
|
promptIndex + promptMarker.length
|
||||||
);
|
);
|
||||||
|
|
||||||
// Return the split parts
|
// Return the split parts
|
||||||
@ -218,6 +209,9 @@ function promptTemplateConverter(promptTemplate: string): PromptTemplate {
|
|||||||
* @returns A Promise that resolves when the model is loaded successfully, or rejects with an error message if the model is not found or fails to load.
|
* @returns A Promise that resolves when the model is loaded successfully, or rejects with an error message if the model is not found or fails to load.
|
||||||
*/
|
*/
|
||||||
function loadLLMModel(settings: any): Promise<Response> {
|
function loadLLMModel(settings: any): Promise<Response> {
|
||||||
|
if (!settings?.ngl) {
|
||||||
|
settings.ngl = 100;
|
||||||
|
}
|
||||||
log(`[NITRO]::Debug: Loading model with params ${JSON.stringify(settings)}`);
|
log(`[NITRO]::Debug: Loading model with params ${JSON.stringify(settings)}`);
|
||||||
return fetchRetry(NITRO_HTTP_LOAD_MODEL_URL, {
|
return fetchRetry(NITRO_HTTP_LOAD_MODEL_URL, {
|
||||||
method: "POST",
|
method: "POST",
|
||||||
@ -231,8 +225,8 @@ function loadLLMModel(settings: any): Promise<Response> {
|
|||||||
.then((res) => {
|
.then((res) => {
|
||||||
log(
|
log(
|
||||||
`[NITRO]::Debug: Load model success with response ${JSON.stringify(
|
`[NITRO]::Debug: Load model success with response ${JSON.stringify(
|
||||||
res,
|
res
|
||||||
)}`,
|
)}`
|
||||||
);
|
);
|
||||||
return Promise.resolve(res);
|
return Promise.resolve(res);
|
||||||
})
|
})
|
||||||
@ -261,8 +255,8 @@ async function validateModelStatus(): Promise<void> {
|
|||||||
}).then(async (res: Response) => {
|
}).then(async (res: Response) => {
|
||||||
log(
|
log(
|
||||||
`[NITRO]::Debug: Validate model state success with response ${JSON.stringify(
|
`[NITRO]::Debug: Validate model state success with response ${JSON.stringify(
|
||||||
res,
|
res
|
||||||
)}`,
|
)}`
|
||||||
);
|
);
|
||||||
// If the response is OK, check model_loaded status.
|
// If the response is OK, check model_loaded status.
|
||||||
if (res.ok) {
|
if (res.ok) {
|
||||||
@ -313,7 +307,7 @@ function spawnNitroProcess(): Promise<any> {
|
|||||||
const args: string[] = ["1", LOCAL_HOST, PORT.toString()];
|
const args: string[] = ["1", LOCAL_HOST, PORT.toString()];
|
||||||
// Execute the binary
|
// Execute the binary
|
||||||
log(
|
log(
|
||||||
`[NITRO]::Debug: Spawn nitro at path: ${executableOptions.executablePath}, and args: ${args}`,
|
`[NITRO]::Debug: Spawn nitro at path: ${executableOptions.executablePath}, and args: ${args}`
|
||||||
);
|
);
|
||||||
subprocess = spawn(
|
subprocess = spawn(
|
||||||
executableOptions.executablePath,
|
executableOptions.executablePath,
|
||||||
@ -324,7 +318,7 @@ function spawnNitroProcess(): Promise<any> {
|
|||||||
...process.env,
|
...process.env,
|
||||||
CUDA_VISIBLE_DEVICES: executableOptions.cudaVisibleDevices,
|
CUDA_VISIBLE_DEVICES: executableOptions.cudaVisibleDevices,
|
||||||
},
|
},
|
||||||
},
|
}
|
||||||
);
|
);
|
||||||
|
|
||||||
// Handle subprocess output
|
// Handle subprocess output
|
||||||
|
|||||||
Loading…
x
Reference in New Issue
Block a user