Merge pull request #952 from janhq/update-prompt-template
feat: Inference Nitro with Prompt Template
This commit is contained in:
commit
f7c7ad5ecf
@ -119,9 +119,7 @@ export type ModelSettingParams = {
|
|||||||
embedding?: boolean
|
embedding?: boolean
|
||||||
n_parallel?: number
|
n_parallel?: number
|
||||||
cpu_threads?: number
|
cpu_threads?: number
|
||||||
system_prompt?: string
|
prompt_template?: string
|
||||||
user_prompt?: string
|
|
||||||
ai_prompt?: string
|
|
||||||
}
|
}
|
||||||
|
|
||||||
/**
|
/**
|
||||||
|
|||||||
@ -46,9 +46,19 @@ async function initModel(wrapper: any): Promise<ModelOperationResponse> {
|
|||||||
} else {
|
} else {
|
||||||
// Gather system information for CPU physical cores and memory
|
// Gather system information for CPU physical cores and memory
|
||||||
const nitroResourceProbe = await getResourcesInfo();
|
const nitroResourceProbe = await getResourcesInfo();
|
||||||
console.log(
|
|
||||||
"Nitro with physical core: " + nitroResourceProbe.numCpuPhysicalCore
|
// Convert settings.prompt_template to system_prompt, user_prompt, ai_prompt
|
||||||
);
|
if (wrapper.model.settings.prompt_template) {
|
||||||
|
const promptTemplate = wrapper.model.settings.prompt_template;
|
||||||
|
const prompt = promptTemplateConverter(promptTemplate);
|
||||||
|
if (prompt.error) {
|
||||||
|
return Promise.resolve({ error: prompt.error });
|
||||||
|
}
|
||||||
|
wrapper.model.settings.system_prompt = prompt.system_prompt;
|
||||||
|
wrapper.model.settings.user_prompt = prompt.user_prompt;
|
||||||
|
wrapper.model.settings.ai_prompt = prompt.ai_prompt;
|
||||||
|
}
|
||||||
|
|
||||||
const settings = {
|
const settings = {
|
||||||
llama_model_path: currentModelFile,
|
llama_model_path: currentModelFile,
|
||||||
...wrapper.model.settings,
|
...wrapper.model.settings,
|
||||||
@ -74,12 +84,53 @@ async function initModel(wrapper: any): Promise<ModelOperationResponse> {
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/**
 * Result of splitting a prompt template: either the three literal text
 * segments, or an `error` message when the template cannot be split.
 * Both shapes expose every key so callers can read `result.error` and
 * `result.system_prompt` without narrowing first.
 */
type PromptTemplateParts =
  | {
      system_prompt: string;
      user_prompt: string;
      ai_prompt: string;
      error?: undefined;
    }
  | {
      error: string;
      system_prompt?: undefined;
      user_prompt?: undefined;
      ai_prompt?: undefined;
    };

/**
 * Splits a prompt template into the literal text surrounding its markers.
 *
 * Given `"<A>{system_message}<B>{prompt}<C>"` the result is
 * `{ system_prompt: "<A>", user_prompt: "<B>", ai_prompt: "<C>" }`.
 * When only `{prompt}` is present, `system_prompt` is the empty string and
 * `user_prompt` is everything before the marker.
 *
 * @param promptTemplate - Template containing `{prompt}` and, optionally,
 *   `{system_message}` placed before it.
 * @returns The three segments, or `{ error }` when the input is not a
 *   string, `{prompt}` is missing, or `{system_message}` appears after
 *   `{prompt}`.
 */
function promptTemplateConverter(promptTemplate: string): PromptTemplateParts {
  const systemMarker = "{system_message}";
  const promptMarker = "{prompt}";
  const splitError = "Cannot split prompt template";

  // The visible caller passes an untyped settings value, so guard at runtime
  // instead of letting a non-string throw a TypeError on indexOf.
  if (typeof promptTemplate !== "string") {
    return { error: splitError };
  }

  // {prompt} is mandatory in every supported template shape.
  const promptIndex = promptTemplate.indexOf(promptMarker);
  if (promptIndex === -1) {
    return { error: splitError };
  }
  const ai_prompt = promptTemplate.substring(promptIndex + promptMarker.length);

  const systemIndex = promptTemplate.indexOf(systemMarker);
  if (systemIndex === -1) {
    // No system marker: everything before {prompt} is the user prefix.
    return {
      system_prompt: "",
      user_prompt: promptTemplate.substring(0, promptIndex),
      ai_prompt,
    };
  }

  // substring() silently swaps its arguments when start > end, so a template
  // with {prompt} before {system_message} would yield garbage segments.
  // Reject it explicitly instead.
  if (systemIndex > promptIndex) {
    return {
      error: `${splitError}: ${systemMarker} must appear before ${promptMarker}`,
    };
  }

  return {
    system_prompt: promptTemplate.substring(0, systemIndex),
    user_prompt: promptTemplate.substring(
      systemIndex + systemMarker.length,
      promptIndex
    ),
    ai_prompt,
  };
}
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* Loads a LLM model into the Nitro subprocess by sending a HTTP POST request.
|
* Loads a LLM model into the Nitro subprocess by sending a HTTP POST request.
|
||||||
* @returns A Promise that resolves when the model is loaded successfully, or rejects with an error message if the model is not found or fails to load.
|
* @returns A Promise that resolves when the model is loaded successfully, or rejects with an error message if the model is not found or fails to load.
|
||||||
*/
|
*/
|
||||||
function loadLLMModel(settings): Promise<Response> {
|
function loadLLMModel(settings): Promise<Response> {
|
||||||
// Load model config
|
|
||||||
return fetchRetry(NITRO_HTTP_LOAD_MODEL_URL, {
|
return fetchRetry(NITRO_HTTP_LOAD_MODEL_URL, {
|
||||||
method: "POST",
|
method: "POST",
|
||||||
headers: {
|
headers: {
|
||||||
|
|||||||
@ -8,9 +8,7 @@
|
|||||||
"format": "gguf",
|
"format": "gguf",
|
||||||
"settings": {
|
"settings": {
|
||||||
"ctx_len": 2048,
|
"ctx_len": 2048,
|
||||||
"system_prompt": "",
|
"prompt_template": "USER:\n{prompt}\nASSISTANT:"
|
||||||
"user_prompt": "USER:\n",
|
|
||||||
"ai_prompt": "ASSISTANT:\n"
|
|
||||||
},
|
},
|
||||||
"parameters": {
|
"parameters": {
|
||||||
"max_tokens": 2048
|
"max_tokens": 2048
|
||||||
|
|||||||
@ -9,9 +9,7 @@
|
|||||||
"format": "gguf",
|
"format": "gguf",
|
||||||
"settings": {
|
"settings": {
|
||||||
"ctx_len": 2048,
|
"ctx_len": 2048,
|
||||||
"system_prompt": "",
|
"prompt_template": "### Instruction:\n{prompt}\n### Response:"
|
||||||
"user_prompt": "### Instruction:\n",
|
|
||||||
"ai_prompt": "### Response:\n"
|
|
||||||
},
|
},
|
||||||
"parameters": {
|
"parameters": {
|
||||||
"max_tokens": 2048
|
"max_tokens": 2048
|
||||||
|
|||||||
@ -8,9 +8,7 @@
|
|||||||
"format": "gguf",
|
"format": "gguf",
|
||||||
"settings": {
|
"settings": {
|
||||||
"ctx_len": 2048,
|
"ctx_len": 2048,
|
||||||
"system_prompt": "",
|
"prompt_template": "### Instruction:\n{prompt}\n### Response:"
|
||||||
"user_prompt": "### Instruction:\n",
|
|
||||||
"ai_prompt": "### Response:\n"
|
|
||||||
},
|
},
|
||||||
"parameters": {
|
"parameters": {
|
||||||
"max_tokens": 2048
|
"max_tokens": 2048
|
||||||
|
|||||||
@ -8,9 +8,7 @@
|
|||||||
"format": "gguf",
|
"format": "gguf",
|
||||||
"settings": {
|
"settings": {
|
||||||
"ctx_len": 2048,
|
"ctx_len": 2048,
|
||||||
"system_prompt": "[INST] <<SYS>>\n",
|
"prompt_template": "[INST] <<SYS>>\n{system_message}<</SYS>>\n{prompt}[/INST]"
|
||||||
"user_prompt": "<</SYS>>\n",
|
|
||||||
"ai_prompt": "[/INST]"
|
|
||||||
},
|
},
|
||||||
"parameters": {
|
"parameters": {
|
||||||
"max_tokens": 2048
|
"max_tokens": 2048
|
||||||
|
|||||||
@ -8,9 +8,7 @@
|
|||||||
"format": "gguf",
|
"format": "gguf",
|
||||||
"settings": {
|
"settings": {
|
||||||
"ctx_len": 2048,
|
"ctx_len": 2048,
|
||||||
"system_prompt": "[INST] <<SYS>>\n",
|
"prompt_template": "[INST] <<SYS>>\n{system_message}<</SYS>>\n{prompt}[/INST]"
|
||||||
"user_prompt": "<</SYS>>\n",
|
|
||||||
"ai_prompt": "[/INST]"
|
|
||||||
},
|
},
|
||||||
"parameters": {
|
"parameters": {
|
||||||
"max_tokens": 2048
|
"max_tokens": 2048
|
||||||
|
|||||||
@ -8,9 +8,7 @@
|
|||||||
"format": "gguf",
|
"format": "gguf",
|
||||||
"settings": {
|
"settings": {
|
||||||
"ctx_len": 2048,
|
"ctx_len": 2048,
|
||||||
"system_prompt": "[INST] <<SYS>>\n",
|
"prompt_template": "[INST] <<SYS>>\n{system_message}<</SYS>>\n{prompt}[/INST]"
|
||||||
"user_prompt": "<</SYS>>\n",
|
|
||||||
"ai_prompt": "[/INST]"
|
|
||||||
},
|
},
|
||||||
"parameters": {
|
"parameters": {
|
||||||
"max_tokens": 2048
|
"max_tokens": 2048
|
||||||
|
|||||||
@ -8,9 +8,7 @@
|
|||||||
"format": "gguf",
|
"format": "gguf",
|
||||||
"settings": {
|
"settings": {
|
||||||
"ctx_len": 2048,
|
"ctx_len": 2048,
|
||||||
"system_prompt": "",
|
"prompt_template": "USER:\n{prompt}\nASSISTANT:"
|
||||||
"user_prompt": "USER:\n",
|
|
||||||
"ai_prompt": "ASSISTANT:\n"
|
|
||||||
},
|
},
|
||||||
"parameters": {
|
"parameters": {
|
||||||
"max_tokens": 2048
|
"max_tokens": 2048
|
||||||
|
|||||||
@ -10,7 +10,8 @@
|
|||||||
"ctx_len": 2048,
|
"ctx_len": 2048,
|
||||||
"system_prompt": "",
|
"system_prompt": "",
|
||||||
"user_prompt": "<s>[INST]",
|
"user_prompt": "<s>[INST]",
|
||||||
"ai_prompt": "[/INST]"
|
"ai_prompt": "[/INST]",
|
||||||
|
"prompt_template": "<s>[INST]{prompt}\n[/INST]"
|
||||||
},
|
},
|
||||||
"parameters": {
|
"parameters": {
|
||||||
"max_tokens": 2048
|
"max_tokens": 2048
|
||||||
|
|||||||
@ -8,9 +8,7 @@
|
|||||||
"format": "gguf",
|
"format": "gguf",
|
||||||
"settings": {
|
"settings": {
|
||||||
"ctx_len": 2048,
|
"ctx_len": 2048,
|
||||||
"system_prompt": "",
|
"prompt_template": "<s>[INST]{prompt}\n[/INST]"
|
||||||
"user_prompt": "<s>[INST]",
|
|
||||||
"ai_prompt": "[/INST]"
|
|
||||||
},
|
},
|
||||||
"parameters": {
|
"parameters": {
|
||||||
"max_tokens": 2048
|
"max_tokens": 2048
|
||||||
|
|||||||
@ -8,9 +8,7 @@
|
|||||||
"format": "gguf",
|
"format": "gguf",
|
||||||
"settings": {
|
"settings": {
|
||||||
"ctx_len": 2048,
|
"ctx_len": 2048,
|
||||||
"system_prompt": "### System:\n",
|
"prompt_template": "### System:\n{system_message}### User:\n{prompt}### Assistant:"
|
||||||
"user_prompt": "### User:\n",
|
|
||||||
"ai_prompt": "### Assistant:\n"
|
|
||||||
},
|
},
|
||||||
"parameters": {
|
"parameters": {
|
||||||
"max_tokens": 2048
|
"max_tokens": 2048
|
||||||
|
|||||||
@ -8,9 +8,7 @@
|
|||||||
"format": "gguf",
|
"format": "gguf",
|
||||||
"settings": {
|
"settings": {
|
||||||
"ctx_len": 2048,
|
"ctx_len": 2048,
|
||||||
"system_prompt": "",
|
"prompt_template": "### Instruction:{prompt}\n### Response:"
|
||||||
"user_prompt": "### Instruction:\n",
|
|
||||||
"ai_prompt": "### Response:\n"
|
|
||||||
},
|
},
|
||||||
"parameters": {
|
"parameters": {
|
||||||
"max_tokens": 2048
|
"max_tokens": 2048
|
||||||
|
|||||||
@ -8,9 +8,7 @@
|
|||||||
"format": "gguf",
|
"format": "gguf",
|
||||||
"settings": {
|
"settings": {
|
||||||
"ctx_len": 2048,
|
"ctx_len": 2048,
|
||||||
"system_prompt": "<|im_start|>system\n",
|
"prompt_template": "<|im_start|>system\n{system_message}<|im_end|>\n<|im_start|>user\n{prompt}<|im_end|>\n<|im_start|>assistant"
|
||||||
"user_prompt": "<|im_end|>\n<|im_start|>user\n",
|
|
||||||
"ai_prompt": "<|im_end|>\n<|im_start|>assistant\n"
|
|
||||||
},
|
},
|
||||||
"parameters": {
|
"parameters": {
|
||||||
"max_tokens": 2048
|
"max_tokens": 2048
|
||||||
|
|||||||
@ -8,9 +8,7 @@
|
|||||||
"format": "gguf",
|
"format": "gguf",
|
||||||
"settings": {
|
"settings": {
|
||||||
"ctx_len": 2048,
|
"ctx_len": 2048,
|
||||||
"system_prompt": "<|im_start|>system\n",
|
"prompt_template": "<|im_start|>system\n{system_message}<|im_end|>\n<|im_start|>user\n{prompt}<|im_end|>\n<|im_start|>assistant"
|
||||||
"user_prompt": "<|im_end|>\n<|im_start|>user\n",
|
|
||||||
"ai_prompt": "<|im_end|>\n<|im_start|>assistant\n"
|
|
||||||
},
|
},
|
||||||
"parameters": {
|
"parameters": {
|
||||||
"max_tokens": 2048
|
"max_tokens": 2048
|
||||||
|
|||||||
@ -8,9 +8,7 @@
|
|||||||
"format": "gguf",
|
"format": "gguf",
|
||||||
"settings": {
|
"settings": {
|
||||||
"ctx_len": 2048,
|
"ctx_len": 2048,
|
||||||
"system_prompt": "### System Prompt\n",
|
"prompt_template": "### System Prompt\n{system_message}\n### User Message\n{prompt}\n### Assistant"
|
||||||
"user_prompt": "### User Message\n",
|
|
||||||
"ai_prompt": "### Assistant\n"
|
|
||||||
},
|
},
|
||||||
"parameters": {
|
"parameters": {
|
||||||
"max_tokens": 2048
|
"max_tokens": 2048
|
||||||
|
|||||||
@ -8,9 +8,7 @@
|
|||||||
"format": "gguf",
|
"format": "gguf",
|
||||||
"settings": {
|
"settings": {
|
||||||
"ctx_len": 4096,
|
"ctx_len": 4096,
|
||||||
"system_prompt": "<|im_start|>system\n",
|
"prompt_template": "<|im_start|>system\n{system_message}<|im_end|>\n<|im_start|>user\n{prompt}<|im_end|>\n<|im_start|>assistant"
|
||||||
"user_prompt": "<|im_end|>\n<|im_start|>user\n",
|
|
||||||
"ai_prompt": "<|im_end|>\n<|im_start|>assistant\n"
|
|
||||||
},
|
},
|
||||||
"parameters": {
|
"parameters": {
|
||||||
"max_tokens": 2048
|
"max_tokens": 2048
|
||||||
|
|||||||
@ -8,9 +8,7 @@
|
|||||||
"format": "gguf",
|
"format": "gguf",
|
||||||
"settings": {
|
"settings": {
|
||||||
"ctx_len": 2048,
|
"ctx_len": 2048,
|
||||||
"system_prompt": "",
|
"prompt_template": "GPT4 User: {prompt}<|end_of_turn|>GPT4 Assistant:"
|
||||||
"user_prompt": "GPT4 User: ",
|
|
||||||
"ai_prompt": "<|end_of_turn|>\nGPT4 Assistant: "
|
|
||||||
},
|
},
|
||||||
"parameters": {
|
"parameters": {
|
||||||
"max_tokens": 2048
|
"max_tokens": 2048
|
||||||
|
|||||||
@ -8,9 +8,7 @@
|
|||||||
"format": "gguf",
|
"format": "gguf",
|
||||||
"settings": {
|
"settings": {
|
||||||
"ctx_len": 2048,
|
"ctx_len": 2048,
|
||||||
"system_prompt": "",
|
"prompt_template": "### Instruction:\n{prompt}\n### Response:"
|
||||||
"user_prompt": "### Instruction: ",
|
|
||||||
"ai_prompt": "\n### Response: "
|
|
||||||
},
|
},
|
||||||
"parameters": {
|
"parameters": {
|
||||||
"max_tokens": 2048
|
"max_tokens": 2048
|
||||||
|
|||||||
@ -8,9 +8,7 @@
|
|||||||
"format": "gguf",
|
"format": "gguf",
|
||||||
"settings": {
|
"settings": {
|
||||||
"ctx_len": 2048,
|
"ctx_len": 2048,
|
||||||
"system_prompt": "<|system|>\n",
|
"prompt_template": "<|system|>\n{system_message}<|user|>\n{prompt}<|assistant|>"
|
||||||
"user_prompt": "<|user|>\n",
|
|
||||||
"ai_prompt": "<|assistant|>\n"
|
|
||||||
},
|
},
|
||||||
"parameters": {
|
"parameters": {
|
||||||
"max_tokens": 2048
|
"max_tokens": 2048
|
||||||
|
|||||||
@ -8,9 +8,7 @@
|
|||||||
"format": "gguf",
|
"format": "gguf",
|
||||||
"settings": {
|
"settings": {
|
||||||
"ctx_len": 2048,
|
"ctx_len": 2048,
|
||||||
"system_prompt": "",
|
"prompt_template": "### Instruction:\n{prompt}\n### Response:"
|
||||||
"user_prompt": "### Instruction:\n",
|
|
||||||
"ai_prompt": "### Response:\n"
|
|
||||||
},
|
},
|
||||||
"parameters": {
|
"parameters": {
|
||||||
"max_tokens": 2048
|
"max_tokens": 2048
|
||||||
|
|||||||
@ -8,9 +8,7 @@
|
|||||||
"format": "gguf",
|
"format": "gguf",
|
||||||
"settings": {
|
"settings": {
|
||||||
"ctx_len": 2048,
|
"ctx_len": 2048,
|
||||||
"system_prompt": "<|im_start|>system\n",
|
"prompt_template": "<|im_start|>system\n{system_message}<|im_end|>\n<|im_start|>user\n{prompt}<|im_end|>\n<|im_start|>assistant"
|
||||||
"user_prompt": "<|im_end|>\n<|im_start|>user\n",
|
|
||||||
"ai_prompt": "<|im_end|>\n<|im_start|>assistant\n"
|
|
||||||
},
|
},
|
||||||
"parameters": {
|
"parameters": {
|
||||||
"max_tokens": 2048
|
"max_tokens": 2048
|
||||||
|
|||||||
@ -8,9 +8,7 @@
|
|||||||
"format": "gguf",
|
"format": "gguf",
|
||||||
"settings": {
|
"settings": {
|
||||||
"ctx_len": 2048,
|
"ctx_len": 2048,
|
||||||
"system_prompt": "<|system|>\n",
|
"prompt_template": "<|system|>\n{system_message}</s>\n<|user|>\n{prompt}</s>\n<|assistant|>"
|
||||||
"user_prompt": "</s>\n<|user|>\n",
|
|
||||||
"ai_prompt": "</s>\n<|assistant|>\n"
|
|
||||||
},
|
},
|
||||||
"parameters": {
|
"parameters": {
|
||||||
"max_tokens": 2048
|
"max_tokens": 2048
|
||||||
|
|||||||
@ -1,6 +1,6 @@
|
|||||||
/* eslint-disable @typescript-eslint/no-explicit-any */
|
/* eslint-disable @typescript-eslint/no-explicit-any */
|
||||||
import { EventName, events } from '@janhq/core'
|
import { EventName, events } from '@janhq/core'
|
||||||
import { Model, ModelSettingParams } from '@janhq/core'
|
import { Model } from '@janhq/core'
|
||||||
import { atom, useAtom } from 'jotai'
|
import { atom, useAtom } from 'jotai'
|
||||||
|
|
||||||
import { toaster } from '@/containers/Toast'
|
import { toaster } from '@/containers/Toast'
|
||||||
|
|||||||
Loading…
x
Reference in New Issue
Block a user