Merge pull request #2808 from janhq/release/0.4.12
Release/0.4.12 to main
commit 36c86f2afe
@@ -63,11 +63,11 @@ const runModel = async (modelId: string, settingParams?: ModelSettingParams): Pr
   const nitroResourceProbe = await getSystemResourceInfo()
   const nitroModelSettings: NitroModelSettings = {
+    // This is critical and requires real CPU physical core count (or performance core)
+    cpu_threads: Math.max(1, nitroResourceProbe.numCpuPhysicalCore),
     ...modelMetadata.settings,
     ...settingParams,
     llama_model_path: modelBinaryPath,
-    // This is critical and requires real CPU physical core count (or performance core)
-    cpu_threads: Math.max(1, nitroResourceProbe.numCpuPhysicalCore),
     ...(modelMetadata.settings.mmproj && {
       mmproj: join(modelFolderFullPath, modelMetadata.settings.mmproj),
     }),
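In this hunk the hardware-probed cpu_threads default moves ahead of the settings spreads, so values coming from model.json or from the caller's settingParams can override it. A minimal sketch of why spread order matters; buildSettings and its arguments are illustrative, not part of the Jan codebase:

```typescript
// Later spreads win: defaults first, then model settings, then per-call overrides.
type Settings = Record<string, unknown>

function buildSettings(defaults: Settings, modelSettings: Settings, overrides: Settings): Settings {
  return { ...defaults, ...modelSettings, ...overrides }
}

const merged = buildSettings(
  { cpu_threads: Math.max(1, 8) }, // computed default, e.g. 8 physical cores
  { cpu_threads: 4 },              // model.json settings
  {}                               // per-call settingParams
)
console.log(merged.cpu_threads) // 4 — the model setting overrides the probed default
```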
@@ -15,6 +15,8 @@ export const readEmbeddingEngine = (engineName: string) => {
  const settingDirectoryPath = path.join(
    getJanDataFolderPath(),
    'settings',
    '@janhq',
    // TODO: James - To be removed
    engineName === 'openai'
      ? 'inference-openai-extension'
      : 'inference-groq-extension',
@@ -1,7 +1,7 @@
 {
   "name": "@janhq/inference-nitro-extension",
   "productName": "Nitro Inference Engine",
-  "version": "1.0.2",
+  "version": "1.0.4",
   "description": "This extension embeds Nitro, a lightweight (3mb) inference engine written in C++. See https://nitro.jan.ai.\nAdditional dependencies could be installed to run without Cuda Toolkit installation.",
   "main": "dist/index.js",
   "node": "dist/node/index.cjs.js",
@@ -8,7 +8,7 @@
   "id": "command-r-34b",
   "object": "model",
   "name": "Command-R v01 34B Q4",
-  "version": "1.2",
+  "version": "1.3",
   "description": "C4AI Command-R developed by CohereAI is optimized for a variety of use cases including reasoning, summarization, and question answering.",
   "format": "gguf",
   "settings": {
@@ -27,7 +27,7 @@
   },
   "metadata": {
     "author": "CohereAI",
-    "tags": ["34B", "Finetuned"],
+    "tags": ["34B", "Finetuned", "Featured"],
     "size": 21500000000
   },
   "engine": "nitro"
@@ -8,7 +8,7 @@
   "id": "hermes-pro-7b",
   "object": "model",
   "name": "Hermes Pro 7B Q4",
-  "version": "1.0",
+  "version": "1.1",
   "description": "Hermes Pro is superior in Roleplaying, Reasoning and Explaining problem.",
   "format": "gguf",
   "settings": {
@@ -27,7 +27,7 @@
   },
   "metadata": {
     "author": "NousResearch",
-    "tags": ["7B", "Finetuned", "Featured"],
+    "tags": ["7B", "Finetuned"],
     "size": 4370000000
   },
   "engine": "nitro"
@@ -8,7 +8,7 @@
   "id": "openhermes-neural-7b",
   "object": "model",
   "name": "OpenHermes Neural 7B Q4",
-  "version": "1.0",
+  "version": "1.1",
   "description": "OpenHermes Neural is a merged model using the TIES method. It performs well in various benchmarks.",
   "format": "gguf",
   "settings": {
@@ -26,7 +26,7 @@
   },
   "metadata": {
     "author": "Intel, Jan",
-    "tags": ["7B", "Merged", "Featured"],
+    "tags": ["7B", "Merged"],
     "size": 4370000000,
     "cover": "https://raw.githubusercontent.com/janhq/jan/dev/models/openhermes-neural-7b/cover.png"
   },
@@ -8,7 +8,7 @@
   "id": "stealth-v1.2-7b",
   "object": "model",
   "name": "Stealth 7B Q4",
-  "version": "1.0",
+  "version": "1.1",
   "description": "This is a new experimental family designed to enhance Mathematical and Logical abilities.",
   "format": "gguf",
   "settings": {
@@ -26,7 +26,7 @@
   },
   "metadata": {
     "author": "Jan",
-    "tags": ["7B", "Finetuned", "Featured"],
+    "tags": ["7B", "Finetuned"],
     "size": 4370000000
   },
   "engine": "nitro"
@@ -8,7 +8,7 @@
   "id": "trinity-v1.2-7b",
   "object": "model",
   "name": "Trinity-v1.2 7B Q4",
-  "version": "1.0",
+  "version": "1.1",
   "description": "Trinity is an experimental model merge using the Slerp method. Recommended for daily assistance purposes.",
   "format": "gguf",
   "settings": {
@@ -26,7 +26,7 @@
   },
   "metadata": {
     "author": "Jan",
-    "tags": ["7B", "Merged", "Featured"],
+    "tags": ["7B", "Merged"],
     "size": 4370000000,
     "cover": "https://raw.githubusercontent.com/janhq/jan/dev/models/trinity-v1.2-7b/cover.png"
   },
@@ -131,10 +131,11 @@ async function loadModel(
   if (!llama_model_path) return Promise.reject('No GGUF model file found')

   currentSettings = {
+    cpu_threads: Math.max(1, nitroResourceProbe.numCpuPhysicalCore),
+    // model.settings can override the default settings
     ...params.model.settings,
     llama_model_path,
-    // This is critical and requires real CPU physical core count (or performance core)
-    cpu_threads: Math.max(1, nitroResourceProbe.numCpuPhysicalCore),
     ...(params.model.settings.mmproj && {
       mmproj: path.isAbsolute(params.model.settings.mmproj)
         ? params.model.settings.mmproj
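The mmproj handling above keeps an absolute path as-is and otherwise resolves it relative to the model's folder. A minimal sketch of that pattern using Node's path module; the function and variable names are illustrative:

```typescript
import * as path from 'path'

// Resolve a possibly-relative multimodal projector path against the model folder.
function resolveMmproj(mmproj: string, modelFolder: string): string {
  return path.isAbsolute(mmproj) ? mmproj : path.join(modelFolder, mmproj)
}

console.log(resolveMmproj('/models/llava/mmproj.gguf', '/models/llava')) // returned unchanged
console.log(resolveMmproj('mmproj.gguf', '/models/llava'))               // joined with the folder
```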
@@ -67,54 +67,54 @@ export class FileLogger extends Logger {
     const size = maxFileSizeBytes ?? 1 * 1024 * 1024 // 1 MB
     const days = daysToKeep ?? 7 // 7 days
     const logDirectory = path.join(getJanDataFolderPath(), 'logs')

     // Perform log cleaning
     const currentDate = new Date()
-    fs.readdir(logDirectory, (err, files) => {
-      if (err) {
-        console.error('Error reading log directory:', err)
-        return
-      }
+    if (fs.existsSync(logDirectory))
+      fs.readdir(logDirectory, (err, files) => {
+        if (err) {
+          console.error('Error reading log directory:', err)
+          return
+        }

-      files.forEach((file) => {
-        const filePath = path.join(logDirectory, file)
-        fs.stat(filePath, (err, stats) => {
-          if (err) {
-            console.error('Error getting file stats:', err)
-            return
-          }
+        files.forEach((file) => {
+          const filePath = path.join(logDirectory, file)
+          fs.stat(filePath, (err, stats) => {
+            if (err) {
+              console.error('Error getting file stats:', err)
+              return
+            }

-          // Check size
-          if (stats.size > size) {
-            fs.unlink(filePath, (err) => {
-              if (err) {
-                console.error('Error deleting log file:', err)
-                return
-              }
-              console.debug(
-                `Deleted log file due to exceeding size limit: ${filePath}`
-              )
-            })
-          } else {
-            // Check age
-            const creationDate = new Date(stats.ctime)
-            const daysDifference = Math.floor(
-              (currentDate.getTime() - creationDate.getTime()) /
-                (1000 * 3600 * 24)
-            )
-            if (daysDifference > days) {
+            // Check size
+            if (stats.size > size) {
               fs.unlink(filePath, (err) => {
                 if (err) {
                   console.error('Error deleting log file:', err)
                   return
                 }
-                console.debug(`Deleted old log file: ${filePath}`)
+                console.debug(
+                  `Deleted log file due to exceeding size limit: ${filePath}`
+                )
               })
+            } else {
+              // Check age
+              const creationDate = new Date(stats.ctime)
+              const daysDifference = Math.floor(
+                (currentDate.getTime() - creationDate.getTime()) /
+                  (1000 * 3600 * 24)
+              )
+              if (daysDifference > days) {
+                fs.unlink(filePath, (err) => {
+                  if (err) {
+                    console.error('Error deleting log file:', err)
+                    return
+                  }
+                  console.debug(`Deleted old log file: ${filePath}`)
+                })
+              }
             }
-          }
+          })
         })
       })
-    })

     // Schedule the next execution with doubled delays
     this.timeout = setTimeout(
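The guard added above skips log cleanup when the logs directory does not exist yet (for example on first launch), which previously produced a readdir error. A minimal sketch of the same size-and-age policy using promise-based fs and the same 1 MB / 7 day defaults; cleanLogs is an illustrative name, not the Jan API:

```typescript
import { promises as fs, existsSync } from 'fs'
import * as path from 'path'

// Delete logs that exceed maxBytes or are older than maxDays.
async function cleanLogs(logDirectory: string, maxBytes = 1024 * 1024, maxDays = 7): Promise<void> {
  if (!existsSync(logDirectory)) return // nothing to clean on first launch
  const now = Date.now()
  for (const file of await fs.readdir(logDirectory)) {
    const filePath = path.join(logDirectory, file)
    const stats = await fs.stat(filePath)
    const ageDays = Math.floor((now - stats.ctime.getTime()) / (1000 * 3600 * 24))
    if (stats.size > maxBytes || ageDays > maxDays) {
      await fs.unlink(filePath).catch((err) => console.error('Error deleting log file:', err))
    }
  }
}
```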
@@ -134,10 +134,19 @@ const DropdownListSidebar = ({
   }

   if (activeThread) {
+    // Default setting ctx_len for the model for a better onboarding experience
+    // TODO: When Cortex support hardware instructions, we should remove this
+    const overriddenSettings =
+      model?.settings.ctx_len && model.settings.ctx_len > 2048
+        ? { ctx_len: 2048 }
+        : {}
+
     const modelParams = {
       ...model?.parameters,
       ...model?.settings,
+      ...overriddenSettings,
     }

     // Update model parameter to the thread state
     setThreadModelParams(activeThread.id, modelParams)
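The override caps ctx_len at 2048 for a smoother first run on modest hardware; because it is spread last, it wins over the model's own settings. A small sketch of the clamp with illustrative names:

```typescript
// Cap the context length at a conservative default unless the model already asks for less.
function clampCtxLen(settings: { ctx_len?: number }, cap = 2048): { ctx_len?: number } {
  return settings.ctx_len && settings.ctx_len > cap ? { ctx_len: cap } : {}
}

const modelSettings = { ctx_len: 32768, prompt_template: '{prompt}' }
const merged = { ...modelSettings, ...clampCtxLen(modelSettings) }
console.log(merged.ctx_len) // 2048
```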
@@ -20,6 +20,8 @@ import { ulid } from 'ulidx'

 import { activeModelAtom, stateModelAtom } from '@/hooks/useActiveModel'

+import { toRuntimeParams } from '@/utils/modelParam'
+
 import { extensionManager } from '@/extension'
 import {
   getCurrentChatMessagesAtom,
@@ -32,6 +34,7 @@ import {
   threadsAtom,
   isGeneratingResponseAtom,
   updateThreadAtom,
+  getActiveThreadModelParamsAtom,
 } from '@/helpers/atoms/Thread.atom'

 const maxWordForThreadTitle = 10
@@ -54,6 +57,8 @@ export default function EventHandler({ children }: { children: ReactNode }) {
   const updateThread = useSetAtom(updateThreadAtom)
   const messagesRef = useRef(messages)
   const activeModelRef = useRef(activeModel)
+  const activeModelParams = useAtomValue(getActiveThreadModelParamsAtom)
+  const activeModelParamsRef = useRef(activeModelParams)

   useEffect(() => {
     threadsRef.current = threads
@@ -71,6 +76,10 @@ export default function EventHandler({ children }: { children: ReactNode }) {
     activeModelRef.current = activeModel
   }, [activeModel])

+  useEffect(() => {
+    activeModelParamsRef.current = activeModelParams
+  }, [activeModelParams])
+
   const onNewMessageResponse = useCallback(
     (message: ThreadMessage) => {
       if (message.type === MessageRequestType.Thread) {
@@ -247,6 +256,8 @@ export default function EventHandler({ children }: { children: ReactNode }) {
       },
     ]

+    const runtimeParams = toRuntimeParams(activeModelParamsRef.current)
+
     const messageRequest: MessageRequest = {
       id: msgId,
       threadId: message.thread_id,
@@ -255,6 +266,7 @@ export default function EventHandler({ children }: { children: ReactNode }) {
       model: {
         ...activeModelRef.current,
         parameters: {
+          ...runtimeParams,
           stream: false,
         },
       },
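These additions pipe the active thread's model parameters into the generated message request through a ref, so the event handler always reads the latest values. toRuntimeParams separates inference-time parameters from load-time settings; the sketch below uses an illustrative allow-list and is not Jan's actual implementation in '@/utils/modelParam':

```typescript
type ModelParams = Record<string, unknown>

// Keep only keys that are sent with each inference request;
// load-time settings such as ctx_len or ngl stay behind.
const runtimeKeys = ['temperature', 'top_p', 'max_tokens', 'frequency_penalty', 'presence_penalty', 'stop']

function toRuntimeParams(params: ModelParams): ModelParams {
  return Object.fromEntries(
    Object.entries(params).filter(([key]) => runtimeKeys.includes(key))
  )
}

console.log(toRuntimeParams({ temperature: 0.7, ctx_len: 2048, max_tokens: 512 }))
// { temperature: 0.7, max_tokens: 512 }
```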
@@ -97,7 +97,7 @@ const ServerLogs = (props: ServerLogsProps) => {
         </div>
       </div>
       <div className="overflow-hidden">
-        {logs.length > 1 ? (
+        {logs.length > 0 ? (
           <div className="h-full overflow-auto">
             <code className="inline-block whitespace-pre-line text-xs">
               {logs.slice(-limit).map((log, i) => {
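The condition change fixes an off-by-one: with `> 1` a single log entry never rendered, while `logs.slice(-limit)` already restricts output to the most recent entries. A small illustration:

```typescript
const logs = ['server started']
console.log(logs.length > 1) // false — the old check hid a lone entry
console.log(logs.length > 0) // true

const limit = 100
console.log(logs.slice(-limit)) // the last `limit` entries (here just the one)
```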
@@ -25,7 +25,7 @@ export const stateModelAtom = atom<ModelState>({
   model: undefined,
 })

 export let loadModelController: AbortController | undefined
 const pendingModelLoadAtom = atom<boolean>(false)

 export function useActiveModel() {
   const [activeModel, setActiveModel] = useAtom(activeModelAtom)
@@ -33,6 +33,7 @@ export function useActiveModel() {
   const [stateModel, setStateModel] = useAtom(stateModelAtom)
   const downloadedModels = useAtomValue(downloadedModelsAtom)
   const setLoadModelError = useSetAtom(loadModelErrorAtom)
+  const [pendingModelLoad, setPendingModelLoad] = useAtom(pendingModelLoadAtom)

   const downloadedModelsRef = useRef<Model[]>([])
@@ -40,7 +41,7 @@ export function useActiveModel() {
     downloadedModelsRef.current = downloadedModels
   }, [downloadedModels])

-  const startModel = async (modelId: string) => {
+  const startModel = async (modelId: string, abortable: boolean = true) => {
     if (
       (activeModel && activeModel.id === modelId) ||
       (stateModel.model?.id === modelId && stateModel.loading)
@@ -48,7 +49,7 @@ export function useActiveModel() {
       console.debug(`Model ${modelId} is already initialized. Ignore..`)
       return Promise.resolve()
     }
+    loadModelController = new AbortController()
+    setPendingModelLoad(true)

     let model = downloadedModelsRef?.current.find((e) => e.id === modelId)
@@ -107,15 +108,16 @@ export function useActiveModel() {
         })
       })
       .catch((error) => {
+        if (loadModelController?.signal.aborted)
+          return Promise.reject(new Error('aborted'))

         setStateModel(() => ({
           state: 'start',
           loading: false,
           model,
         }))

+        if (!pendingModelLoad && abortable) {
+          return Promise.reject(new Error('aborted'))
+        }

         toaster({
           title: 'Failed!',
           description: `Model ${model.id} failed to start.`,
@@ -139,9 +141,15 @@ export function useActiveModel() {
       .then(() => {
         setActiveModel(undefined)
         setStateModel({ state: 'start', loading: false, model: undefined })
+        loadModelController?.abort()
+        setPendingModelLoad(false)
       })
-  }, [activeModel, setActiveModel, setStateModel, stateModel])
+  }, [
+    activeModel,
+    setActiveModel,
+    setStateModel,
+    setPendingModelLoad,
+    stateModel,
+  ])

   const stopInference = useCallback(async () => {
     // Loading model
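startModel now takes an abortable flag, and the hook exposes loadModelController, so stopping a model can cancel an in-flight load and surface it as an 'aborted' rejection instead of a failure toast. A minimal sketch of the pattern, independent of Jan's extension API; the function names are illustrative:

```typescript
let controller: AbortController | undefined

async function startLoad(load: (signal: AbortSignal) => Promise<void>): Promise<void> {
  controller = new AbortController()
  try {
    await load(controller.signal)
  } catch (err) {
    // Distinguish a deliberate cancellation from a real failure.
    if (controller?.signal.aborted) throw new Error('aborted')
    throw err
  }
}

function stopLoad(): void {
  controller?.abort()
}
```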
@@ -94,6 +94,11 @@ export const useCreateNewThread = () => {
       settings: assistant.tools && assistant.tools[0].settings,
     }

+    const overriddenSettings =
+      defaultModel?.settings.ctx_len && defaultModel.settings.ctx_len > 2048
+        ? { ctx_len: 2048 }
+        : {}
+
     const createdAt = Date.now()
     const assistantInfo: ThreadAssistantInfo = {
       assistant_id: assistant.id,
@@ -101,7 +106,7 @@ export const useCreateNewThread = () => {
       tools: experimentalEnabled ? [assistantTools] : assistant.tools,
       model: {
         id: defaultModel?.id ?? '*',
-        settings: defaultModel?.settings ?? {},
+        settings: { ...defaultModel?.settings, ...overriddenSettings } ?? {},
         parameters: defaultModel?.parameters ?? {},
         engine: defaultModel?.engine,
       },
@@ -126,6 +131,7 @@ export const useCreateNewThread = () => {
     setThreadModelParams(thread.id, {
       ...defaultModel?.settings,
       ...defaultModel?.parameters,
+      ...overriddenSettings,
     })

     // Delete the file upload state
@@ -165,6 +165,21 @@ export const presetConfiguration: Record<string, SettingComponentProps> = {
     requireModelReload: true,
     configType: 'setting',
   },
+  cpu_threads: {
+    key: 'cpu_threads',
+    title: 'CPU Threads',
+    description:
+      'Determines CPU inference threads, limited by hardware and OS. (Maximum determined by system)',
+    controllerType: 'slider',
+    controllerProps: {
+      min: 0,
+      max: 128,
+      step: 1,
+      value: 1,
+    },
+    requireModelReload: true,
+    configType: 'setting',
+  },
   // assistant
   chunk_size: {
     key: 'chunk_size',
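The new slider exposes cpu_threads as a per-model setting; combined with the engine changes above, the hardware-probed value becomes only a default that this setting can override. A hedged sketch of clamping a user-supplied thread count to the machine's real parallelism, using Node's os module as a stand-in for Jan's system resource probe:

```typescript
import * as os from 'os'

// Clamp a user-chosen thread count to a sane range for this machine.
// os.cpus().length counts logical cores; a physical-core probe would be tighter.
function clampCpuThreads(requested: number): number {
  const limit = Math.max(1, os.cpus().length)
  return Math.min(Math.max(1, requested), limit)
}

console.log(clampCpuThreads(0))   // 1 — never run with zero threads
console.log(clampCpuThreads(999)) // capped at the core count
```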
@@ -155,12 +155,12 @@ const LocalServerScreen = () => {
         isCorsEnabled,
         isVerboseEnabled,
       })
-      await startModel(selectedModel.id)
       if (isStarted) setServerEnabled(true)
       if (firstTimeVisitAPIServer) {
         localStorage.setItem(FIRST_TIME_VISIT_API_SERVER, 'false')
         setFirstTimeVisitAPIServer(false)
       }
+      startModel(selectedModel.id, false).catch((e) => console.error(e))
     } catch (e) {
       console.error(e)
       toaster({
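Starting the model is no longer awaited, so the surrounding try/catch cannot observe its rejection; the explicit .catch prevents an unhandled promise rejection while the server UI enables immediately. A minimal illustration of why a fire-and-forget call needs its own handler:

```typescript
async function mayFail(): Promise<void> {
  throw new Error('model failed to start')
}

try {
  // Fire-and-forget: the surrounding try/catch does NOT catch this rejection,
  // so the promise must carry its own .catch handler.
  mayFail().catch((e) => console.error(e))
} catch {
  // never reached for the async failure above
}
```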