Merge pull request #2808 from janhq/release/0.4.12

Release/0.4.12 to main
This commit is contained in:
Van Pham 2024-04-25 09:21:31 +07:00 committed by GitHub
commit 36c86f2afe
No known key found for this signature in database
GPG Key ID: B5690EEEBB952194
17 changed files with 113 additions and 60 deletions

View File

@ -63,11 +63,11 @@ const runModel = async (modelId: string, settingParams?: ModelSettingParams): Pr
const nitroResourceProbe = await getSystemResourceInfo()
const nitroModelSettings: NitroModelSettings = {
// This is critical and requires real CPU physical core count (or performance core)
cpu_threads: Math.max(1, nitroResourceProbe.numCpuPhysicalCore),
...modelMetadata.settings,
...settingParams,
llama_model_path: modelBinaryPath,
// This is critical and requires real CPU physical core count (or performance core)
cpu_threads: Math.max(1, nitroResourceProbe.numCpuPhysicalCore),
...(modelMetadata.settings.mmproj && {
mmproj: join(modelFolderFullPath, modelMetadata.settings.mmproj),
}),
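Note on the hunk above: the computed cpu_threads default is moved ahead of the spreads so modelMetadata.settings and settingParams can override it, and mmproj is only set when the model actually ships a projector file. A minimal, self-contained sketch of the same precedence and conditional-spread pattern; the input values here are hypothetical, not the extension's real data:

import { join } from 'path'

// Hypothetical inputs for illustration.
const numCpuPhysicalCore = 8
const modelSettings: Record<string, unknown> = { ctx_len: 2048, mmproj: 'mmproj.gguf' }
const settingParams: Record<string, unknown> = { ngl: 33 }
const modelBinaryPath = '/home/user/jan/models/llava-7b/model.gguf'
const modelFolderFullPath = '/home/user/jan/models/llava-7b'

const nitroModelSettings = {
  // Computed default first, so the spreads below may override it.
  cpu_threads: Math.max(1, numCpuPhysicalCore),
  ...modelSettings,
  ...settingParams,
  llama_model_path: modelBinaryPath,
  // Conditional spread: mmproj is only added when the model provides a projector file.
  ...(typeof modelSettings.mmproj === 'string' && {
    mmproj: join(modelFolderFullPath, modelSettings.mmproj),
  }),
}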

View File

@ -15,6 +15,8 @@ export const readEmbeddingEngine = (engineName: string) => {
const settingDirectoryPath = path.join(
getJanDataFolderPath(),
'settings',
'@janhq',
// TODO: James - To be removed
engineName === 'openai'
? 'inference-openai-extension'
: 'inference-groq-extension',
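The hunk above inserts an '@janhq' segment, so engine settings now resolve under the scoped extension folder. A short sketch of the resulting path, assuming getJanDataFolderPath() returns the Jan data directory (the stub below is illustrative, not the real helper):

import path from 'path'

// Illustrative stand-in for the real helper in the Jan codebase.
const getJanDataFolderPath = (): string => '/home/user/jan'

const engineName: string = 'openai'
const settingDirectoryPath = path.join(
  getJanDataFolderPath(),
  'settings',
  '@janhq',
  engineName === 'openai'
    ? 'inference-openai-extension'
    : 'inference-groq-extension'
)
// -> /home/user/jan/settings/@janhq/inference-openai-extension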

View File

@ -1,7 +1,7 @@
{
"name": "@janhq/inference-nitro-extension",
"productName": "Nitro Inference Engine",
"version": "1.0.2",
"version": "1.0.4",
"description": "This extension embeds Nitro, a lightweight (3mb) inference engine written in C++. See https://nitro.jan.ai.\nAdditional dependencies could be installed to run without Cuda Toolkit installation.",
"main": "dist/index.js",
"node": "dist/node/index.cjs.js",

View File

@ -8,7 +8,7 @@
"id": "command-r-34b",
"object": "model",
"name": "Command-R v01 34B Q4",
"version": "1.2",
"version": "1.3",
"description": "C4AI Command-R developed by CohereAI is optimized for a variety of use cases including reasoning, summarization, and question answering.",
"format": "gguf",
"settings": {
@ -27,7 +27,7 @@
},
"metadata": {
"author": "CohereAI",
"tags": ["34B", "Finetuned"],
"tags": ["34B", "Finetuned", "Featured"],
"size": 21500000000
},
"engine": "nitro"

View File

@ -8,7 +8,7 @@
"id": "hermes-pro-7b",
"object": "model",
"name": "Hermes Pro 7B Q4",
"version": "1.0",
"version": "1.1",
"description": "Hermes Pro is superior in Roleplaying, Reasoning and Explaining problem.",
"format": "gguf",
"settings": {
@ -27,7 +27,7 @@
},
"metadata": {
"author": "NousResearch",
"tags": ["7B", "Finetuned", "Featured"],
"tags": ["7B", "Finetuned"],
"size": 4370000000
},
"engine": "nitro"

View File

@ -8,7 +8,7 @@
"id": "openhermes-neural-7b",
"object": "model",
"name": "OpenHermes Neural 7B Q4",
"version": "1.0",
"version": "1.1",
"description": "OpenHermes Neural is a merged model using the TIES method. It performs well in various benchmarks.",
"format": "gguf",
"settings": {
@ -26,7 +26,7 @@
},
"metadata": {
"author": "Intel, Jan",
"tags": ["7B", "Merged", "Featured"],
"tags": ["7B", "Merged"],
"size": 4370000000,
"cover": "https://raw.githubusercontent.com/janhq/jan/dev/models/openhermes-neural-7b/cover.png"
},

View File

@ -8,7 +8,7 @@
"id": "stealth-v1.2-7b",
"object": "model",
"name": "Stealth 7B Q4",
"version": "1.0",
"version": "1.1",
"description": "This is a new experimental family designed to enhance Mathematical and Logical abilities.",
"format": "gguf",
"settings": {
@ -26,7 +26,7 @@
},
"metadata": {
"author": "Jan",
"tags": ["7B", "Finetuned", "Featured"],
"tags": ["7B", "Finetuned"],
"size": 4370000000
},
"engine": "nitro"

View File

@ -8,7 +8,7 @@
"id": "trinity-v1.2-7b",
"object": "model",
"name": "Trinity-v1.2 7B Q4",
"version": "1.0",
"version": "1.1",
"description": "Trinity is an experimental model merge using the Slerp method. Recommended for daily assistance purposes.",
"format": "gguf",
"settings": {
@ -26,7 +26,7 @@
},
"metadata": {
"author": "Jan",
"tags": ["7B", "Merged", "Featured"],
"tags": ["7B", "Merged"],
"size": 4370000000,
"cover": "https://raw.githubusercontent.com/janhq/jan/dev/models/trinity-v1.2-7b/cover.png"
},

View File

@ -131,10 +131,11 @@ async function loadModel(
if (!llama_model_path) return Promise.reject('No GGUF model file found')
currentSettings = {
cpu_threads: Math.max(1, nitroResourceProbe.numCpuPhysicalCore),
// model.settings can override the default settings
...params.model.settings,
llama_model_path,
// This is critical and requires real CPU physical core count (or performance core)
cpu_threads: Math.max(1, nitroResourceProbe.numCpuPhysicalCore),
...(params.model.settings.mmproj && {
mmproj: path.isAbsolute(params.model.settings.mmproj)
? params.model.settings.mmproj
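The conditional mmproj spread here also normalizes the path: an absolute value is kept as-is. The hunk is truncated after the true branch; resolving a relative value against the model folder mirrors the extension-side hunk at the top of this diff, but that fallback is an assumption here. A small sketch with hypothetical values:

import path from 'path'

// Hypothetical values for illustration.
const modelFolder = '/home/user/jan/models/llava-7b'
const mmprojSetting = 'mmproj-model-f16.gguf'

const mmproj = path.isAbsolute(mmprojSetting)
  ? mmprojSetting
  : path.join(modelFolder, mmprojSetting)
// -> /home/user/jan/models/llava-7b/mmproj-model-f16.gguf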

View File

@ -67,54 +67,54 @@ export class FileLogger extends Logger {
const size = maxFileSizeBytes ?? 1 * 1024 * 1024 // 1 MB
const days = daysToKeep ?? 7 // 7 days
const logDirectory = path.join(getJanDataFolderPath(), 'logs')
// Perform log cleaning
const currentDate = new Date()
fs.readdir(logDirectory, (err, files) => {
if (err) {
console.error('Error reading log directory:', err)
return
}
if (fs.existsSync(logDirectory))
fs.readdir(logDirectory, (err, files) => {
if (err) {
console.error('Error reading log directory:', err)
return
}
files.forEach((file) => {
const filePath = path.join(logDirectory, file)
fs.stat(filePath, (err, stats) => {
if (err) {
console.error('Error getting file stats:', err)
return
}
files.forEach((file) => {
const filePath = path.join(logDirectory, file)
fs.stat(filePath, (err, stats) => {
if (err) {
console.error('Error getting file stats:', err)
return
}
// Check size
if (stats.size > size) {
fs.unlink(filePath, (err) => {
if (err) {
console.error('Error deleting log file:', err)
return
}
console.debug(
`Deleted log file due to exceeding size limit: ${filePath}`
)
})
} else {
// Check age
const creationDate = new Date(stats.ctime)
const daysDifference = Math.floor(
(currentDate.getTime() - creationDate.getTime()) /
(1000 * 3600 * 24)
)
if (daysDifference > days) {
// Check size
if (stats.size > size) {
fs.unlink(filePath, (err) => {
if (err) {
console.error('Error deleting log file:', err)
return
}
console.debug(`Deleted old log file: ${filePath}`)
console.debug(
`Deleted log file due to exceeding size limit: ${filePath}`
)
})
} else {
// Check age
const creationDate = new Date(stats.ctime)
const daysDifference = Math.floor(
(currentDate.getTime() - creationDate.getTime()) /
(1000 * 3600 * 24)
)
if (daysDifference > days) {
fs.unlink(filePath, (err) => {
if (err) {
console.error('Error deleting log file:', err)
return
}
console.debug(`Deleted old log file: ${filePath}`)
})
}
}
}
})
})
})
})
// Schedule the next execution with doubled delays
this.timeout = setTimeout(
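The reshuffled block above wraps the whole cleanup pass in an fs.existsSync check so readdir is never called on a missing logs directory. A stripped-down sketch of the guard plus the size and age checks, assuming the same limits (1 MB, 7 days); this is a simplification, not the FileLogger implementation:

import fs from 'fs'
import path from 'path'

const cleanLogs = (logDirectory: string, maxBytes = 1024 * 1024, daysToKeep = 7) => {
  // Skip silently when the directory has not been created yet.
  if (!fs.existsSync(logDirectory)) return

  const now = Date.now()
  fs.readdir(logDirectory, (err, files) => {
    if (err) return console.error('Error reading log directory:', err)
    files.forEach((file) => {
      const filePath = path.join(logDirectory, file)
      fs.stat(filePath, (err, stats) => {
        if (err) return console.error('Error getting file stats:', err)
        const ageInDays = (now - stats.ctime.getTime()) / (1000 * 3600 * 24)
        // Delete files that are too large or too old.
        if (stats.size > maxBytes || ageInDays > daysToKeep) {
          fs.unlink(filePath, (err) => {
            if (err) console.error('Error deleting log file:', err)
          })
        }
      })
    })
  })
}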

View File

@ -134,10 +134,19 @@ const DropdownListSidebar = ({
}
if (activeThread) {
// Default setting ctx_len for the model for a better onboarding experience
// TODO: When Cortex support hardware instructions, we should remove this
const overriddenSettings =
model?.settings.ctx_len && model.settings.ctx_len > 2048
? { ctx_len: 2048 }
: {}
const modelParams = {
...model?.parameters,
...model?.settings,
...overriddenSettings,
}
// Update model parameter to the thread state
setThreadModelParams(activeThread.id, modelParams)
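The override above caps ctx_len at 2048 when a model declares something larger, and the cap wins because it is spread last. A minimal sketch of the merge order with hypothetical values:

// Hypothetical model settings for illustration.
const settings = { ctx_len: 4096, prompt_template: '{prompt}' }
const parameters = { temperature: 0.7, max_tokens: 2048 }

// Cap ctx_len at 2048; spread last so the cap wins over the declared value.
const overriddenSettings =
  settings.ctx_len && settings.ctx_len > 2048 ? { ctx_len: 2048 } : {}

const modelParams = { ...parameters, ...settings, ...overriddenSettings }
// -> { temperature: 0.7, max_tokens: 2048, ctx_len: 2048, prompt_template: '{prompt}' }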

View File

@ -20,6 +20,8 @@ import { ulid } from 'ulidx'
import { activeModelAtom, stateModelAtom } from '@/hooks/useActiveModel'
import { toRuntimeParams } from '@/utils/modelParam'
import { extensionManager } from '@/extension'
import {
getCurrentChatMessagesAtom,
@ -32,6 +34,7 @@ import {
threadsAtom,
isGeneratingResponseAtom,
updateThreadAtom,
getActiveThreadModelParamsAtom,
} from '@/helpers/atoms/Thread.atom'
const maxWordForThreadTitle = 10
@ -54,6 +57,8 @@ export default function EventHandler({ children }: { children: ReactNode }) {
const updateThread = useSetAtom(updateThreadAtom)
const messagesRef = useRef(messages)
const activeModelRef = useRef(activeModel)
const activeModelParams = useAtomValue(getActiveThreadModelParamsAtom)
const activeModelParamsRef = useRef(activeModelParams)
useEffect(() => {
threadsRef.current = threads
@ -71,6 +76,10 @@ export default function EventHandler({ children }: { children: ReactNode }) {
activeModelRef.current = activeModel
}, [activeModel])
useEffect(() => {
activeModelParamsRef.current = activeModelParams
}, [activeModelParams])
const onNewMessageResponse = useCallback(
(message: ThreadMessage) => {
if (message.type === MessageRequestType.Thread) {
@ -247,6 +256,8 @@ export default function EventHandler({ children }: { children: ReactNode }) {
},
]
const runtimeParams = toRuntimeParams(activeModelParamsRef.current)
const messageRequest: MessageRequest = {
id: msgId,
threadId: message.thread_id,
@ -255,6 +266,7 @@ export default function EventHandler({ children }: { children: ReactNode }) {
model: {
...activeModelRef.current,
parameters: {
...runtimeParams,
stream: false,
},
},
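The wiring above keeps the latest thread model params in a ref so the event callback, which is registered once, always reads fresh values, then folds the runtime subset into the regenerated message request. A generic sketch of the ref-sync pattern, assuming React as in the surrounding code; the hook name is illustrative:

import { useEffect, useRef } from 'react'

// Mirror a changing value into a ref so a callback registered once
// can always read the latest value without re-subscribing.
function useLatest<T>(value: T) {
  const ref = useRef(value)
  useEffect(() => {
    ref.current = value
  }, [value])
  return ref
}

// Usage sketch (names are illustrative):
// const activeModelParamsRef = useLatest(activeModelParams)
// ...inside the event callback:
// const runtimeParams = toRuntimeParams(activeModelParamsRef.current)
// parameters: { ...runtimeParams, stream: false }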

View File

@ -97,7 +97,7 @@ const ServerLogs = (props: ServerLogsProps) => {
</div>
</div>
<div className="overflow-hidden">
{logs.length > 1 ? (
{logs.length > 0 ? (
<div className="h-full overflow-auto">
<code className="inline-block whitespace-pre-line text-xs">
{logs.slice(-limit).map((log, i) => {

View File

@ -25,7 +25,7 @@ export const stateModelAtom = atom<ModelState>({
model: undefined,
})
export let loadModelController: AbortController | undefined
const pendingModelLoadAtom = atom<boolean>(false)
export function useActiveModel() {
const [activeModel, setActiveModel] = useAtom(activeModelAtom)
@ -33,6 +33,7 @@ export function useActiveModel() {
const [stateModel, setStateModel] = useAtom(stateModelAtom)
const downloadedModels = useAtomValue(downloadedModelsAtom)
const setLoadModelError = useSetAtom(loadModelErrorAtom)
const [pendingModelLoad, setPendingModelLoad] = useAtom(pendingModelLoadAtom)
const downloadedModelsRef = useRef<Model[]>([])
@ -40,7 +41,7 @@ export function useActiveModel() {
downloadedModelsRef.current = downloadedModels
}, [downloadedModels])
const startModel = async (modelId: string) => {
const startModel = async (modelId: string, abortable: boolean = true) => {
if (
(activeModel && activeModel.id === modelId) ||
(stateModel.model?.id === modelId && stateModel.loading)
@ -48,7 +49,7 @@ export function useActiveModel() {
console.debug(`Model ${modelId} is already initialized. Ignore..`)
return Promise.resolve()
}
loadModelController = new AbortController()
setPendingModelLoad(true)
let model = downloadedModelsRef?.current.find((e) => e.id === modelId)
@ -107,15 +108,16 @@ export function useActiveModel() {
})
})
.catch((error) => {
if (loadModelController?.signal.aborted)
return Promise.reject(new Error('aborted'))
setStateModel(() => ({
state: 'start',
loading: false,
model,
}))
if (!pendingModelLoad && abortable) {
return Promise.reject(new Error('aborted'))
}
toaster({
title: 'Failed!',
description: `Model ${model.id} failed to start.`,
@ -139,9 +141,15 @@ export function useActiveModel() {
.then(() => {
setActiveModel(undefined)
setStateModel({ state: 'start', loading: false, model: undefined })
loadModelController?.abort()
setPendingModelLoad(false)
})
}, [activeModel, setActiveModel, setStateModel, stateModel])
}, [
activeModel,
setActiveModel,
setStateModel,
setPendingModelLoad,
stateModel,
])
const stopInference = useCallback(async () => {
// Loading model
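This hunk swaps the AbortController for a pendingModelLoad flag: startModel sets it, stopModel clears it, and the catch branch reports an 'aborted' rejection when the flag has already been cleared on an abortable start, instead of surfacing a start failure. A reduced sketch of that flow; the flag lives in a Jotai atom in the real hook, and the loader functions below are hypothetical placeholders:

// Illustrative only; the real hook keeps this flag in a Jotai atom.
let pendingModelLoad = false

// Hypothetical placeholders so the sketch is self-contained.
const loadModel = async (_id: string): Promise<void> => {}
const unloadModel = async (_id: string): Promise<void> => {}

const startModel = async (modelId: string, abortable = true): Promise<void> => {
  pendingModelLoad = true
  try {
    await loadModel(modelId)
  } catch (error) {
    // A failure that arrives after the user already stopped the model
    // is reported as an abort, not as a start failure.
    if (!pendingModelLoad && abortable) throw new Error('aborted')
    throw error
  }
}

const stopModel = async (modelId: string): Promise<void> => {
  await unloadModel(modelId)
  pendingModelLoad = false
}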

View File

@ -94,6 +94,11 @@ export const useCreateNewThread = () => {
settings: assistant.tools && assistant.tools[0].settings,
}
const overriddenSettings =
defaultModel?.settings.ctx_len && defaultModel.settings.ctx_len > 2048
? { ctx_len: 2048 }
: {}
const createdAt = Date.now()
const assistantInfo: ThreadAssistantInfo = {
assistant_id: assistant.id,
@ -101,7 +106,7 @@ export const useCreateNewThread = () => {
tools: experimentalEnabled ? [assistantTools] : assistant.tools,
model: {
id: defaultModel?.id ?? '*',
settings: defaultModel?.settings ?? {},
settings: { ...defaultModel?.settings, ...overriddenSettings } ?? {},
parameters: defaultModel?.parameters ?? {},
engine: defaultModel?.engine,
},
@ -126,6 +131,7 @@ export const useCreateNewThread = () => {
setThreadModelParams(thread.id, {
...defaultModel?.settings,
...defaultModel?.parameters,
...overriddenSettings,
})
// Delete the file upload state

View File

@ -165,6 +165,21 @@ export const presetConfiguration: Record<string, SettingComponentProps> = {
requireModelReload: true,
configType: 'setting',
},
cpu_threads: {
key: 'cpu_threads',
title: 'CPU Threads',
description:
'Determines CPU inference threads, limited by hardware and OS. (Maximum determined by system)',
controllerType: 'slider',
controllerProps: {
min: 0,
max: 128,
step: 1,
value: 1,
},
requireModelReload: true,
configType: 'setting',
},
// assistant
chunk_size: {
key: 'chunk_size',
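The new cpu_threads slider spans 0 to 128, while the engine hunks earlier in this diff default to Math.max(1, numCpuPhysicalCore). A hedged sketch of clamping a requested value to the probed physical core count; the helper is hypothetical and not part of this change:

// Illustrative helper: clamp a slider value into [1, physicalCores].
const clampCpuThreads = (requested: number, physicalCores: number): number =>
  Math.min(Math.max(1, requested), Math.max(1, physicalCores))

// clampCpuThreads(0, 8)  -> 1
// clampCpuThreads(64, 8) -> 8
// clampCpuThreads(6, 8)  -> 6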

View File

@ -155,12 +155,12 @@ const LocalServerScreen = () => {
isCorsEnabled,
isVerboseEnabled,
})
await startModel(selectedModel.id)
if (isStarted) setServerEnabled(true)
if (firstTimeVisitAPIServer) {
localStorage.setItem(FIRST_TIME_VISIT_API_SERVER, 'false')
setFirstTimeVisitAPIServer(false)
}
startModel(selectedModel.id, false).catch((e) => console.error(e))
} catch (e) {
console.error(e)
toaster({
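The change above no longer awaits startModel before enabling the server: the load runs in the background, failures are only logged, and abortable is false so errors always go through the normal failure path rather than being treated as user aborts. A minimal sketch of the call shape, assuming the startModel signature from the useActiveModel hunk; other names are illustrative:

// Fire-and-forget: do not block server startup on the model load; log failures.
const startInBackground = (
  startModel: (id: string, abortable?: boolean) => Promise<unknown>,
  modelId: string
): void => {
  startModel(modelId, false).catch((e: unknown) => console.error(e))
}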