diff --git a/extensions/inference-cortex-extension/package.json b/extensions/inference-cortex-extension/package.json index d262ad5ec..c6d3f70b6 100644 --- a/extensions/inference-cortex-extension/package.json +++ b/extensions/inference-cortex-extension/package.json @@ -1,7 +1,7 @@ { "name": "@janhq/inference-cortex-extension", "productName": "Cortex Inference Engine", - "version": "1.0.21", + "version": "1.0.22", "description": "This extension embeds cortex.cpp, a lightweight inference engine written in C++. See https://jan.ai.\nAdditional dependencies could be installed to run without Cuda Toolkit installation.", "main": "dist/index.js", "node": "dist/node/index.cjs.js", diff --git a/extensions/inference-cortex-extension/resources/models/phi3-3.8b/model.json b/extensions/inference-cortex-extension/resources/models/phi3-3.8b/model.json index a2197dab2..90aa50117 100644 --- a/extensions/inference-cortex-extension/resources/models/phi3-3.8b/model.json +++ b/extensions/inference-cortex-extension/resources/models/phi3-3.8b/model.json @@ -1,8 +1,8 @@ { "sources": [ { - "url": "https://huggingface.co/cortexso/phi3/resolve/main/model.gguf", - "filename": "model.gguf" + "url": "https://huggingface.co/bartowski/Phi-3-mini-4k-instruct-GGUF/resolve/main/Phi-3-mini-4k-instruct-Q4_K_M.gguf", + "filename": "Phi-3-mini-4k-instruct-Q4_K_M.gguf" } ], "id": "phi3-3.8b", @@ -14,7 +14,7 @@ "settings": { "ctx_len": 4096, "prompt_template": "<|user|>\n{prompt}<|end|>\n<|assistant|>\n", - "llama_model_path": "model.gguf", + "llama_model_path": "Phi-3-mini-4k-instruct-Q4_K_M.gguf", "ngl": 33 }, "parameters": { diff --git a/extensions/inference-cortex-extension/resources/models/phi3-medium/model.json b/extensions/inference-cortex-extension/resources/models/phi3-medium/model.json index f7131ee98..afce04952 100644 --- a/extensions/inference-cortex-extension/resources/models/phi3-medium/model.json +++ b/extensions/inference-cortex-extension/resources/models/phi3-medium/model.json @@ -1,8 +1,8 @@ { "sources": [ { - "url": "https://huggingface.co/bartowski/Phi-3-medium-128k-instruct-GGUF/resolve/main/Phi-3-medium-128k-instruct-Q4_K_M.gguf", - "filename": "Phi-3-medium-128k-instruct-Q4_K_M.gguf" + "url": "https://huggingface.co/bartowski/Phi-3-mini-4k-instruct-GGUF/resolve/main/Phi-3-mini-4k-instruct-Q4_K_M.gguf", + "filename": "Phi-3-mini-4k-instruct-Q4_K_M.gguf" } ], "id": "phi3-medium", @@ -14,7 +14,7 @@ "settings": { "ctx_len": 128000, "prompt_template": "<|user|> {prompt}<|end|><|assistant|>", - "llama_model_path": "Phi-3-medium-128k-instruct-Q4_K_M.gguf", + "llama_model_path": "Phi-3-mini-4k-instruct-Q4_K_M.gguf", "ngl": 33 }, "parameters": { diff --git a/extensions/inference-cortex-extension/src/index.ts b/extensions/inference-cortex-extension/src/index.ts index 3ff550504..531b407f2 100644 --- a/extensions/inference-cortex-extension/src/index.ts +++ b/extensions/inference-cortex-extension/src/index.ts @@ -83,11 +83,11 @@ export default class JanInferenceCortexExtension extends LocalOAIEngine { }) } - onUnload(): void { + async onUnload() { console.log('Clean up cortex.cpp services') this.shouldReconnect = false this.clean() - executeOnMain(NODE, 'dispose') + await executeOnMain(NODE, 'dispose') super.onUnload() } diff --git a/extensions/inference-cortex-extension/src/node/cpuInfo.ts b/extensions/inference-cortex-extension/src/node/cpuInfo.ts new file mode 100644 index 000000000..4366a995b --- /dev/null +++ b/extensions/inference-cortex-extension/src/node/cpuInfo.ts @@ -0,0 +1,27 @@ +import { cpuInfo } from 'cpu-instructions' + +// Check the CPU info and determine the supported instruction set +const info = cpuInfo.cpuInfo().some((e) => e.toUpperCase() === 'AVX512') + ? 'avx512' + : cpuInfo.cpuInfo().some((e) => e.toUpperCase() === 'AVX2') + ? 'avx2' + : cpuInfo.cpuInfo().some((e) => e.toUpperCase() === 'AVX') + ? 'avx' + : 'noavx' + +// Send the result and wait for confirmation before exiting +new Promise((resolve, reject) => { + // @ts-ignore + process.send(info, (error: Error | null) => { + if (error) { + reject(error) + } else { + resolve() + } + }) +}) + .then(() => process.exit(0)) + .catch((error) => { + console.error('Failed to send info:', error) + process.exit(1) + }) diff --git a/extensions/inference-cortex-extension/src/node/execute.ts b/extensions/inference-cortex-extension/src/node/execute.ts index 44b85d515..0b091d464 100644 --- a/extensions/inference-cortex-extension/src/node/execute.ts +++ b/extensions/inference-cortex-extension/src/node/execute.ts @@ -1,6 +1,6 @@ import * as path from 'path' -import { cpuInfo } from 'cpu-instructions' import { GpuSetting, appResourcePath, log } from '@janhq/core/node' +import { fork } from 'child_process' export interface CortexExecutableOptions { enginePath: string @@ -52,7 +52,9 @@ const extension = (): '.exe' | '' => { */ const cudaVersion = (settings?: GpuSetting): '11-7' | '12-0' | undefined => { const isUsingCuda = - settings?.vulkan !== true && settings?.run_mode === 'gpu' && !os().includes('mac') + settings?.vulkan !== true && + settings?.run_mode === 'gpu' && + !os().includes('mac') if (!isUsingCuda) return undefined return settings?.cuda?.version === '11' ? '11-7' : '12-0' @@ -62,15 +64,29 @@ const cudaVersion = (settings?: GpuSetting): '11-7' | '12-0' | undefined => { * The CPU instructions that will be set - either 'avx512', 'avx2', 'avx', or 'noavx'. * @returns */ -const cpuInstructions = (): string => { +const cpuInstructions = async (): Promise => { if (process.platform === 'darwin') return '' - return cpuInfo.cpuInfo().some((e) => e.toUpperCase() === 'AVX512') - ? 'avx512' - : cpuInfo.cpuInfo().some((e) => e.toUpperCase() === 'AVX2') - ? 'avx2' - : cpuInfo.cpuInfo().some((e) => e.toUpperCase() === 'AVX') - ? 'avx' - : 'noavx' + + const child = fork(path.join(__dirname, './cpuInfo.js')) // Path to the child process file + + return new Promise((resolve, reject) => { + child.on('message', (cpuInfo?: string) => { + resolve(cpuInfo ?? 'noavx') + child.kill() // Kill the child process after receiving the result + }) + + child.on('error', (err) => { + resolve('noavx') + child.kill() + }) + + child.on('exit', (code) => { + if (code !== 0) { + resolve('noavx') + child.kill() + } + }) + }) } /** @@ -94,8 +110,11 @@ export const executableCortexFile = ( /** * Find which variant to run based on the current platform. */ -export const engineVariant = (gpuSetting?: GpuSetting): string => { - const cpuInstruction = cpuInstructions() +export const engineVariant = async ( + gpuSetting?: GpuSetting +): Promise => { + const cpuInstruction = await cpuInstructions() + log(`[CORTEX]: CPU instruction: ${cpuInstruction}`) let engineVariant = [ os(), gpuSetting?.vulkan diff --git a/joi/src/core/Slider/index.tsx b/joi/src/core/Slider/index.tsx index 40e0c3977..7f8c6cb89 100644 --- a/joi/src/core/Slider/index.tsx +++ b/joi/src/core/Slider/index.tsx @@ -1,5 +1,6 @@ import React from 'react' import * as SliderPrimitive from '@radix-ui/react-slider' +import { twMerge } from 'tailwind-merge' import './styles.scss' @@ -25,7 +26,7 @@ const Slider = ({ disabled, }: Props) => ( (false) + +export const LocalAPIserverModelParamsAtom = atom() diff --git a/web/hooks/useFactoryReset.test.ts b/web/hooks/useFactoryReset.test.ts index b9ec10d6b..7655da24b 100644 --- a/web/hooks/useFactoryReset.test.ts +++ b/web/hooks/useFactoryReset.test.ts @@ -17,6 +17,14 @@ jest.mock('@janhq/core', () => ({ fs: { rm: jest.fn(), }, + EngineManager: { + instance: jest.fn().mockReturnValue({ + get: jest.fn(), + engines: { + values: jest.fn().mockReturnValue([]) + } + }), + }, })) describe('useFactoryReset', () => { diff --git a/web/hooks/useFactoryReset.ts b/web/hooks/useFactoryReset.ts index a8e3efb9a..f68a6fd8c 100644 --- a/web/hooks/useFactoryReset.ts +++ b/web/hooks/useFactoryReset.ts @@ -1,6 +1,6 @@ import { useCallback } from 'react' -import { fs, AppConfiguration } from '@janhq/core' +import { fs, AppConfiguration, EngineManager } from '@janhq/core' import { atom, useAtomValue, useSetAtom } from 'jotai' import { useActiveModel } from './useActiveModel' @@ -37,6 +37,15 @@ export default function useFactoryReset() { // 1: Stop running model setFactoryResetState(FactoryResetState.StoppingModel) await stopModel() + + await Promise.all( + EngineManager.instance() + .engines.values() + .map(async (engine) => { + await engine.onUnload() + }) + ) + await new Promise((resolve) => setTimeout(resolve, 4000)) // 2: Delete the old jan data folder diff --git a/web/hooks/useModels.ts b/web/hooks/useModels.ts index 75c86035a..ef15de763 100644 --- a/web/hooks/useModels.ts +++ b/web/hooks/useModels.ts @@ -90,12 +90,15 @@ const useModels = () => { const toUpdate = [ ...downloadedModels, ...cachedModels.filter( - (e: Model) => !downloadedModels.some((g: Model) => g.id === e.id) + (e) => + !isLocalEngine(e.engine) && + !downloadedModels.some((g: Model) => g.id === e.id) ), ] setDownloadedModels(toUpdate) - }, [downloadedModels, setDownloadedModels]) + setExtensionModels(cachedModels) + }, [downloadedModels, setDownloadedModels, setExtensionModels]) const getModels = async (): Promise => extensionManager diff --git a/web/screens/LocalServer/LocalServerLeftPanel/index.tsx b/web/screens/LocalServer/LocalServerLeftPanel/index.tsx index 91e00b430..010566f51 100644 --- a/web/screens/LocalServer/LocalServerLeftPanel/index.tsx +++ b/web/screens/LocalServer/LocalServerLeftPanel/index.tsx @@ -1,5 +1,6 @@ import { Fragment, useCallback, useState } from 'react' +import { EngineManager, Model, ModelSettingParams } from '@janhq/core' import { Button, Tooltip, Select, Input, Checkbox } from '@janhq/joi' import { useAtom, useAtomValue, useSetAtom } from 'jotai' @@ -22,7 +23,10 @@ import { hostOptions, } from '@/helpers/atoms/ApiServer.atom' -import { serverEnabledAtom } from '@/helpers/atoms/LocalServer.atom' +import { + LocalAPIserverModelParamsAtom, + serverEnabledAtom, +} from '@/helpers/atoms/LocalServer.atom' import { selectedModelAtom } from '@/helpers/atoms/Model.atom' const LocalServerLeftPanel = () => { @@ -31,7 +35,7 @@ const LocalServerLeftPanel = () => { const [serverEnabled, setServerEnabled] = useAtom(serverEnabledAtom) const [isLoading, setIsLoading] = useState(false) - const { startModel, stateModel } = useActiveModel() + const { stateModel } = useActiveModel() const selectedModel = useAtomValue(selectedModelAtom) const [isCorsEnabled, setIsCorsEnabled] = useAtom(apiServerCorsEnabledAtom) @@ -42,9 +46,19 @@ const LocalServerLeftPanel = () => { const [port, setPort] = useAtom(apiServerPortAtom) const [prefix, setPrefix] = useAtom(apiServerPrefix) const setLoadModelError = useSetAtom(loadModelErrorAtom) - + const localAPIserverModelParams = useAtomValue(LocalAPIserverModelParamsAtom) const FIRST_TIME_VISIT_API_SERVER = 'firstTimeVisitAPIServer' + const model: Model | undefined = selectedModel + ? { + ...selectedModel, + object: selectedModel.object || '', + settings: (typeof localAPIserverModelParams === 'object' + ? { ...(localAPIserverModelParams as ModelSettingParams) } + : { ...selectedModel.settings }) as ModelSettingParams, + } + : undefined + const [firstTimeVisitAPIServer, setFirstTimeVisitAPIServer] = useState(false) @@ -80,7 +94,9 @@ const LocalServerLeftPanel = () => { localStorage.setItem(FIRST_TIME_VISIT_API_SERVER, 'false') setFirstTimeVisitAPIServer(false) } - startModel(selectedModel.id, false).catch((e) => console.error(e)) + const engine = EngineManager.instance().get((model as Model).engine) + engine?.loadModel(model as Model) + // startModel(selectedModel.id, false).catch((e) => console.error(e)) setIsLoading(false) } catch (e) { console.error(e) diff --git a/web/screens/LocalServer/LocalServerRightPanel/index.tsx b/web/screens/LocalServer/LocalServerRightPanel/index.tsx index f0a11a865..5dba251df 100644 --- a/web/screens/LocalServer/LocalServerRightPanel/index.tsx +++ b/web/screens/LocalServer/LocalServerRightPanel/index.tsx @@ -17,13 +17,15 @@ import { useClipboard } from '@/hooks/useClipboard' import { getConfigurationsData } from '@/utils/componentSettings' -import { serverEnabledAtom } from '@/helpers/atoms/LocalServer.atom' +import { + LocalAPIserverModelParamsAtom, + serverEnabledAtom, +} from '@/helpers/atoms/LocalServer.atom' import { selectedModelAtom } from '@/helpers/atoms/Model.atom' -import { getActiveThreadModelParamsAtom } from '@/helpers/atoms/Thread.atom' const LocalServerRightPanel = () => { - const activeModelParams = useAtomValue(getActiveThreadModelParamsAtom) const loadModelError = useAtomValue(loadModelErrorAtom) + const setLocalAPIserverModelParams = useSetAtom(LocalAPIserverModelParamsAtom) const serverEnabled = useAtomValue(serverEnabledAtom) const setModalTroubleShooting = useSetAtom(modalTroubleShootingAtom) @@ -35,12 +37,19 @@ const LocalServerRightPanel = () => { extractModelLoadParams(selectedModel?.settings) ) + const overriddenSettings = + selectedModel?.settings.ctx_len && selectedModel.settings.ctx_len > 2048 + ? { ctx_len: 4096 } + : {} + useEffect(() => { if (selectedModel) { - setCurrentModelSettingParams( - extractModelLoadParams(selectedModel?.settings) - ) + setCurrentModelSettingParams({ + ...selectedModel?.settings, + ...overriddenSettings, + }) } + // eslint-disable-next-line react-hooks/exhaustive-deps }, [selectedModel]) const modelRuntimeParams = extractInferenceParams(selectedModel?.settings) @@ -50,17 +59,8 @@ const LocalServerRightPanel = () => { selectedModel ) - const modelEngineParams = extractModelLoadParams( - { - ...selectedModel?.settings, - ...activeModelParams, - }, - selectedModel?.settings - ) - const componentDataEngineSetting = getConfigurationsData( - modelEngineParams, - selectedModel + currentModelSettingParams ) const engineSettings = useMemo( @@ -78,16 +78,27 @@ const LocalServerRightPanel = () => { ) }, [componentDataRuntimeSetting]) + const onUpdateParams = useCallback(() => { + // eslint-disable-next-line @typescript-eslint/no-explicit-any + setLocalAPIserverModelParams(() => { + return { ...currentModelSettingParams } + }) + }, [currentModelSettingParams, setLocalAPIserverModelParams]) + const onValueChanged = useCallback( (key: string, value: string | number | boolean) => { - setCurrentModelSettingParams({ - ...currentModelSettingParams, + setCurrentModelSettingParams((prevParams) => ({ + ...prevParams, [key]: value, - }) + })) }, - [currentModelSettingParams] + [] ) + useEffect(() => { + onUpdateParams() + }, [currentModelSettingParams, onUpdateParams]) + return (
@@ -156,6 +167,7 @@ const LocalServerRightPanel = () => { )} @@ -165,6 +177,7 @@ const LocalServerRightPanel = () => { )}