Merge pull request #4095 from janhq/main
Release 0.5.9 sync back - main branch to dev
This commit is contained in:
commit
4f70a5dff2
@ -1,7 +1,7 @@
|
||||
{
|
||||
"name": "@janhq/inference-cortex-extension",
|
||||
"productName": "Cortex Inference Engine",
|
||||
"version": "1.0.21",
|
||||
"version": "1.0.22",
|
||||
"description": "This extension embeds cortex.cpp, a lightweight inference engine written in C++. See https://jan.ai.\nAdditional dependencies could be installed to run without Cuda Toolkit installation.",
|
||||
"main": "dist/index.js",
|
||||
"node": "dist/node/index.cjs.js",
|
||||
|
||||
@ -1,8 +1,8 @@
|
||||
{
|
||||
"sources": [
|
||||
{
|
||||
"url": "https://huggingface.co/cortexso/phi3/resolve/main/model.gguf",
|
||||
"filename": "model.gguf"
|
||||
"url": "https://huggingface.co/bartowski/Phi-3-mini-4k-instruct-GGUF/resolve/main/Phi-3-mini-4k-instruct-Q4_K_M.gguf",
|
||||
"filename": "Phi-3-mini-4k-instruct-Q4_K_M.gguf"
|
||||
}
|
||||
],
|
||||
"id": "phi3-3.8b",
|
||||
@ -14,7 +14,7 @@
|
||||
"settings": {
|
||||
"ctx_len": 4096,
|
||||
"prompt_template": "<|user|>\n{prompt}<|end|>\n<|assistant|>\n",
|
||||
"llama_model_path": "model.gguf",
|
||||
"llama_model_path": "Phi-3-mini-4k-instruct-Q4_K_M.gguf",
|
||||
"ngl": 33
|
||||
},
|
||||
"parameters": {
|
||||
|
||||
@ -1,8 +1,8 @@
|
||||
{
|
||||
"sources": [
|
||||
{
|
||||
"url": "https://huggingface.co/bartowski/Phi-3-medium-128k-instruct-GGUF/resolve/main/Phi-3-medium-128k-instruct-Q4_K_M.gguf",
|
||||
"filename": "Phi-3-medium-128k-instruct-Q4_K_M.gguf"
|
||||
"url": "https://huggingface.co/bartowski/Phi-3-mini-4k-instruct-GGUF/resolve/main/Phi-3-mini-4k-instruct-Q4_K_M.gguf",
|
||||
"filename": "Phi-3-mini-4k-instruct-Q4_K_M.gguf"
|
||||
}
|
||||
],
|
||||
"id": "phi3-medium",
|
||||
@ -14,7 +14,7 @@
|
||||
"settings": {
|
||||
"ctx_len": 128000,
|
||||
"prompt_template": "<|user|> {prompt}<|end|><|assistant|>",
|
||||
"llama_model_path": "Phi-3-medium-128k-instruct-Q4_K_M.gguf",
|
||||
"llama_model_path": "Phi-3-mini-4k-instruct-Q4_K_M.gguf",
|
||||
"ngl": 33
|
||||
},
|
||||
"parameters": {
|
||||
|
||||
@ -83,11 +83,11 @@ export default class JanInferenceCortexExtension extends LocalOAIEngine {
|
||||
})
|
||||
}
|
||||
|
||||
/**
 * Unload hook for the extension.
 *
 * Clears the reconnect flag, runs the local `clean()` routine, awaits the
 * `dispose` handler invoked through `executeOnMain`, and then delegates to
 * the base class `onUnload`.
 *
 * @returns A promise that settles once the `dispose` call has completed.
 */
async onUnload(): Promise<void> {
  console.log('Clean up cortex.cpp services')
  // NOTE(review): presumably prevents a reconnect attempt while shutting down — confirm against clean()
  this.shouldReconnect = false
  this.clean()
  try {
    await executeOnMain(NODE, 'dispose')
  } finally {
    // Run the base-class unload even when dispose rejects; the rejection still propagates to the caller.
    super.onUnload()
  }
}
|
||||
|
||||
|
||||
27
extensions/inference-cortex-extension/src/node/cpuInfo.ts
Normal file
27
extensions/inference-cortex-extension/src/node/cpuInfo.ts
Normal file
@ -0,0 +1,27 @@
|
||||
import { cpuInfo } from 'cpu-instructions'
|
||||
|
||||
// Query the CPU flags once and pick the most capable supported instruction set
const supportedFlags = new Set(cpuInfo.cpuInfo().map((e) => e.toUpperCase()))
const info = supportedFlags.has('AVX512')
  ? 'avx512'
  : supportedFlags.has('AVX2')
    ? 'avx2'
    : supportedFlags.has('AVX')
      ? 'avx'
      : 'noavx'

// Send the result and wait for confirmation before exiting
new Promise<void>((resolve, reject) => {
  // process.send is only defined when this script was spawned with an IPC channel
  if (typeof process.send !== 'function') {
    reject(new Error('process.send is unavailable (no IPC channel)'))
    return
  }
  // Documented signature: send(message, sendHandle, options, callback)
  process.send(info, undefined, undefined, (error: Error | null) => {
    if (error) {
      reject(error)
    } else {
      resolve()
    }
  })
})
  .then(() => process.exit(0))
  .catch((error) => {
    console.error('Failed to send info:', error)
    process.exit(1)
  })
|
||||
@ -1,6 +1,6 @@
|
||||
import * as path from 'path'
|
||||
import { cpuInfo } from 'cpu-instructions'
|
||||
import { GpuSetting, appResourcePath, log } from '@janhq/core/node'
|
||||
import { fork } from 'child_process'
|
||||
|
||||
export interface CortexExecutableOptions {
|
||||
enginePath: string
|
||||
@ -52,7 +52,9 @@ const extension = (): '.exe' | '' => {
|
||||
*/
|
||||
const cudaVersion = (settings?: GpuSetting): '11-7' | '12-0' | undefined => {
|
||||
const isUsingCuda =
|
||||
settings?.vulkan !== true && settings?.run_mode === 'gpu' && !os().includes('mac')
|
||||
settings?.vulkan !== true &&
|
||||
settings?.run_mode === 'gpu' &&
|
||||
!os().includes('mac')
|
||||
|
||||
if (!isUsingCuda) return undefined
|
||||
return settings?.cuda?.version === '11' ? '11-7' : '12-0'
|
||||
@ -62,15 +64,29 @@ const cudaVersion = (settings?: GpuSetting): '11-7' | '12-0' | undefined => {
|
||||
* The CPU instructions that will be set - either 'avx512', 'avx2', 'avx', or 'noavx'.
|
||||
* @returns
|
||||
*/
|
||||
const cpuInstructions = async (): Promise<string> => {
  // No CPU variant is reported on macOS
  if (process.platform === 'darwin') return ''

  // Value used whenever the probe cannot report a result
  const fallback = 'noavx'

  // The detection itself runs in a forked child script next to this module
  const child = fork(path.join(__dirname, './cpuInfo.js'))

  // A promise settles only once, so any resolve() call after the first is a no-op.
  return new Promise<string>((resolve) => {
    child.on('message', (instructions?: string) => {
      resolve(instructions ?? fallback)
      child.kill() // Kill the child process after receiving the result
    })

    // Emitted when the child could not be spawned, killed, or messaged
    child.on('error', () => {
      resolve(fallback)
      child.kill()
    })

    // A non-zero (or signal-terminated) exit means the probe failed
    child.on('exit', (code) => {
      if (code !== 0) resolve(fallback)
    })

    // Safety net: once the IPC channel is closed no message can arrive anymore,
    // so make sure the promise never stays pending (e.g. clean exit without a message).
    child.on('disconnect', () => {
      resolve(fallback)
    })
  })
}
|
||||
|
||||
/**
|
||||
@ -94,8 +110,11 @@ export const executableCortexFile = (
|
||||
/**
|
||||
* Find which variant to run based on the current platform.
|
||||
*/
|
||||
export const engineVariant = (gpuSetting?: GpuSetting): string => {
|
||||
const cpuInstruction = cpuInstructions()
|
||||
export const engineVariant = async (
|
||||
gpuSetting?: GpuSetting
|
||||
): Promise<string> => {
|
||||
const cpuInstruction = await cpuInstructions()
|
||||
log(`[CORTEX]: CPU instruction: ${cpuInstruction}`)
|
||||
let engineVariant = [
|
||||
os(),
|
||||
gpuSetting?.vulkan
|
||||
|
||||
@ -1,5 +1,6 @@
|
||||
import React from 'react'
|
||||
import * as SliderPrimitive from '@radix-ui/react-slider'
|
||||
import { twMerge } from 'tailwind-merge'
|
||||
|
||||
import './styles.scss'
|
||||
|
||||
@ -25,7 +26,7 @@ const Slider = ({
|
||||
disabled,
|
||||
}: Props) => (
|
||||
<SliderPrimitive.Root
|
||||
className="slider"
|
||||
className={twMerge('slider', disabled && 'slider--disabled')}
|
||||
name={name}
|
||||
min={min}
|
||||
max={max}
|
||||
|
||||
@ -6,6 +6,11 @@
|
||||
touch-action: none;
|
||||
height: 16px;
|
||||
|
||||
&--disabled {
|
||||
cursor: not-allowed;
|
||||
opacity: 0.2;
|
||||
}
|
||||
|
||||
&__track {
|
||||
background-color: hsla(var(--slider-track-bg));
|
||||
position: relative;
|
||||
|
||||
@ -1,3 +1,5 @@
|
||||
import { atom } from 'jotai'
|
||||
|
||||
// Boolean jotai atom, initialised to false.
// NOTE(review): presumably tracks whether the local API server is running — confirm with the panels that read it.
export const serverEnabledAtom = atom<boolean>(false)
|
||||
|
||||
// Jotai atom created without an initial value or type argument, so it starts as undefined.
// NOTE(review): appears intended to hold the model setting params edited for the local API server — consider giving it an explicit type.
export const LocalAPIserverModelParamsAtom = atom()
|
||||
|
||||
@ -17,6 +17,14 @@ jest.mock('@janhq/core', () => ({
|
||||
fs: {
|
||||
rm: jest.fn(),
|
||||
},
|
||||
EngineManager: {
|
||||
instance: jest.fn().mockReturnValue({
|
||||
get: jest.fn(),
|
||||
engines: {
|
||||
values: jest.fn().mockReturnValue([])
|
||||
}
|
||||
}),
|
||||
},
|
||||
}))
|
||||
|
||||
describe('useFactoryReset', () => {
|
||||
|
||||
@ -1,6 +1,6 @@
|
||||
import { useCallback } from 'react'
|
||||
|
||||
import { fs, AppConfiguration } from '@janhq/core'
|
||||
import { fs, AppConfiguration, EngineManager } from '@janhq/core'
|
||||
import { atom, useAtomValue, useSetAtom } from 'jotai'
|
||||
|
||||
import { useActiveModel } from './useActiveModel'
|
||||
@ -37,6 +37,15 @@ export default function useFactoryReset() {
|
||||
// 1: Stop running model
|
||||
setFactoryResetState(FactoryResetState.StoppingModel)
|
||||
await stopModel()
|
||||
|
||||
// Unload every registered engine and wait for all of them to finish.
// Array.from accepts both arrays and iterators, so this also works when
// `values()` returns an iterator that has no `.map` method
// (presumably `engines` is a Map — confirm against EngineManager).
await Promise.all(
  Array.from(EngineManager.instance().engines.values(), async (engine) => {
    await engine.onUnload()
  })
)
|
||||
|
||||
await new Promise((resolve) => setTimeout(resolve, 4000))
|
||||
|
||||
// 2: Delete the old jan data folder
|
||||
|
||||
@ -90,12 +90,15 @@ const useModels = () => {
|
||||
const toUpdate = [
|
||||
...downloadedModels,
|
||||
...cachedModels.filter(
|
||||
(e: Model) => !downloadedModels.some((g: Model) => g.id === e.id)
|
||||
(e) =>
|
||||
!isLocalEngine(e.engine) &&
|
||||
!downloadedModels.some((g: Model) => g.id === e.id)
|
||||
),
|
||||
]
|
||||
|
||||
setDownloadedModels(toUpdate)
|
||||
}, [downloadedModels, setDownloadedModels])
|
||||
setExtensionModels(cachedModels)
|
||||
}, [downloadedModels, setDownloadedModels, setExtensionModels])
|
||||
|
||||
const getModels = async (): Promise<Model[]> =>
|
||||
extensionManager
|
||||
|
||||
@ -1,5 +1,6 @@
|
||||
import { Fragment, useCallback, useState } from 'react'
|
||||
|
||||
import { EngineManager, Model, ModelSettingParams } from '@janhq/core'
|
||||
import { Button, Tooltip, Select, Input, Checkbox } from '@janhq/joi'
|
||||
|
||||
import { useAtom, useAtomValue, useSetAtom } from 'jotai'
|
||||
@ -22,7 +23,10 @@ import {
|
||||
hostOptions,
|
||||
} from '@/helpers/atoms/ApiServer.atom'
|
||||
|
||||
import { serverEnabledAtom } from '@/helpers/atoms/LocalServer.atom'
|
||||
import {
|
||||
LocalAPIserverModelParamsAtom,
|
||||
serverEnabledAtom,
|
||||
} from '@/helpers/atoms/LocalServer.atom'
|
||||
import { selectedModelAtom } from '@/helpers/atoms/Model.atom'
|
||||
|
||||
const LocalServerLeftPanel = () => {
|
||||
@ -31,7 +35,7 @@ const LocalServerLeftPanel = () => {
|
||||
const [serverEnabled, setServerEnabled] = useAtom(serverEnabledAtom)
|
||||
const [isLoading, setIsLoading] = useState(false)
|
||||
|
||||
const { startModel, stateModel } = useActiveModel()
|
||||
const { stateModel } = useActiveModel()
|
||||
const selectedModel = useAtomValue(selectedModelAtom)
|
||||
|
||||
const [isCorsEnabled, setIsCorsEnabled] = useAtom(apiServerCorsEnabledAtom)
|
||||
@ -42,9 +46,19 @@ const LocalServerLeftPanel = () => {
|
||||
const [port, setPort] = useAtom(apiServerPortAtom)
|
||||
const [prefix, setPrefix] = useAtom(apiServerPrefix)
|
||||
const setLoadModelError = useSetAtom(loadModelErrorAtom)
|
||||
|
||||
const localAPIserverModelParams = useAtomValue(LocalAPIserverModelParamsAtom)
|
||||
const FIRST_TIME_VISIT_API_SERVER = 'firstTimeVisitAPIServer'
|
||||
|
||||
const model: Model | undefined = selectedModel
|
||||
? {
|
||||
...selectedModel,
|
||||
object: selectedModel.object || '',
|
||||
settings: (typeof localAPIserverModelParams === 'object'
|
||||
? { ...(localAPIserverModelParams as ModelSettingParams) }
|
||||
: { ...selectedModel.settings }) as ModelSettingParams,
|
||||
}
|
||||
: undefined
|
||||
|
||||
const [firstTimeVisitAPIServer, setFirstTimeVisitAPIServer] =
|
||||
useState<boolean>(false)
|
||||
|
||||
@ -80,7 +94,9 @@ const LocalServerLeftPanel = () => {
|
||||
localStorage.setItem(FIRST_TIME_VISIT_API_SERVER, 'false')
|
||||
setFirstTimeVisitAPIServer(false)
|
||||
}
|
||||
startModel(selectedModel.id, false).catch((e) => console.error(e))
|
||||
const engine = EngineManager.instance().get((model as Model).engine)
|
||||
engine?.loadModel(model as Model)
|
||||
// startModel(selectedModel.id, false).catch((e) => console.error(e))
|
||||
setIsLoading(false)
|
||||
} catch (e) {
|
||||
console.error(e)
|
||||
|
||||
@ -17,13 +17,15 @@ import { useClipboard } from '@/hooks/useClipboard'
|
||||
|
||||
import { getConfigurationsData } from '@/utils/componentSettings'
|
||||
|
||||
import { serverEnabledAtom } from '@/helpers/atoms/LocalServer.atom'
|
||||
import {
|
||||
LocalAPIserverModelParamsAtom,
|
||||
serverEnabledAtom,
|
||||
} from '@/helpers/atoms/LocalServer.atom'
|
||||
import { selectedModelAtom } from '@/helpers/atoms/Model.atom'
|
||||
import { getActiveThreadModelParamsAtom } from '@/helpers/atoms/Thread.atom'
|
||||
|
||||
const LocalServerRightPanel = () => {
|
||||
const activeModelParams = useAtomValue(getActiveThreadModelParamsAtom)
|
||||
const loadModelError = useAtomValue(loadModelErrorAtom)
|
||||
const setLocalAPIserverModelParams = useSetAtom(LocalAPIserverModelParamsAtom)
|
||||
const serverEnabled = useAtomValue(serverEnabledAtom)
|
||||
const setModalTroubleShooting = useSetAtom(modalTroubleShootingAtom)
|
||||
|
||||
@ -35,12 +37,19 @@ const LocalServerRightPanel = () => {
|
||||
extractModelLoadParams(selectedModel?.settings)
|
||||
)
|
||||
|
||||
const overriddenSettings =
|
||||
selectedModel?.settings.ctx_len && selectedModel.settings.ctx_len > 2048
|
||||
? { ctx_len: 4096 }
|
||||
: {}
|
||||
|
||||
useEffect(() => {
|
||||
if (selectedModel) {
|
||||
setCurrentModelSettingParams(
|
||||
extractModelLoadParams(selectedModel?.settings)
|
||||
)
|
||||
setCurrentModelSettingParams({
|
||||
...selectedModel?.settings,
|
||||
...overriddenSettings,
|
||||
})
|
||||
}
|
||||
// eslint-disable-next-line react-hooks/exhaustive-deps
|
||||
}, [selectedModel])
|
||||
|
||||
const modelRuntimeParams = extractInferenceParams(selectedModel?.settings)
|
||||
@ -50,17 +59,8 @@ const LocalServerRightPanel = () => {
|
||||
selectedModel
|
||||
)
|
||||
|
||||
const modelEngineParams = extractModelLoadParams(
|
||||
{
|
||||
...selectedModel?.settings,
|
||||
...activeModelParams,
|
||||
},
|
||||
selectedModel?.settings
|
||||
)
|
||||
|
||||
const componentDataEngineSetting = getConfigurationsData(
|
||||
modelEngineParams,
|
||||
selectedModel
|
||||
currentModelSettingParams
|
||||
)
|
||||
|
||||
const engineSettings = useMemo(
|
||||
@ -78,16 +78,27 @@ const LocalServerRightPanel = () => {
|
||||
)
|
||||
}, [componentDataRuntimeSetting])
|
||||
|
||||
const onUpdateParams = useCallback(() => {
|
||||
// eslint-disable-next-line @typescript-eslint/no-explicit-any
|
||||
setLocalAPIserverModelParams(() => {
|
||||
return { ...currentModelSettingParams }
|
||||
})
|
||||
}, [currentModelSettingParams, setLocalAPIserverModelParams])
|
||||
|
||||
const onValueChanged = useCallback(
|
||||
(key: string, value: string | number | boolean) => {
|
||||
setCurrentModelSettingParams({
|
||||
...currentModelSettingParams,
|
||||
setCurrentModelSettingParams((prevParams) => ({
|
||||
...prevParams,
|
||||
[key]: value,
|
||||
})
|
||||
}))
|
||||
},
|
||||
[currentModelSettingParams]
|
||||
[]
|
||||
)
|
||||
|
||||
useEffect(() => {
|
||||
onUpdateParams()
|
||||
}, [currentModelSettingParams, onUpdateParams])
|
||||
|
||||
return (
|
||||
<RightPanelContainer>
|
||||
<div className="mb-4 px-4 pt-4">
|
||||
@ -156,6 +167,7 @@ const LocalServerRightPanel = () => {
|
||||
<ModelSetting
|
||||
componentProps={modelSettings}
|
||||
onValueChanged={onValueChanged}
|
||||
disabled={serverEnabled}
|
||||
/>
|
||||
</AccordionItem>
|
||||
)}
|
||||
@ -165,6 +177,7 @@ const LocalServerRightPanel = () => {
|
||||
<EngineSetting
|
||||
componentData={engineSettings}
|
||||
onValueChanged={onValueChanged}
|
||||
disabled={serverEnabled}
|
||||
/>
|
||||
</AccordionItem>
|
||||
)}
|
||||
|
||||
Loading…
x
Reference in New Issue
Block a user