diff --git a/extensions/inference-cortex-extension/bin/version.txt b/extensions/inference-cortex-extension/bin/version.txt new file mode 100644 index 000000000..7f207341d --- /dev/null +++ b/extensions/inference-cortex-extension/bin/version.txt @@ -0,0 +1 @@ +1.0.1 \ No newline at end of file diff --git a/extensions/inference-cortex-extension/download.bat b/extensions/inference-cortex-extension/download.bat index d764b6df8..0b13ee872 100644 --- a/extensions/inference-cortex-extension/download.bat +++ b/extensions/inference-cortex-extension/download.bat @@ -8,13 +8,18 @@ set DOWNLOAD_URL=https://github.com/janhq/cortex.llamacpp/releases/download/%VER set SUBFOLDERS=win-cuda-12-0 win-cuda-11-7 win-noavx win-avx win-avx2 win-avx512 win-vulkan call .\node_modules\.bin\download -e --strip 1 -o %BIN_PATH% https://github.com/janhq/cortex/releases/download/v%CORTEX_VERSION%/cortex-%CORTEX_VERSION%-windows-amd64.tar.gz -call .\node_modules\.bin\download %DOWNLOAD_URL%-avx2-cuda-12-0.tar.gz -e --strip 1 -o %BIN_PATH%/win-cuda-12-0/engines/cortex.llamacpp -call .\node_modules\.bin\download %DOWNLOAD_URL%-avx2-cuda-11-7.tar.gz -e --strip 1 -o %BIN_PATH%/win-cuda-11-7/engines/cortex.llamacpp -call .\node_modules\.bin\download %DOWNLOAD_URL%-noavx.tar.gz -e --strip 1 -o %BIN_PATH%/win-noavx/engines/cortex.llamacpp -call .\node_modules\.bin\download %DOWNLOAD_URL%-avx.tar.gz -e --strip 1 -o %BIN_PATH%/win-avx/engines/cortex.llamacpp -call .\node_modules\.bin\download %DOWNLOAD_URL%-avx2.tar.gz -e --strip 1 -o %BIN_PATH%/win-avx2/engines/cortex.llamacpp -call .\node_modules\.bin\download %DOWNLOAD_URL%-avx512.tar.gz -e --strip 1 -o %BIN_PATH%/win-avx512/engines/cortex.llamacpp -call .\node_modules\.bin\download %DOWNLOAD_URL%-vulkan.tar.gz -e --strip 1 -o %BIN_PATH%/win-vulkan/engines/cortex.llamacpp +call .\node_modules\.bin\download %DOWNLOAD_URL%-avx2-cuda-12-0.tar.gz -e --strip 1 -o %BIN_PATH%/avx2-cuda-12-0/engines/cortex.llamacpp +call .\node_modules\.bin\download %DOWNLOAD_URL%-avx2-cuda-11-7.tar.gz -e --strip 1 -o %BIN_PATH%/avx2-cuda-11-7/engines/cortex.llamacpp +call .\node_modules\.bin\download %DOWNLOAD_URL%-noavx-cuda-12-0.tar.gz -e --strip 1 -o %BIN_PATH%/noavx-cuda-12-0/engines/cortex.llamacpp +call .\node_modules\.bin\download %DOWNLOAD_URL%-noavx-cuda-11-7.tar.gz -e --strip 1 -o %BIN_PATH%/noavx-cuda-11-7/engines/cortex.llamacpp +call .\node_modules\.bin\download %DOWNLOAD_URL%-noavx.tar.gz -e --strip 1 -o %BIN_PATH%/noavx/engines/cortex.llamacpp +call .\node_modules\.bin\download %DOWNLOAD_URL%-avx.tar.gz -e --strip 1 -o %BIN_PATH%/avx/engines/cortex.llamacpp +call .\node_modules\.bin\download %DOWNLOAD_URL%-avx2.tar.gz -e --strip 1 -o %BIN_PATH%/avx2/engines/cortex.llamacpp +call .\node_modules\.bin\download %DOWNLOAD_URL%-avx512.tar.gz -e --strip 1 -o %BIN_PATH%/avx512/engines/cortex.llamacpp +call .\node_modules\.bin\download %DOWNLOAD_URL%-vulkan.tar.gz -e --strip 1 -o %BIN_PATH%/vulkan/engines/cortex.llamacpp +call .\node_modules\.bin\download %DOWNLOAD_URL%-cuda-12-0-windows-amd64.tar.gz -e --strip 1 -o %BIN_PATH% +call .\node_modules\.bin\download %DOWNLOAD_URL%-cuda-11-7-windows-amd64.tar.gz -e --strip 1 -o %BIN_PATH% + @REM Loop through each folder and move DLLs (excluding engine.dll) for %%F in (%SUBFOLDERS%) do ( diff --git a/extensions/inference-cortex-extension/download.sh b/extensions/inference-cortex-extension/download.sh index fe1f8af9f..d04f0482d 100755 --- a/extensions/inference-cortex-extension/download.sh +++ b/extensions/inference-cortex-extension/download.sh @@ -9,26 +9,30 @@ OS_TYPE=$(uname) if [ "$OS_TYPE" == "Linux" ]; then # Linux downloads - download "${CORTEX_RELEASE_URL}/v${CORTEX_VERSION}/cortex-${CORTEX_VERSION}-linux-amd64.tar.gz" -e --strip 1 -o "./bin" + download "${CORTEX_RELEASE_URL}/v${CORTEX_VERSION}/cortex-${CORTEX_VERSION}-linux-amd64.tar.gz" -e --strip 1 -o "./bin" chmod +x "./bin/cortex" # Download engines for Linux - download "${ENGINE_DOWNLOAD_URL}-linux-amd64-noavx.tar.gz" -e --strip 1 -o "./bin/linux-noavx/engines/cortex.llamacpp" 1 - download "${ENGINE_DOWNLOAD_URL}-linux-amd64-avx.tar.gz" -e --strip 1 -o "./bin/linux-avx/engines/cortex.llamacpp" 1 - download "${ENGINE_DOWNLOAD_URL}-linux-amd64-avx2.tar.gz" -e --strip 1 -o "./bin/linux-avx2/engines/cortex.llamacpp" 1 - download "${ENGINE_DOWNLOAD_URL}-linux-amd64-avx512.tar.gz" -e --strip 1 -o "./bin/linux-avx512/engines/cortex.llamacpp" 1 - download "${ENGINE_DOWNLOAD_URL}-linux-amd64-avx2-cuda-12-0.tar.gz" -e --strip 1 -o "./bin/linux-cuda-12-0/engines/cortex.llamacpp" 1 - download "${ENGINE_DOWNLOAD_URL}-linux-amd64-avx2-cuda-11-7.tar.gz" -e --strip 1 -o "./bin/linux-cuda-11-7/engines/cortex.llamacpp" 1 - download "${ENGINE_DOWNLOAD_URL}-linux-amd64-vulkan.tar.gz" -e --strip 1 -o "./bin/linux-vulkan/engines/cortex.llamacpp" 1 + download "${ENGINE_DOWNLOAD_URL}-linux-amd64-noavx.tar.gz" -e --strip 1 -o "./bin/noavx/engines/cortex.llamacpp" 1 + download "${ENGINE_DOWNLOAD_URL}-linux-amd64-avx.tar.gz" -e --strip 1 -o "./bin/avx/engines/cortex.llamacpp" 1 + download "${ENGINE_DOWNLOAD_URL}-linux-amd64-avx2.tar.gz" -e --strip 1 -o "./bin/avx2/engines/cortex.llamacpp" 1 + download "${ENGINE_DOWNLOAD_URL}-linux-amd64-avx512.tar.gz" -e --strip 1 -o "./bin/avx512/engines/cortex.llamacpp" 1 + download "${ENGINE_DOWNLOAD_URL}-linux-amd64-avx2-cuda-12-0.tar.gz" -e --strip 1 -o "./bin/avx2-cuda-12-0/engines/cortex.llamacpp" 1 + download "${ENGINE_DOWNLOAD_URL}-linux-amd64-avx2-cuda-11-7.tar.gz" -e --strip 1 -o "./bin/avx2-cuda-11-7/engines/cortex.llamacpp" 1 + download "${ENGINE_DOWNLOAD_URL}-linux-amd64-noavx-cuda-12-0.tar.gz" -e --strip 1 -o "./bin/noavx-cuda-12-0/engines/cortex.llamacpp" 1 + download "${ENGINE_DOWNLOAD_URL}-linux-amd64-noavx-cuda-11-7.tar.gz" -e --strip 1 -o "./bin/noavx-cuda-11-7/engines/cortex.llamacpp" 1 + download "${ENGINE_DOWNLOAD_URL}-linux-amd64-vulkan.tar.gz" -e --strip 1 -o "./bin/vulkan/engines/cortex.llamacpp" 1 + download "${ENGINE_DOWNLOAD_URL}-cuda-12-0-linux-amd64.tar.gz" -e --strip 1 -o "./bin" 1 + download "${ENGINE_DOWNLOAD_URL}-cuda-11-7-linux-amd64.tar.gz" -e --strip 1 -o "./bin" 1 elif [ "$OS_TYPE" == "Darwin" ]; then # macOS downloads - download "${CORTEX_RELEASE_URL}/v${CORTEX_VERSION}/cortex-${CORTEX_VERSION}-mac-universal.tar.gz" -e --strip 1 -o "./bin" 1 + download "${CORTEX_RELEASE_URL}/v${CORTEX_VERSION}/cortex-${CORTEX_VERSION}-mac-universal.tar.gz" -e --strip 1 -o "./bin" 1 chmod +x "./bin/cortex" # Download engines for macOS - download "${ENGINE_DOWNLOAD_URL}-mac-arm64.tar.gz" -e --strip 1 -o ./bin/mac-arm64/engines/cortex.llamacpp - download "${ENGINE_DOWNLOAD_URL}-mac-amd64.tar.gz" -e --strip 1 -o ./bin/mac-x64/engines/cortex.llamacpp + download "${ENGINE_DOWNLOAD_URL}-mac-arm64.tar.gz" -e --strip 1 -o ./bin/arm64/engines/cortex.llamacpp + download "${ENGINE_DOWNLOAD_URL}-mac-amd64.tar.gz" -e --strip 1 -o ./bin/x64/engines/cortex.llamacpp else echo "Unsupported operating system: $OS_TYPE" diff --git a/extensions/inference-cortex-extension/src/node/execute.test.ts b/extensions/inference-cortex-extension/src/node/execute.test.ts index a1b5c4ba4..3c0b32df5 100644 --- a/extensions/inference-cortex-extension/src/node/execute.test.ts +++ b/extensions/inference-cortex-extension/src/node/execute.test.ts @@ -27,8 +27,8 @@ jest.mock('cpu-instructions', () => ({ cpuInfo: jest.fn(), }, })) -let mock = cpuInfo.cpuInfo as jest.Mock -mock.mockReturnValue([]) +let mockCpuInfo = cpuInfo.cpuInfo as jest.Mock +mockCpuInfo.mockReturnValue([]) describe('test executable cortex file', () => { afterAll(function () { @@ -46,7 +46,8 @@ describe('test executable cortex file', () => { }) expect(executableCortexFile(testSettings)).toEqual( expect.objectContaining({ - enginePath: expect.stringContaining(`mac-arm64`), + enginePath: expect.stringContaining(`/bin/arm64`), + binPath: expect.stringContaining(`/bin`), executablePath: originalPlatform === 'darwin' ? expect.stringContaining(`/cortex-server`) @@ -60,7 +61,8 @@ describe('test executable cortex file', () => { }) expect(executableCortexFile(testSettings)).toEqual( expect.objectContaining({ - enginePath: expect.stringContaining(`mac-x64`), + enginePath: expect.stringContaining(`/bin/x64`), + binPath: expect.stringContaining(`/bin`), executablePath: originalPlatform === 'darwin' ? expect.stringContaining(`/cortex-server`) @@ -79,9 +81,11 @@ describe('test executable cortex file', () => { ...testSettings, run_mode: 'cpu', } + mockCpuInfo.mockReturnValue(['avx']) expect(executableCortexFile(settings)).toEqual( expect.objectContaining({ - enginePath: expect.stringContaining(`win`), + enginePath: expect.stringContaining(`/bin/avx`), + binPath: expect.stringContaining(`/bin`), executablePath: expect.stringContaining(`/cortex-server.exe`), cudaVisibleDevices: '', vkVisibleDevices: '', @@ -115,7 +119,8 @@ describe('test executable cortex file', () => { } expect(executableCortexFile(settings)).toEqual( expect.objectContaining({ - enginePath: expect.stringContaining(`win-cuda-11-7`), + enginePath: expect.stringContaining(`cuda-11-7`), + binPath: expect.stringContaining(`/bin`), executablePath: expect.stringContaining(`/cortex-server.exe`), cudaVisibleDevices: '0', vkVisibleDevices: '0', @@ -149,7 +154,8 @@ describe('test executable cortex file', () => { } expect(executableCortexFile(settings)).toEqual( expect.objectContaining({ - enginePath: expect.stringContaining(`win-cuda-12-0`), + enginePath: expect.stringContaining(`cuda-12-0`), + binPath: expect.stringContaining(`/bin`), executablePath: expect.stringContaining(`/cortex-server.exe`), cudaVisibleDevices: '0', vkVisibleDevices: '0', @@ -165,9 +171,10 @@ describe('test executable cortex file', () => { ...testSettings, run_mode: 'cpu', } + mockCpuInfo.mockReturnValue(['noavx']) expect(executableCortexFile(settings)).toEqual( expect.objectContaining({ - enginePath: expect.stringContaining(`linux`), + enginePath: expect.stringContaining(`noavx`), executablePath: expect.stringContaining(`/cortex-server`), cudaVisibleDevices: '', vkVisibleDevices: '', @@ -201,7 +208,8 @@ describe('test executable cortex file', () => { } expect(executableCortexFile(settings)).toEqual( expect.objectContaining({ - enginePath: expect.stringContaining(`linux-cuda-11-7`), + enginePath: expect.stringContaining(`cuda-11-7`), + binPath: expect.stringContaining(`/bin`), executablePath: expect.stringContaining(`/cortex-server`), cudaVisibleDevices: '0', vkVisibleDevices: '0', @@ -235,7 +243,8 @@ describe('test executable cortex file', () => { } expect(executableCortexFile(settings)).toEqual( expect.objectContaining({ - enginePath: expect.stringContaining(`linux-cuda-12-0`), + enginePath: expect.stringContaining(`cuda-12-0`), + binPath: expect.stringContaining(`/bin`), executablePath: expect.stringContaining(`/cortex-server`), cudaVisibleDevices: '0', vkVisibleDevices: '0', @@ -255,11 +264,12 @@ describe('test executable cortex file', () => { const cpuInstructions = ['avx512', 'avx2', 'avx', 'noavx'] cpuInstructions.forEach((instruction) => { - mock.mockReturnValue([instruction]) + mockCpuInfo.mockReturnValue([instruction]) expect(executableCortexFile(settings)).toEqual( expect.objectContaining({ - enginePath: expect.stringContaining(`linux-${instruction}`), + enginePath: expect.stringContaining(instruction), + binPath: expect.stringContaining(`/bin`), executablePath: expect.stringContaining(`/cortex-server`), cudaVisibleDevices: '', @@ -279,10 +289,11 @@ describe('test executable cortex file', () => { } const cpuInstructions = ['avx512', 'avx2', 'avx', 'noavx'] cpuInstructions.forEach((instruction) => { - mock.mockReturnValue([instruction]) + mockCpuInfo.mockReturnValue([instruction]) expect(executableCortexFile(settings)).toEqual( expect.objectContaining({ - enginePath: expect.stringContaining(`win-${instruction}`), + enginePath: expect.stringContaining(instruction), + binPath: expect.stringContaining(`/bin`), executablePath: expect.stringContaining(`/cortex-server.exe`), cudaVisibleDevices: '', vkVisibleDevices: '', @@ -318,10 +329,11 @@ describe('test executable cortex file', () => { } const cpuInstructions = ['avx512', 'avx2', 'avx', 'noavx'] cpuInstructions.forEach((instruction) => { - mock.mockReturnValue([instruction]) + mockCpuInfo.mockReturnValue([instruction]) expect(executableCortexFile(settings)).toEqual( expect.objectContaining({ - enginePath: expect.stringContaining(`win-cuda-12-0`), + enginePath: expect.stringContaining(`cuda-12-0`), + binPath: expect.stringContaining(`/bin`), executablePath: expect.stringContaining(`/cortex-server.exe`), cudaVisibleDevices: '0', vkVisibleDevices: '0', @@ -357,10 +369,11 @@ describe('test executable cortex file', () => { ], } cpuInstructions.forEach((instruction) => { - mock.mockReturnValue([instruction]) + mockCpuInfo.mockReturnValue([instruction]) expect(executableCortexFile(settings)).toEqual( expect.objectContaining({ - enginePath: expect.stringContaining(`linux-cuda-12-0`), + enginePath: expect.stringContaining(`cuda-12-0`), + binPath: expect.stringContaining(`/bin`), executablePath: expect.stringContaining(`/cortex-server`), cudaVisibleDevices: '0', vkVisibleDevices: '0', @@ -397,10 +410,11 @@ describe('test executable cortex file', () => { ], } cpuInstructions.forEach((instruction) => { - mock.mockReturnValue([instruction]) + mockCpuInfo.mockReturnValue([instruction]) expect(executableCortexFile(settings)).toEqual( expect.objectContaining({ - enginePath: expect.stringContaining(`linux-vulkan`), + enginePath: expect.stringContaining(`vulkan`), + binPath: expect.stringContaining(`/bin`), executablePath: expect.stringContaining(`/cortex-server`), cudaVisibleDevices: '0', vkVisibleDevices: '0', @@ -423,10 +437,11 @@ describe('test executable cortex file', () => { ...testSettings, run_mode: 'cpu', } - mock.mockReturnValue([]) + mockCpuInfo.mockReturnValue([]) expect(executableCortexFile(settings)).toEqual( expect.objectContaining({ - enginePath: expect.stringContaining(`mac-x64`), + enginePath: expect.stringContaining(`x64`), + binPath: expect.stringContaining(`/bin`), executablePath: originalPlatform === 'darwin' ? expect.stringContaining(`/cortex-server`) diff --git a/extensions/inference-cortex-extension/src/node/execute.ts b/extensions/inference-cortex-extension/src/node/execute.ts index b5f848332..18d840fdd 100644 --- a/extensions/inference-cortex-extension/src/node/execute.ts +++ b/extensions/inference-cortex-extension/src/node/execute.ts @@ -4,6 +4,7 @@ import { cpuInfo } from 'cpu-instructions' export interface CortexExecutableOptions { enginePath: string + binPath: string executablePath: string cudaVisibleDevices: string vkVisibleDevices: string @@ -36,8 +37,8 @@ const os = (): string => { ? 'win' : process.platform === 'darwin' ? process.arch === 'arm64' - ? 'mac-arm64' - : 'mac-x64' + ? 'arm64' + : 'x64' : 'linux' } @@ -66,7 +67,7 @@ const cudaVersion = (settings?: GpuSetting): '11-7' | '12-0' | undefined => { * The CPU instructions that will be set - either 'avx512', 'avx2', 'avx', or 'noavx'. * @returns */ -const cpuInstructions = () => { +const cpuInstructions = (): string => { if (process.platform === 'darwin') return '' return cpuInfo.cpuInfo().some((e) => e.toUpperCase() === 'AVX512') ? 'avx512' @@ -84,26 +85,30 @@ const cpuInstructions = () => { export const executableCortexFile = ( gpuSetting?: GpuSetting ): CortexExecutableOptions => { - let engineFolder = [ - os(), - ...(gpuSetting?.vulkan - ? [] + const cpuInstruction = cpuInstructions() + let engineFolder = gpuSetting?.vulkan + ? 'vulkan' + : process.platform === 'darwin' + ? os() : [ - gpuRunMode(gpuSetting) !== 'cuda' ? cpuInstructions() : '', + gpuRunMode(gpuSetting) !== 'cuda' || + cpuInstruction === 'avx' || + cpuInstruction === 'noavx' + ? cpuInstruction + : '', gpuRunMode(gpuSetting), cudaVersion(gpuSetting), - ]), - gpuSetting?.vulkan ? 'vulkan' : undefined, - ] - .filter((e) => !!e) - .join('-') + ] + .filter((e) => !!e) + .join('-') let cudaVisibleDevices = gpuSetting?.gpus_in_use.join(',') ?? '' let vkVisibleDevices = gpuSetting?.gpus_in_use.join(',') ?? '' let binaryName = `cortex-server${extension()}` - + const binPath = path.join(__dirname, '..', 'bin') return { - enginePath: path.join(__dirname, '..', 'bin', engineFolder), - executablePath: path.join(__dirname, '..', 'bin', binaryName), + enginePath: path.join(binPath, engineFolder), + executablePath: path.join(binPath, binaryName), + binPath: binPath, cudaVisibleDevices, vkVisibleDevices, } diff --git a/extensions/inference-cortex-extension/src/node/index.ts b/extensions/inference-cortex-extension/src/node/index.ts index 788318c84..c54dae4c0 100644 --- a/extensions/inference-cortex-extension/src/node/index.ts +++ b/extensions/inference-cortex-extension/src/node/index.ts @@ -27,17 +27,21 @@ function run(systemInfo?: SystemInformation): Promise { // Execute the binary log(`[CORTEX]:: Spawn cortex at path: ${executableOptions.executablePath}`) - log(`[CORTEX]::Debug: Cortex engine path: ${executableOptions.enginePath}`) + log(`[CORTEX]:: Cortex engine path: ${executableOptions.enginePath}`) // Add engine path to the PATH and LD_LIBRARY_PATH process.env.PATH = (process.env.PATH || '').concat( path.delimiter, - executableOptions.enginePath + executableOptions.enginePath, + path.delimiter, + executableOptions.binPath ) log(`[CORTEX] PATH: ${process.env.PATH}`) process.env.LD_LIBRARY_PATH = (process.env.LD_LIBRARY_PATH || '').concat( path.delimiter, - executableOptions.enginePath + executableOptions.enginePath, + path.delimiter, + executableOptions.binPath ) const dataFolderPath = getJanDataFolderPath() diff --git a/web/hooks/useModels.test.ts b/web/hooks/useModels.test.ts index 2def2b745..0440b5443 100644 --- a/web/hooks/useModels.test.ts +++ b/web/hooks/useModels.test.ts @@ -6,6 +6,9 @@ import { extensionManager } from '@/extension' // Mock dependencies jest.mock('@janhq/core') jest.mock('@/extension') +jest.mock('use-debounce', () => ({ + useDebouncedCallback: jest.fn().mockImplementation((fn) => fn), +})) import useModels from './useModels' diff --git a/web/hooks/useModels.ts b/web/hooks/useModels.ts index 3d6f7609b..8bdbd6a90 100644 --- a/web/hooks/useModels.ts +++ b/web/hooks/useModels.ts @@ -59,7 +59,7 @@ const useModels = () => { useEffect(() => { // Try get data on mount - getData() + reloadData() // Listen for model updates events.on(ModelEvent.OnModelsUpdate, async () => reloadData())