diff --git a/extensions/inference-cortex-extension/download.bat b/extensions/inference-cortex-extension/download.bat index 0b13ee872..e19786971 100644 --- a/extensions/inference-cortex-extension/download.bat +++ b/extensions/inference-cortex-extension/download.bat @@ -3,8 +3,9 @@ set BIN_PATH=./bin set /p CORTEX_VERSION=<./bin/version.txt @REM Download cortex.llamacpp binaries -set VERSION=v0.1.25 -set DOWNLOAD_URL=https://github.com/janhq/cortex.llamacpp/releases/download/%VERSION%/cortex.llamacpp-0.1.34-windows-amd64 +set VERSION=v0.1.35 +set DOWNLOAD_URL=https://github.com/janhq/cortex.llamacpp/releases/download/%VERSION%/cortex.llamacpp-0.1.35-windows-amd64 +set CUDA_DOWNLOAD_URL=https://github.com/janhq/cortex.llamacpp/releases/download/%VERSION% set SUBFOLDERS=win-cuda-12-0 win-cuda-11-7 win-noavx win-avx win-avx2 win-avx512 win-vulkan call .\node_modules\.bin\download -e --strip 1 -o %BIN_PATH% https://github.com/janhq/cortex/releases/download/v%CORTEX_VERSION%/cortex-%CORTEX_VERSION%-windows-amd64.tar.gz @@ -17,8 +18,8 @@ call .\node_modules\.bin\download %DOWNLOAD_URL%-avx.tar.gz -e --strip 1 -o %BIN call .\node_modules\.bin\download %DOWNLOAD_URL%-avx2.tar.gz -e --strip 1 -o %BIN_PATH%/avx2/engines/cortex.llamacpp call .\node_modules\.bin\download %DOWNLOAD_URL%-avx512.tar.gz -e --strip 1 -o %BIN_PATH%/avx512/engines/cortex.llamacpp call .\node_modules\.bin\download %DOWNLOAD_URL%-vulkan.tar.gz -e --strip 1 -o %BIN_PATH%/vulkan/engines/cortex.llamacpp -call .\node_modules\.bin\download %DOWNLOAD_URL%-cuda-12-0-windows-amd64.tar.gz -e --strip 1 -o %BIN_PATH% -call .\node_modules\.bin\download %DOWNLOAD_URL%-cuda-11-7-windows-amd64.tar.gz -e --strip 1 -o %BIN_PATH% +call .\node_modules\.bin\download %CUDA_DOWNLOAD_URL%/cuda-12-0-windows-amd64.tar.gz -e --strip 1 -o %BIN_PATH% +call .\node_modules\.bin\download %CUDA_DOWNLOAD_URL%/cuda-11-7-windows-amd64.tar.gz -e --strip 1 -o %BIN_PATH% @REM Loop through each folder and move DLLs (excluding engine.dll) diff 
--git a/extensions/inference-cortex-extension/download.sh b/extensions/inference-cortex-extension/download.sh index d04f0482d..9426bc872 100755 --- a/extensions/inference-cortex-extension/download.sh +++ b/extensions/inference-cortex-extension/download.sh @@ -3,7 +3,8 @@ # Read CORTEX_VERSION CORTEX_VERSION=$(cat ./bin/version.txt) CORTEX_RELEASE_URL="https://github.com/janhq/cortex/releases/download" -ENGINE_DOWNLOAD_URL="https://github.com/janhq/cortex.llamacpp/releases/download/v0.1.34/cortex.llamacpp-0.1.34" +ENGINE_DOWNLOAD_URL="https://github.com/janhq/cortex.llamacpp/releases/download/v0.1.35/cortex.llamacpp-0.1.35" +CUDA_DOWNLOAD_URL="https://github.com/janhq/cortex.llamacpp/releases/download/v0.1.35" # Detect platform OS_TYPE=$(uname) @@ -22,8 +23,8 @@ if [ "$OS_TYPE" == "Linux" ]; then download "${ENGINE_DOWNLOAD_URL}-linux-amd64-noavx-cuda-12-0.tar.gz" -e --strip 1 -o "./bin/noavx-cuda-12-0/engines/cortex.llamacpp" 1 download "${ENGINE_DOWNLOAD_URL}-linux-amd64-noavx-cuda-11-7.tar.gz" -e --strip 1 -o "./bin/noavx-cuda-11-7/engines/cortex.llamacpp" 1 download "${ENGINE_DOWNLOAD_URL}-linux-amd64-vulkan.tar.gz" -e --strip 1 -o "./bin/vulkan/engines/cortex.llamacpp" 1 - download "${ENGINE_DOWNLOAD_URL}-cuda-12-0-linux-amd64.tar.gz" -e --strip 1 -o "./bin" 1 - download "${ENGINE_DOWNLOAD_URL}-cuda-11-7-linux-amd64.tar.gz" -e --strip 1 -o "./bin" 1 + download "${CUDA_DOWNLOAD_URL}/cuda-12-0-linux-amd64.tar.gz" -e --strip 1 -o "./bin" 1 + download "${CUDA_DOWNLOAD_URL}/cuda-11-7-linux-amd64.tar.gz" -e --strip 1 -o "./bin" 1 elif [ "$OS_TYPE" == "Darwin" ]; then # macOS downloads diff --git a/extensions/inference-cortex-extension/src/node/execute.test.ts b/extensions/inference-cortex-extension/src/node/execute.test.ts index 3c0b32df5..622eb38af 100644 --- a/extensions/inference-cortex-extension/src/node/execute.test.ts +++ b/extensions/inference-cortex-extension/src/node/execute.test.ts @@ -46,11 +46,11 @@ describe('test executable cortex file', () => { }) 
expect(executableCortexFile(testSettings)).toEqual( expect.objectContaining({ - enginePath: expect.stringContaining(`/bin/arm64`), - binPath: expect.stringContaining(`/bin`), + enginePath: expect.stringContaining(`arm64`), + binPath: expect.stringContaining(`bin`), executablePath: originalPlatform === 'darwin' - ? expect.stringContaining(`/cortex-server`) + ? expect.stringContaining(`cortex-server`) : expect.anything(), cudaVisibleDevices: '', vkVisibleDevices: '', @@ -61,11 +61,11 @@ describe('test executable cortex file', () => { }) expect(executableCortexFile(testSettings)).toEqual( expect.objectContaining({ - enginePath: expect.stringContaining(`/bin/x64`), - binPath: expect.stringContaining(`/bin`), + enginePath: expect.stringContaining(`x64`), + binPath: expect.stringContaining(`bin`), executablePath: originalPlatform === 'darwin' - ? expect.stringContaining(`/cortex-server`) + ? expect.stringContaining(`cortex-server`) : expect.anything(), cudaVisibleDevices: '', vkVisibleDevices: '', @@ -84,9 +84,9 @@ describe('test executable cortex file', () => { mockCpuInfo.mockReturnValue(['avx']) expect(executableCortexFile(settings)).toEqual( expect.objectContaining({ - enginePath: expect.stringContaining(`/bin/avx`), - binPath: expect.stringContaining(`/bin`), - executablePath: expect.stringContaining(`/cortex-server.exe`), + enginePath: expect.stringContaining(`avx`), + binPath: expect.stringContaining(`bin`), + executablePath: expect.stringContaining(`cortex-server.exe`), cudaVisibleDevices: '', vkVisibleDevices: '', }) @@ -117,11 +117,12 @@ describe('test executable cortex file', () => { }, ], } + mockCpuInfo.mockReturnValue(['avx2']) expect(executableCortexFile(settings)).toEqual( expect.objectContaining({ - enginePath: expect.stringContaining(`cuda-11-7`), - binPath: expect.stringContaining(`/bin`), - executablePath: expect.stringContaining(`/cortex-server.exe`), + enginePath: expect.stringContaining(`avx2-cuda-11-7`), + binPath: expect.stringContaining(`bin`), 
+ executablePath: expect.stringContaining(`cortex-server.exe`), cudaVisibleDevices: '0', vkVisibleDevices: '0', }) @@ -152,11 +153,12 @@ describe('test executable cortex file', () => { }, ], } + mockCpuInfo.mockReturnValue(['noavx']) expect(executableCortexFile(settings)).toEqual( expect.objectContaining({ - enginePath: expect.stringContaining(`cuda-12-0`), - binPath: expect.stringContaining(`/bin`), - executablePath: expect.stringContaining(`/cortex-server.exe`), + enginePath: expect.stringContaining(`noavx-cuda-12-0`), + binPath: expect.stringContaining(`bin`), + executablePath: expect.stringContaining(`cortex-server.exe`), cudaVisibleDevices: '0', vkVisibleDevices: '0', }) @@ -175,7 +177,7 @@ describe('test executable cortex file', () => { expect(executableCortexFile(settings)).toEqual( expect.objectContaining({ enginePath: expect.stringContaining(`noavx`), - executablePath: expect.stringContaining(`/cortex-server`), + executablePath: expect.stringContaining(`cortex-server`), cudaVisibleDevices: '', vkVisibleDevices: '', }) @@ -209,8 +211,8 @@ describe('test executable cortex file', () => { expect(executableCortexFile(settings)).toEqual( expect.objectContaining({ enginePath: expect.stringContaining(`cuda-11-7`), - binPath: expect.stringContaining(`/bin`), - executablePath: expect.stringContaining(`/cortex-server`), + binPath: expect.stringContaining(`bin`), + executablePath: expect.stringContaining(`cortex-server`), cudaVisibleDevices: '0', vkVisibleDevices: '0', }) @@ -244,8 +246,8 @@ describe('test executable cortex file', () => { expect(executableCortexFile(settings)).toEqual( expect.objectContaining({ enginePath: expect.stringContaining(`cuda-12-0`), - binPath: expect.stringContaining(`/bin`), - executablePath: expect.stringContaining(`/cortex-server`), + binPath: expect.stringContaining(`bin`), + executablePath: expect.stringContaining(`cortex-server`), cudaVisibleDevices: '0', vkVisibleDevices: '0', }) @@ -269,8 +271,8 @@ describe('test executable cortex 
file', () => { expect(executableCortexFile(settings)).toEqual( expect.objectContaining({ enginePath: expect.stringContaining(instruction), - binPath: expect.stringContaining(`/bin`), - executablePath: expect.stringContaining(`/cortex-server`), + binPath: expect.stringContaining(`bin`), + executablePath: expect.stringContaining(`cortex-server`), cudaVisibleDevices: '', vkVisibleDevices: '', @@ -293,8 +295,8 @@ describe('test executable cortex file', () => { expect(executableCortexFile(settings)).toEqual( expect.objectContaining({ enginePath: expect.stringContaining(instruction), - binPath: expect.stringContaining(`/bin`), - executablePath: expect.stringContaining(`/cortex-server.exe`), + binPath: expect.stringContaining(`bin`), + executablePath: expect.stringContaining(`cortex-server.exe`), cudaVisibleDevices: '', vkVisibleDevices: '', }) @@ -333,8 +335,8 @@ describe('test executable cortex file', () => { expect(executableCortexFile(settings)).toEqual( expect.objectContaining({ enginePath: expect.stringContaining(`cuda-12-0`), - binPath: expect.stringContaining(`/bin`), - executablePath: expect.stringContaining(`/cortex-server.exe`), + binPath: expect.stringContaining(`bin`), + executablePath: expect.stringContaining(`cortex-server.exe`), cudaVisibleDevices: '0', vkVisibleDevices: '0', }) @@ -373,8 +375,8 @@ describe('test executable cortex file', () => { expect(executableCortexFile(settings)).toEqual( expect.objectContaining({ enginePath: expect.stringContaining(`cuda-12-0`), - binPath: expect.stringContaining(`/bin`), - executablePath: expect.stringContaining(`/cortex-server`), + binPath: expect.stringContaining(`bin`), + executablePath: expect.stringContaining(`cortex-server`), cudaVisibleDevices: '0', vkVisibleDevices: '0', }) @@ -414,8 +416,8 @@ describe('test executable cortex file', () => { expect(executableCortexFile(settings)).toEqual( expect.objectContaining({ enginePath: expect.stringContaining(`vulkan`), - binPath: expect.stringContaining(`/bin`), - 
executablePath: expect.stringContaining(`/cortex-server`), + binPath: expect.stringContaining(`bin`), + executablePath: expect.stringContaining(`cortex-server`), cudaVisibleDevices: '0', vkVisibleDevices: '0', }) @@ -441,10 +443,10 @@ describe('test executable cortex file', () => { expect(executableCortexFile(settings)).toEqual( expect.objectContaining({ enginePath: expect.stringContaining(`x64`), - binPath: expect.stringContaining(`/bin`), + binPath: expect.stringContaining(`bin`), executablePath: originalPlatform === 'darwin' - ? expect.stringContaining(`/cortex-server`) + ? expect.stringContaining(`cortex-server`) : expect.anything(), cudaVisibleDevices: '', vkVisibleDevices: '', diff --git a/extensions/inference-cortex-extension/src/node/execute.ts b/extensions/inference-cortex-extension/src/node/execute.ts index 18d840fdd..b8e768bb9 100644 --- a/extensions/inference-cortex-extension/src/node/execute.ts +++ b/extensions/inference-cortex-extension/src/node/execute.ts @@ -92,10 +92,9 @@ export const executableCortexFile = ( ? os() : [ gpuRunMode(gpuSetting) !== 'cuda' || - cpuInstruction === 'avx' || - cpuInstruction === 'noavx' + cpuInstruction === 'avx2' ? cpuInstruction - : '', + : 'noavx', gpuRunMode(gpuSetting), cudaVersion(gpuSetting), ]