Change scripts to download only llama.cpp engine

Akarshan Biswas 2025-05-19 09:40:27 +05:30 committed by Louis
parent a8abc9f9aa
commit ed6f86d4b1
3 changed files with 9 additions and 45 deletions


@@ -8,7 +8,6 @@
 import {
   AIEngine,
-  localProvider,
-  getJanDataFolderPath,
+  LocalProvider,
   fs,
   Model,
@@ -64,7 +63,7 @@ function parseGGUFFileName(filename: string): {
  */
 export default class inference_llamacpp_extension
   extends AIEngine
-  implements localProvider
+  implements LocalProvider
 {
   provider: string = 'llamacpp'
   readonly providerId: string = 'llamcpp'
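
A note on the interface change above: the class previously implemented localProvider and now implements LocalProvider. Here is a minimal sketch of what that contract plausibly looks like, inferred only from the method signatures visible later in this diff; the real definitions live in the extension SDK, so every shape below is an assumption:

// Hypothetical shapes, inferred from this diff (not the SDK's actual types).
type LoadOptions = { providerId: string; modelPath?: string }
type UnloadOptions = { providerId: string; sessionId?: string }
type SessionInfo = { sessionId: string; port?: number }
type UnloadResult = { success: boolean; error?: string }

// Assumed contract: renaming loadModel/unloadModel to load/unload is what
// lets the class satisfy it (see the hunks below).
interface LocalProvider {
  readonly providerId: string
  load(opts: LoadOptions): Promise<SessionInfo>
  unload(opts: UnloadOptions): Promise<UnloadResult>
}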
@@ -136,7 +135,7 @@
     for (const entry of entries) {
       if (entry.name?.endsWith('.gguf') && entry.isFile) {
         const modelPath = await path.join(modelsDir, entry.name)
-        const stats = await fs.stat(modelPath) // Tauri's fs.stat or Node's fs.statSync
+        const stats = await fs.stat(modelPath)
         const parsedName = parseGGUFFileName(entry.name)
         result.push({
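
The hunk above touches the .gguf directory scan: each file is stat'ed and its name run through parseGGUFFileName, whose declaration appears in an earlier hunk header but whose body and return type are cut off. A sketch of what such a parser could look like; the field names and the quantization-suffix convention (e.g. Q4_K_M, F16) are assumptions, not the actual implementation:

// Hypothetical sketch of parseGGUFFileName; the real return type is not
// visible in this diff, so modelName/quantization are assumed fields.
function parseGGUFFileName(filename: string): {
  modelName: string
  quantization?: string
} {
  const base = filename.replace(/\.gguf$/i, '')
  // Quantization tags like Q4_K_M, IQ2_XS, or F16 conventionally end the name.
  const match = base.match(/[-.]((?:[QIF]|IQ)\d\w*)$/i)
  return {
    modelName: match ? base.slice(0, match.index) : base,
    quantization: match?.[1],
  }
}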
@@ -170,7 +169,7 @@
     // TODO: implement abortPull
   }
-  async loadModel(opts: LoadOptions): Promise<SessionInfo> {
+  async load(opts: LoadOptions): Promise<SessionInfo> {
     if (opts.providerId !== this.providerId) {
       throw new Error('Invalid providerId for LlamaCppProvider.loadModel')
     }
@@ -232,7 +231,7 @@
     }
   }
-  async unloadModel(opts: UnloadOptions): Promise<UnloadResult> {
+  async unload(opts: UnloadOptions): Promise<UnloadResult> {
     if (opts.providerId !== this.providerId) {
       return { success: false, error: 'Invalid providerId' }
     }
@@ -292,7 +291,7 @@
       `[${this.providerId} AIEngine] Received OnModelInit for:`,
       model.id
     )
-    return super.loadModel(model)
+    return super.load(model)
   }
   override async unloadModel(model?: Model): Promise<any> {
@@ -302,6 +301,6 @@
       `[${this.providerId} AIEngine] Received OnModelStop for:`,
       model?.id || 'all models'
     )
-    return super.unloadModel(model)
+    return super.unload(model)
   }
 }
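
Taken together, the hunks in this file perform one rename: the LocalProvider-facing methods become load/unload, while the AIEngine overrides loadModel/unloadModel survive as thin wrappers that forward to the renamed methods, so OnModelInit/OnModelStop handling is unchanged. A condensed sketch of the resulting pattern, with simplified stand-in types rather than the actual class:

// Simplified sketch of the delegation pattern; types and bodies are stand-ins.
type JanModel = { id: string }

class Engine {
  // New names, used by the LocalProvider contract.
  async load(model: JanModel) {
    // ...spawn a llama-server process for this model (elided)...
    return { sessionId: model.id }
  }
  async unload(model?: JanModel) {
    // ...stop one session, or all sessions when model is undefined (elided)...
    return { success: true }
  }
  // Legacy entry points kept for OnModelInit / OnModelStop events.
  async loadModel(model: JanModel) {
    return this.load(model)
  }
  async unloadModel(model?: JanModel) {
    return this.unload(model)
  }
}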


@@ -1,16 +1,14 @@
 @echo off
-set CORTEX_VERSION=1.0.14
 set ENGINE_VERSION=b5509
-set ENGINE_DOWNLOAD_URL=https://github.com/menloresearch/llama.cpp/releases/download/%ENGINE_VERSION%/llama-%ENGINE_VERSION%-bin-win
+set ENGINE_DOWNLOAD_GGML_URL=https://github.com/ggml-org/llama.cpp/releases/download/%ENGINE_VERSION%/llama-%ENGINE_VERSION%-bin-win
 set CUDA_DOWNLOAD_URL=https://github.com/menloresearch/llama.cpp/releases/download/%ENGINE_VERSION%
 @REM set SUBFOLDERS=windows-amd64-noavx-cuda-12-0 windows-amd64-noavx-cuda-11-7 windows-amd64-avx2-cuda-12-0 windows-amd64-avx2-cuda-11-7 windows-amd64-noavx windows-amd64-avx windows-amd64-avx2 windows-amd64-avx512 windows-amd64-vulkan
 set BIN_PATH="./"
-set DOWNLOAD_TOOL=..\..\node_modules\.bin\download
+set DOWNLOAD_TOOL=..\..\extensions\llamacpp-extension\node_modules\.bin\download
 @REM Download llama.cpp binaries
-call %DOWNLOAD_TOOL% -e --strip 1 -o %BIN_PATH% https://github.com/menloresearch/cortex.cpp/releases/download/v%CORTEX_VERSION%/cortex-%CORTEX_VERSION%-windows-amd64.tar.gz
 call %DOWNLOAD_TOOL% %ENGINE_DOWNLOAD_URL%-avx2-cuda-cu12.0-x64.tar.gz -e --strip 2 -o./engines/llama.cpp/win-avx2-cuda-cu12.0-x64/%ENGINE_VERSION%
 call %DOWNLOAD_TOOL% %ENGINE_DOWNLOAD_URL%-avx2-cuda-cu11.7-x64.tar.gz -e --strip 2 -o./engines/llama.cpp/win-avx2-cuda-cu11.7-x64/%ENGINE_VERSION%
 @REM call %DOWNLOAD_TOOL% %ENGINE_DOWNLOAD_URL%-noavx-cuda-cu12.0-x64.tar.gz -e --strip 2 -o./engines/llama.cpp/win-noavx-cuda-cu12.0-x64/%ENGINE_VERSION%
@@ -24,10 +22,6 @@ call %DOWNLOAD_TOOL% %CUDA_DOWNLOAD_URL%/cudart-llama-bin-win-cu12.0-x64.tar.gz
 @REM Should not bundle cuda11, users should install it themselves, it bloats the app size a lot
 @REM call %DOWNLOAD_TOOL% %CUDA_DOWNLOAD_URL%/cudart-llama-bin-win-cu11.7-x64.tar.gz -e --strip 1 -o %BIN_PATH%
-move %BIN_PATH%cortex-server-beta.exe %BIN_PATH%cortex-server.exe
-copy %BIN_PATH%cortex-server.exe %BIN_PATH%cortex-server-x86_64-pc-windows-msvc.exe
-del %BIN_PATH%cortex-beta.exe
-del %BIN_PATH%cortex.exe
 @REM Loop through each folder and move DLLs
 for %%F in (%SUBFOLDERS%) do (
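
Every surviving download call in this batch script follows one pattern: build a release-asset URL from ENGINE_VERSION plus a CPU/GPU variant tag, then extract the archive (with --strip 2, dropping the top two archive directories) into a matching engines/llama.cpp/win-<variant>/<version> folder. A TypeScript rendering of that URL/destination mapping, using the URL scheme from the surviving call lines; the helper name is hypothetical and exists nowhere in the repo:

const ENGINE_VERSION = 'b5509'

// Hypothetical helper mirroring the batch script's URL/destination scheme.
function engineAsset(variant: string) {
  return {
    url: `https://github.com/menloresearch/llama.cpp/releases/download/${ENGINE_VERSION}/llama-${ENGINE_VERSION}-bin-win-${variant}.tar.gz`,
    // The script extracts with --strip 2 into a per-variant, per-version dir.
    dest: `./engines/llama.cpp/win-${variant}/${ENGINE_VERSION}`,
  }
}

console.log(engineAsset('avx2-cuda-cu12.0-x64').url)
// https://github.com/menloresearch/llama.cpp/releases/download/b5509/llama-b5509-bin-win-avx2-cuda-cu12.0-x64.tar.gz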


@@ -13,10 +13,7 @@ download() {
   rm "$OUTPUT_DIR/$(basename "$URL")"
 }
-# Read CORTEX_VERSION
-CORTEX_VERSION=1.0.14
 ENGINE_VERSION=b5509
-CORTEX_RELEASE_URL="https://github.com/menloresearch/cortex.cpp/releases/download"
 ENGINE_DOWNLOAD_URL=https://github.com/menloresearch/llama.cpp/releases/download/${ENGINE_VERSION}/llama-${ENGINE_VERSION}-bin
 CUDA_DOWNLOAD_URL=https://github.com/menloresearch/llama.cpp/releases/download/${ENGINE_VERSION}
 BIN_PATH=./
@@ -24,45 +21,19 @@ SHARED_PATH="."
 # Detect platform
 OS_TYPE=$(uname)
-if ls ./cortex-server* 1> /dev/null 2>&1; then
-    echo "cortex-server file with prefix already exists. Exiting."
+if ls "${SHARED_PATH}/engines/llama.cpp/linux-noavx-x64/${ENGINE_VERSION}" 1> /dev/null 2>&1; then
+    echo "llama-server file with prefix already exists. Exiting."
     exit 0
 fi
 if [ "$OS_TYPE" == "Linux" ]; then
     # Linux downloads
-    download "${CORTEX_RELEASE_URL}/v${CORTEX_VERSION}/cortex-${CORTEX_VERSION}-linux-amd64.tar.gz" 1 "${BIN_PATH}"
-    mv ./cortex-server-beta ./cortex-server
-    rm -rf ./cortex
-    rm -rf ./cortex-beta
-    chmod +x "./cortex-server"
-    cp ./cortex-server ./cortex-server-x86_64-unknown-linux-gnu
     # Download engines for Linux
     download "${ENGINE_DOWNLOAD_URL}-linux-noavx-x64.tar.gz" 2 "${SHARED_PATH}/engines/llama.cpp/linux-noavx-x64/${ENGINE_VERSION}"
     download "${ENGINE_DOWNLOAD_URL}-linux-avx-x64.tar.gz" 2 "${SHARED_PATH}/engines/llama.cpp/linux-avx-x64/${ENGINE_VERSION}"
     download "${ENGINE_DOWNLOAD_URL}-linux-avx2-x64.tar.gz" 2 "${SHARED_PATH}/engines/llama.cpp/linux-avx2-x64/${ENGINE_VERSION}"
     download "${ENGINE_DOWNLOAD_URL}-linux-avx512-x64.tar.gz" 2 "${SHARED_PATH}/engines/llama.cpp/linux-avx512-x64/${ENGINE_VERSION}"
     download "${ENGINE_DOWNLOAD_URL}-linux-avx2-cuda-cu12.0-x64.tar.gz" 2 "${SHARED_PATH}/engines/llama.cpp/linux-avx2-cuda-cu12.0-x64/${ENGINE_VERSION}"
     download "${ENGINE_DOWNLOAD_URL}-linux-avx2-cuda-cu11.7-x64.tar.gz" 2 "${SHARED_PATH}/engines/llama.cpp/linux-avx2-cuda-cu11.7-x64/${ENGINE_VERSION}"
     download "${ENGINE_DOWNLOAD_URL}-linux-noavx-cuda-cu12.0-x64.tar.gz" 2 "${SHARED_PATH}/engines/llama.cpp/linux-noavx-cuda-cu12.0-x64/${ENGINE_VERSION}"
     download "${ENGINE_DOWNLOAD_URL}-linux-noavx-cuda-cu11.7-x64.tar.gz" 2 "${SHARED_PATH}/engines/llama.cpp/linux-noavx-cuda-cu11.7-x64/${ENGINE_VERSION}"
     download "${ENGINE_DOWNLOAD_URL}-linux-vulkan-x64.tar.gz" 2 "${SHARED_PATH}/engines/llama.cpp/linux-vulkan-x64/${ENGINE_VERSION}"
     download "${CUDA_DOWNLOAD_URL}/cudart-llama-bin-linux-cu12.0-x64.tar.gz" 0 "${BIN_PATH}/deps"
     # Should not bundle this by default, users can install cuda runtime separately
     # Ship cuda 12.0 by default only for now
     # download "${CUDA_DOWNLOAD_URL}/cudart-llama-bin-linux-cu11.7-x64.tar.gz" 0 "${BIN_PATH}/deps"
 elif [ "$OS_TYPE" == "Darwin" ]; then
     # macOS downloads
-    download "${CORTEX_RELEASE_URL}/v${CORTEX_VERSION}/cortex-${CORTEX_VERSION}-mac-universal.tar.gz" 1 "${BIN_PATH}"
-    mv ./cortex-server-beta ./cortex-server
-    rm -rf ./cortex
-    rm -rf ./cortex-beta
-    chmod +x "./cortex-server"
-    mv ./cortex-server ./cortex-server-universal-apple-darwin
-    cp ./cortex-server-universal-apple-darwin ./cortex-server-aarch64-apple-darwin
-    cp ./cortex-server-universal-apple-darwin ./cortex-server-x86_64-apple-darwin
     # Download engines for macOS
     download "${ENGINE_DOWNLOAD_URL}-macos-arm64.tar.gz" 2 "${SHARED_PATH}/engines/llama.cpp/macos-arm64/${ENGINE_VERSION}"