Change scripts to download only llama.cpp engine
This commit is contained in:
parent a8abc9f9aa
commit ed6f86d4b1
@@ -8,7 +8,6 @@
 import {
   AIEngine,
-  localProvider,
   getJanDataFolderPath,
   fs,
   Model,
@@ -64,7 +63,7 @@ function parseGGUFFileName(filename: string): {
  */
 export default class inference_llamacpp_extension
   extends AIEngine
-  implements localProvider
+  implements LocalProvider
 {
-  provider: string = 'llamacpp'
+  readonly providerId: string = 'llamcpp'
@@ -136,7 +135,7 @@ export default class inference_llamacpp_extension
     for (const entry of entries) {
       if (entry.name?.endsWith('.gguf') && entry.isFile) {
         const modelPath = await path.join(modelsDir, entry.name)
-        const stats = await fs.stat(modelPath) // Tauri's fs.stat or Node's fs.statSync
+        const stats = await fs.stat(modelPath)
         const parsedName = parseGGUFFileName(entry.name)

         result.push({
@@ -170,7 +169,7 @@ export default class inference_llamacpp_extension
     // TODO: implement abortPull
   }

-  async loadModel(opts: LoadOptions): Promise<SessionInfo> {
+  async load(opts: LoadOptions): Promise<SessionInfo> {
     if (opts.providerId !== this.providerId) {
       throw new Error('Invalid providerId for LlamaCppProvider.loadModel')
     }
@@ -232,7 +231,7 @@ export default class inference_llamacpp_extension
     }
   }

-  async unloadModel(opts: UnloadOptions): Promise<UnloadResult> {
+  async unload(opts: UnloadOptions): Promise<UnloadResult> {
     if (opts.providerId !== this.providerId) {
       return { success: false, error: 'Invalid providerId' }
     }
@@ -292,7 +291,7 @@ export default class inference_llamacpp_extension
       `[${this.providerId} AIEngine] Received OnModelInit for:`,
       model.id
     )
-    return super.loadModel(model)
+    return super.load(model)
   }

   override async unloadModel(model?: Model): Promise<any> {
@@ -302,6 +301,6 @@ export default class inference_llamacpp_extension
       `[${this.providerId} AIEngine] Received OnModelStop for:`,
       model?.id || 'all models'
     )
-    return super.unloadModel(model)
+    return super.unload(model)
   }
 }
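For orientation, the renames above (loadModel/unloadModel to load/unload, localProvider to LocalProvider) suggest a provider contract along these lines. Only providerId, load, unload, and the error shape of the unload early-return are visible in this diff; the option and result fields below are assumptions sketched for illustration, not the actual interface from the codebase:

// Sketch of the LocalProvider contract implied by the diff above.
// Shapes beyond the fields the hunks actually show are assumptions.
interface LoadOptions { providerId: string }                 // opts.providerId is checked in load()
interface UnloadOptions { providerId: string }               // opts.providerId is checked in unload()
interface SessionInfo { sessionId: string }                  // assumed field
interface UnloadResult { success: boolean; error?: string }  // matches the early-return in unload()

interface LocalProvider {
  readonly providerId: string
  load(opts: LoadOptions): Promise<SessionInfo>
  unload(opts: UnloadOptions): Promise<UnloadResult>
}

Note the wrappers loadModel/unloadModel are kept as AIEngine overrides that delegate to the renamed load/unload, so existing OnModelInit/OnModelStop event handling keeps working.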
@@ -1,16 +1,14 @@
 @echo off

-set CORTEX_VERSION=1.0.14
 set ENGINE_VERSION=b5509
-set ENGINE_DOWNLOAD_URL=https://github.com/menloresearch/llama.cpp/releases/download/%ENGINE_VERSION%/llama-%ENGINE_VERSION%-bin-win
+set ENGINE_DOWNLOAD_GGML_URL=https://github.com/ggml-org/llama.cpp/releases/download/%ENGINE_VERSION%/llama-%ENGINE_VERSION%-bin-win
 set CUDA_DOWNLOAD_URL=https://github.com/menloresearch/llama.cpp/releases/download/%ENGINE_VERSION%
 @REM set SUBFOLDERS=windows-amd64-noavx-cuda-12-0 windows-amd64-noavx-cuda-11-7 windows-amd64-avx2-cuda-12-0 windows-amd64-avx2-cuda-11-7 windows-amd64-noavx windows-amd64-avx windows-amd64-avx2 windows-amd64-avx512 windows-amd64-vulkan
 set BIN_PATH="./"
-set DOWNLOAD_TOOL=..\..\node_modules\.bin\download
+set DOWNLOAD_TOOL=..\..\extensions\llamacpp-extension\node_modules\.bin\download

 @REM Download llama.cpp binaries
-call %DOWNLOAD_TOOL% -e --strip 1 -o %BIN_PATH% https://github.com/menloresearch/cortex.cpp/releases/download/v%CORTEX_VERSION%/cortex-%CORTEX_VERSION%-windows-amd64.tar.gz
 call %DOWNLOAD_TOOL% %ENGINE_DOWNLOAD_URL%-avx2-cuda-cu12.0-x64.tar.gz -e --strip 2 -o./engines/llama.cpp/win-avx2-cuda-cu12.0-x64/%ENGINE_VERSION%
 call %DOWNLOAD_TOOL% %ENGINE_DOWNLOAD_URL%-avx2-cuda-cu11.7-x64.tar.gz -e --strip 2 -o./engines/llama.cpp/win-avx2-cuda-cu11.7-x64/%ENGINE_VERSION%
 @REM call %DOWNLOAD_TOOL% %ENGINE_DOWNLOAD_URL%-noavx-cuda-cu12.0-x64.tar.gz -e --strip 2 -o./engines/llama.cpp/win-noavx-cuda-cu12.0-x64/%ENGINE_VERSION%
@@ -24,10 +22,6 @@ call %DOWNLOAD_TOOL% %CUDA_DOWNLOAD_URL%/cudart-llama-bin-win-cu12.0-x64.tar.gz
 @REM Should not bundle cuda11, users should install it themselves, it bloats the app size a lot
 @REM call %DOWNLOAD_TOOL% %CUDA_DOWNLOAD_URL%/cudart-llama-bin-win-cu11.7-x64.tar.gz -e --strip 1 -o %BIN_PATH%

-move %BIN_PATH%cortex-server-beta.exe %BIN_PATH%cortex-server.exe
-copy %BIN_PATH%cortex-server.exe %BIN_PATH%cortex-server-x86_64-pc-windows-msvc.exe
-del %BIN_PATH%cortex-beta.exe
-del %BIN_PATH%cortex.exe

 @REM Loop through each folder and move DLLs
 for %%F in (%SUBFOLDERS%) do (
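For context on the flags used in the calls above: `download` is the CLI installed in node_modules, where `-e` extracts the fetched archive, `--strip N` drops N leading path components, and `-o` sets the output directory. A rough shell equivalent of one call, as a sketch with curl/tar stand-ins rather than the tool itself:

# Hypothetical equivalent of one `download <url> -e --strip 2 -o <dir>` call
URL=https://github.com/menloresearch/llama.cpp/releases/download/b5509/llama-b5509-bin-win-avx2-cuda-cu12.0-x64.tar.gz
DEST=./engines/llama.cpp/win-avx2-cuda-cu12.0-x64/b5509
mkdir -p "$DEST"
# Fetch and unpack, dropping two leading path components from entries in the tarball
curl -L "$URL" | tar -xz --strip-components=2 -C "$DEST"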
@@ -13,10 +13,7 @@ download() {
   rm "$OUTPUT_DIR/$(basename "$URL")"
 }

-# Read CORTEX_VERSION
-CORTEX_VERSION=1.0.14
 ENGINE_VERSION=b5509
-CORTEX_RELEASE_URL="https://github.com/menloresearch/cortex.cpp/releases/download"
 ENGINE_DOWNLOAD_URL=https://github.com/menloresearch/llama.cpp/releases/download/${ENGINE_VERSION}/llama-${ENGINE_VERSION}-bin
 CUDA_DOWNLOAD_URL=https://github.com/menloresearch/llama.cpp/releases/download/${ENGINE_VERSION}
 BIN_PATH=./
@@ -24,45 +21,19 @@ SHARED_PATH="."
 # Detect platform
 OS_TYPE=$(uname)

-if ls ./cortex-server* 1> /dev/null 2>&1; then
-  echo "cortex-server file with prefix already exists. Exiting."
+if ls "${SHARED_PATH}/engines/llama.cpp/linux-noavx-x64/${ENGINE_VERSION}" 1> /dev/null 2>&1; then
+  echo "llama-server file with prefix already exists. Exiting."
   exit 0
 fi

 if [ "$OS_TYPE" == "Linux" ]; then
   # Linux downloads
-  download "${CORTEX_RELEASE_URL}/v${CORTEX_VERSION}/cortex-${CORTEX_VERSION}-linux-amd64.tar.gz" 1 "${BIN_PATH}"
-  mv ./cortex-server-beta ./cortex-server
-  rm -rf ./cortex
-  rm -rf ./cortex-beta
-  chmod +x "./cortex-server"
-  cp ./cortex-server ./cortex-server-x86_64-unknown-linux-gnu

   # Download engines for Linux
   download "${ENGINE_DOWNLOAD_URL}-linux-noavx-x64.tar.gz" 2 "${SHARED_PATH}/engines/llama.cpp/linux-noavx-x64/${ENGINE_VERSION}"
   download "${ENGINE_DOWNLOAD_URL}-linux-avx-x64.tar.gz" 2 "${SHARED_PATH}/engines/llama.cpp/linux-avx-x64/${ENGINE_VERSION}"
   download "${ENGINE_DOWNLOAD_URL}-linux-avx2-x64.tar.gz" 2 "${SHARED_PATH}/engines/llama.cpp/linux-avx2-x64/${ENGINE_VERSION}"
   download "${ENGINE_DOWNLOAD_URL}-linux-avx512-x64.tar.gz" 2 "${SHARED_PATH}/engines/llama.cpp/linux-avx512-x64/${ENGINE_VERSION}"
   download "${ENGINE_DOWNLOAD_URL}-linux-avx2-cuda-cu12.0-x64.tar.gz" 2 "${SHARED_PATH}/engines/llama.cpp/linux-avx2-cuda-cu12.0-x64/${ENGINE_VERSION}"
   download "${ENGINE_DOWNLOAD_URL}-linux-avx2-cuda-cu11.7-x64.tar.gz" 2 "${SHARED_PATH}/engines/llama.cpp/linux-avx2-cuda-cu11.7-x64/${ENGINE_VERSION}"
   download "${ENGINE_DOWNLOAD_URL}-linux-noavx-cuda-cu12.0-x64.tar.gz" 2 "${SHARED_PATH}/engines/llama.cpp/linux-noavx-cuda-cu12.0-x64/${ENGINE_VERSION}"
   download "${ENGINE_DOWNLOAD_URL}-linux-noavx-cuda-cu11.7-x64.tar.gz" 2 "${SHARED_PATH}/engines/llama.cpp/linux-noavx-cuda-cu11.7-x64/${ENGINE_VERSION}"
   download "${ENGINE_DOWNLOAD_URL}-linux-vulkan-x64.tar.gz" 2 "${SHARED_PATH}/engines/llama.cpp/linux-vulkan-x64/${ENGINE_VERSION}"
   download "${CUDA_DOWNLOAD_URL}/cudart-llama-bin-linux-cu12.0-x64.tar.gz" 0 "${BIN_PATH}/deps"
   # Should not bundle this by default, users can install cuda runtime separately
   # Ship cuda 12.0 by default only for now
   # download "${CUDA_DOWNLOAD_URL}/cudart-llama-bin-linux-cu11.7-x64.tar.gz" 0 "${BIN_PATH}/deps"

 elif [ "$OS_TYPE" == "Darwin" ]; then
   # macOS downloads
-  download "${CORTEX_RELEASE_URL}/v${CORTEX_VERSION}/cortex-${CORTEX_VERSION}-mac-universal.tar.gz" 1 "${BIN_PATH}"
-  mv ./cortex-server-beta ./cortex-server
-  rm -rf ./cortex
-  rm -rf ./cortex-beta
-  chmod +x "./cortex-server"
-  mv ./cortex-server ./cortex-server-universal-apple-darwin
-  cp ./cortex-server-universal-apple-darwin ./cortex-server-aarch64-apple-darwin
-  cp ./cortex-server-universal-apple-darwin ./cortex-server-x86_64-apple-darwin

   # Download engines for macOS
   download "${ENGINE_DOWNLOAD_URL}-macos-arm64.tar.gz" 2 "${SHARED_PATH}/engines/llama.cpp/macos-arm64/${ENGINE_VERSION}"