From 4f93e14d1685173c9138a19718cc3abac226ac4e Mon Sep 17 00:00:00 2001
From: hiento09 <136591877+hiento09@users.noreply.github.com>
Date: Thu, 21 Dec 2023 15:38:21 +0700
Subject: [PATCH] Fix token speed slow in machine has multi gpus (#1157)

* Update bat script windows choose GPU has highest ram to start nitro

* Update bash script for linux to choose gpu has highest vram

---------

Co-authored-by: Hien To
---
Review notes (free text in this position is ignored by git am / git apply):
  * Line breaks and indentation were reconstructed from a whitespace-collapsed
    copy; the indentation of removed lines is a best guess, so apply with
    --ignore-whitespace if a hunk is rejected.
  * The hunks were amended during review (see the comments inside each
    script), so the post-image blob ids on the "index" lines and the commit
    id on the first line no longer describe this content exactly.
  * linux-start.sh: resolve linux-cuda/ and linux-cpu/ from the start
    directory, check every cd, skip non-numeric nvidia-smi rows, end the file
    with a newline.
  * win-start.bat: enter win-cpu directly when nvidia-smi is missing, use a
    per-run temp file and delete it after parsing.

 .../bin/linux-start.sh                        | 47 ++++++++++++++--
 .../bin/win-start.bat                         | 52 ++++++++++++++++---
 2 files changed, 90 insertions(+), 9 deletions(-)

diff --git a/extensions/inference-nitro-extension/bin/linux-start.sh b/extensions/inference-nitro-extension/bin/linux-start.sh
index 647d3b254..3cf9c8013 100644
--- a/extensions/inference-nitro-extension/bin/linux-start.sh
+++ b/extensions/inference-nitro-extension/bin/linux-start.sh
@@ -1,5 +1,46 @@
 #!/bin/bash
 
-# Attempt to run the nitro_linux_amd64_cuda file and if it fails, run nitro_linux_amd64
+# Launch nitro, preferring the CUDA build pinned to the GPU with the most
+# VRAM and falling back to the CPU build when CUDA is unavailable or fails.
+# Must be started from the directory containing linux-cuda/ and linux-cpu/.
+
+# Remember the start directory so the CPU fallback resolves correctly even
+# if entering linux-cuda fails.
+baseDir=$PWD
+
+# Check if nvidia-smi exists and is executable
+if ! command -v nvidia-smi &> /dev/null; then
+    echo "nvidia-smi not found, proceeding with CPU version..."
+    cd "$baseDir/linux-cpu" || exit 1
+    ./nitro "$@"
+    exit $?
+fi
+
+# Find the GPU with the highest VRAM; each row looks like "0, 24576"
+readarray -t gpus < <(nvidia-smi --query-gpu=index,memory.total --format=csv,noheader,nounits)
+maxMemory=0
+selectedGpuId=0
+
+for gpu in "${gpus[@]}"; do
+    IFS=', ' read -r gpuId gpuMemory _ <<< "$gpu"
+    # Skip rows that are not "<index>, <MiB>" (driver error text, "[N/A]"),
+    # which would otherwise make the arithmetic comparison fail.
+    [[ $gpuId =~ ^[0-9]+$ && $gpuMemory =~ ^[0-9]+$ ]] || continue
+    if (( gpuMemory > maxMemory )); then
+        maxMemory=$gpuMemory
+        selectedGpuId=$gpuId
+    fi
+done
+
+echo "Selected GPU: $selectedGpuId"
+export CUDA_VISIBLE_DEVICES=$selectedGpuId
+
+# Attempt to run nitro_linux_amd64_cuda
-cd linux-cuda
-./nitro "$@" || (echo "nitro_linux_amd64_cuda encountered an error, attempting to run nitro_linux_amd64..." && cd ../linux-cpu && ./nitro "$@")
+if cd "$baseDir/linux-cuda" && ./nitro "$@"; then
+    exit 0
+fi
+
+echo "nitro_linux_amd64_cuda encountered an error, attempting to run nitro_linux_amd64..."
+cd "$baseDir/linux-cpu" || exit 1
+./nitro "$@"
+exit $?
diff --git a/extensions/inference-nitro-extension/bin/win-start.bat b/extensions/inference-nitro-extension/bin/win-start.bat
index 2ad0d8ae8..e18c97a8e 100644
--- a/extensions/inference-nitro-extension/bin/win-start.bat
+++ b/extensions/inference-nitro-extension/bin/win-start.bat
@@ -1,12 +1,52 @@
 @echo off
+setlocal enabledelayedexpansion
+
+set "maxMemory=0"
+set "gpuId="
+
+rem Check whether the nvidia-smi command exists
+where nvidia-smi >nul 2>&1
+if %errorlevel% neq 0 (
+    echo nvidia-smi not found, proceeding with CPU version...
+    rem Enter win-cpu directly; no detour through win-cuda is needed here
+    cd win-cpu
+    goto RunNitroCpu
+)
+
+rem Dump one "index, memory.total" row per GPU to a per-run temp file
+set "tempFile=%temp%\nvidia_smi_output_%random%.txt"
+nvidia-smi --query-gpu=index,memory.total --format=csv,noheader,nounits > "%tempFile%"
+
+rem Pick the GPU index with the largest memory.total
+for /f "usebackq tokens=1-2 delims=, " %%a in ("%tempFile%") do (
+    set /a memorySize=%%b
+    if !memorySize! gtr !maxMemory! (
+        set "maxMemory=!memorySize!"
+        set "gpuId=%%a"
+    )
+)
+
+rem The query output is no longer needed
+del "%tempFile%" >nul 2>&1
+
+rem Echo the selected GPU
+echo Selected GPU: !gpuId!
+
+rem Set the GPU with the highest VRAM as the visible CUDA device
+set CUDA_VISIBLE_DEVICES=!gpuId!
 
 rem Attempt to run nitro_windows_amd64_cuda.exe
 cd win-cuda
 nitro.exe %*
+if %errorlevel% neq 0 goto RunCpuVersion
+goto End
 
-rem Check the exit code of the previous command
-if %errorlevel% neq 0 (
-    echo nitro_windows_amd64_cuda.exe encountered an error, attempting to run nitro_windows_amd64.exe...
-    cd ..\win-cpu
-    nitro.exe %*
-)
+:RunCpuVersion
+rem The CUDA build failed; the working directory is win-cuda at this point
+cd ..\win-cpu
+
+:RunNitroCpu
+nitro.exe %*
+
+:End
+endlocal