Akarshan Biswas e80a865def
fix: detect allocation failures as out-of-memory errors (#6459)
The Llama.cpp backend can emit the phrase “failed to allocate” when it runs out of memory.
Adding this check ensures such messages are correctly classified as out‑of‑memory errors,
providing more accurate error handling for CPU backends.
2025-09-15 12:35:24 +05:30

117 lines
3.5 KiB
Rust

use serde::{Deserialize, Serialize};
use thiserror;
/// Machine-readable error codes surfaced to the frontend.
///
/// Serialized by serde in SCREAMING_SNAKE_CASE (e.g. `MODEL_LOAD_FAILED`,
/// `OUT_OF_MEMORY`), per the `rename_all` attribute below.
#[derive(Debug, Clone, Serialize, Deserialize)]
#[serde(rename_all = "SCREAMING_SNAKE_CASE")]
pub enum ErrorCode {
// --- Setup / filesystem errors ---
BinaryNotFound,
ModelFileNotFound,
LibraryPathInvalid,
// --- Model Loading Errors ---
ModelLoadFailed,
DraftModelLoadFailed,
MultimodalProjectorLoadFailed,
ModelArchNotSupported,
ModelLoadTimedOut,
// Catch-all for unclassified llama.cpp process failures (see `from_stderr`).
LlamaCppProcessError,
// --- Memory Errors ---
OutOfMemory,
// --- Internal Application Errors ---
DeviceListParseFailed,
IoError,
InternalError,
}
/// Structured error passed across the Tauri boundary.
///
/// Note: this type is `Serialize` only (it is produced on the Rust side and
/// consumed by the frontend); it is intentionally not `Deserialize`.
#[derive(Debug, Clone, Serialize, thiserror::Error)]
#[error("LlamacppError {{ code: {code:?}, message: \"{message}\" }}")]
pub struct LlamacppError {
// Machine-readable classification of the failure.
pub code: ErrorCode,
// Short human-readable summary.
pub message: String,
// Raw diagnostic payload (e.g. captured stderr); omitted from JSON when absent.
#[serde(skip_serializing_if = "Option::is_none")]
pub details: Option<String>,
}
impl LlamacppError {
    /// Builds an error from its constituent parts.
    pub fn new(code: ErrorCode, message: String, details: Option<String>) -> Self {
        Self { code, message, details }
    }

    /// Classifies raw llama.cpp stderr output into a structured error.
    ///
    /// Matching is case-insensitive; the full original stderr is always
    /// preserved in `details` for diagnostics.
    pub fn from_stderr(stderr: &str) -> Self {
        let haystack = stderr.to_lowercase();

        // Substrings that signal the host or device ran out of memory.
        // TODO: add others
        const OOM_MARKERS: [&str; 6] = [
            "out of memory",
            "failed to allocate",
            "insufficient memory",
            "erroroutofdevicememory",                      // vulkan specific
            "kiogpucommandbuffercallbackerroroutofmemory", // Metal-specific error code
            "cuda_error_out_of_memory",                    // CUDA-specific
        ];

        if OOM_MARKERS.iter().any(|marker| haystack.contains(marker)) {
            return Self::new(
                ErrorCode::OutOfMemory,
                "Out of memory. The model requires more RAM or VRAM than available.".into(),
                Some(stderr.into()),
            );
        }

        if haystack.contains("error loading model architecture") {
            return Self::new(
                ErrorCode::ModelArchNotSupported,
                "The model's architecture is not supported by this version of the backend.".into(),
                Some(stderr.into()),
            );
        }

        // Fallback: the process failed for a reason we do not recognize.
        Self::new(
            ErrorCode::LlamaCppProcessError,
            "The model process encountered an unexpected error.".into(),
            Some(stderr.into()),
        )
    }
}
/// Top-level error type for server commands; wraps domain, I/O, and Tauri
/// failures so handlers can use `?` uniformly.
#[derive(Debug, thiserror::Error)]
pub enum ServerError {
// Domain error from the llama.cpp backend; Display is delegated to it.
#[error(transparent)]
Llamacpp(#[from] LlamacppError),
#[error("IO error: {0}")]
Io(#[from] std::io::Error),
#[error("Tauri error: {0}")]
Tauri(#[from] tauri::Error),
}
/// Serialization for Tauri: every variant is normalized to a `LlamacppError`
/// so the frontend always receives the same JSON error shape.
impl serde::Serialize for ServerError {
    fn serialize<S>(&self, serializer: S) -> Result<S::Ok, S::Error>
    where
        S: serde::Serializer,
    {
        // Map each variant to a LlamacppError, then serialize that directly.
        match self {
            ServerError::Llamacpp(inner) => inner.clone(),
            ServerError::Io(io_err) => LlamacppError::new(
                ErrorCode::IoError,
                "An input/output error occurred.".into(),
                Some(io_err.to_string()),
            ),
            ServerError::Tauri(tauri_err) => LlamacppError::new(
                ErrorCode::InternalError,
                "An internal application error occurred.".into(),
                Some(tauri_err.to_string()),
            ),
        }
        .serialize(serializer)
    }
}
/// Convenience alias for fallible server-command results.
pub type ServerResult<T> = Result<T, ServerError>;