Merge branch 'dev' into feat/old-mac-support

This commit is contained in:
Sherzod Mutalov 2025-08-03 10:19:30 +05:00 committed by GitHub
commit e51847830a
No known key found for this signature in database
GPG Key ID: B5690EEEBB952194
5 changed files with 124 additions and 76 deletions

View File

@ -153,7 +153,7 @@
{ {
"key": "cont_batching", "key": "cont_batching",
"title": "Continuous Batching", "title": "Continuous Batching",
"description": "Enable continuous batching (a.k.a dynamic batching) for concurrent requests (default: enabled).", "description": "Enable continuous batching (a.k.a dynamic batching) for concurrent requests.",
"controllerType": "checkbox", "controllerType": "checkbox",
"controllerProps": { "controllerProps": {
"value": false "value": false

View File

@ -185,40 +185,76 @@ pub async fn load_llama_model(
// Spawn task to monitor stdout for readiness // Spawn task to monitor stdout for readiness
let _stdout_task = tokio::spawn(async move { let _stdout_task = tokio::spawn(async move {
let mut reader = BufReader::new(stdout).lines(); let mut reader = BufReader::new(stdout);
while let Ok(Some(line)) = reader.next_line().await { let mut byte_buffer = Vec::new();
log::info!("[llamacpp stdout] {}", line);
loop {
byte_buffer.clear();
match reader.read_until(b'\n', &mut byte_buffer).await {
Ok(0) => break, // EOF
Ok(_) => {
let line = String::from_utf8_lossy(&byte_buffer);
let line = line.trim_end();
if !line.is_empty() {
log::info!("[llamacpp stdout] {}", line);
}
}
Err(e) => {
log::error!("Error reading stdout: {}", e);
break;
}
}
} }
}); });
// Spawn task to capture stderr and monitor for errors // Spawn task to capture stderr and monitor for errors
let stderr_task = tokio::spawn(async move { let stderr_task = tokio::spawn(async move {
let mut reader = BufReader::new(stderr).lines(); let mut reader = BufReader::new(stderr);
let mut byte_buffer = Vec::new();
let mut stderr_buffer = String::new(); let mut stderr_buffer = String::new();
while let Ok(Some(line)) = reader.next_line().await {
log::info!("[llamacpp] {}", line); // Using your log format loop {
stderr_buffer.push_str(&line); byte_buffer.clear();
stderr_buffer.push('\n'); match reader.read_until(b'\n', &mut byte_buffer).await {
// Check for critical error indicators that should stop the process Ok(0) => break, // EOF
// TODO: check for different errors Ok(_) => {
if line.to_lowercase().contains("error") let line = String::from_utf8_lossy(&byte_buffer);
|| line.to_lowercase().contains("failed") let line = line.trim_end();
|| line.to_lowercase().contains("fatal")
|| line.contains("CUDA error") if !line.is_empty() {
|| line.contains("out of memory") stderr_buffer.push_str(line);
|| line.contains("failed to load") stderr_buffer.push('\n');
{ log::info!("[llamacpp] {}", line);
let _ = error_tx.send(line.clone()).await;
} // Check for critical error indicators that should stop the process
// Check for readiness indicator - llama-server outputs this when ready let line_lower = line.to_string().to_lowercase();
else if line.contains("server is listening on") if line_lower.contains("error loading model")
|| line.contains("starting the main loop") || line_lower.contains("unknown model architecture")
|| line.contains("server listening on") || line_lower.contains("fatal")
{ || line_lower.contains("cuda error")
log::info!("Server appears to be ready based on stdout: '{}'", line); || line_lower.contains("out of memory")
let _ = ready_tx.send(true).await; || line_lower.contains("error")
|| line_lower.contains("failed")
{
let _ = error_tx.send(line.to_string()).await;
}
// Check for readiness indicator - llama-server outputs this when ready
else if line.contains("server is listening on")
|| line.contains("starting the main loop")
|| line.contains("server listening on")
{
log::info!("Server appears to be ready based on stderr: '{}'", line);
let _ = ready_tx.send(true).await;
}
}
}
Err(e) => {
log::error!("Error reading stderr: {}", e);
break;
}
} }
} }
stderr_buffer stderr_buffer
}); });
@ -226,7 +262,7 @@ pub async fn load_llama_model(
if let Some(status) = child.try_wait()? { if let Some(status) = child.try_wait()? {
if !status.success() { if !status.success() {
let stderr_output = stderr_task.await.unwrap_or_default(); let stderr_output = stderr_task.await.unwrap_or_default();
log::error!("llama.cpp exited early with code {status:?}"); log::error!("llama.cpp exited early with code {:?}", status);
log::error!("--- stderr ---\n{}", stderr_output); log::error!("--- stderr ---\n{}", stderr_output);
return Err(ServerError::LlamacppError(stderr_output.trim().to_string())); return Err(ServerError::LlamacppError(stderr_output.trim().to_string()));
} }
@ -246,25 +282,43 @@ pub async fn load_llama_model(
// Error occurred // Error occurred
Some(error_msg) = error_rx.recv() => { Some(error_msg) = error_rx.recv() => {
log::error!("Server encountered an error: {}", error_msg); log::error!("Server encountered an error: {}", error_msg);
let _ = child.kill().await;
// Give process a moment to exit naturally
tokio::time::sleep(Duration::from_millis(100)).await;
// Check if process already exited
if let Some(status) = child.try_wait()? {
log::info!("Process exited with code {:?}", status);
return Err(ServerError::LlamacppError(error_msg));
} else {
log::info!("Process still running, killing it...");
let _ = child.kill().await;
}
// Get full stderr output // Get full stderr output
let stderr_output = stderr_task.await.unwrap_or_default(); let stderr_output = stderr_task.await.unwrap_or_default();
return Err(ServerError::LlamacppError(format!("Error: {}\n\nFull stderr:\n{}", error_msg, stderr_output))); return Err(ServerError::LlamacppError(format!("Error: {}\n\nFull stderr:\n{}", error_msg, stderr_output)));
} }
// Timeout // Check for process exit more frequently
_ = tokio::time::sleep(Duration::from_millis(100)) => { _ = tokio::time::sleep(Duration::from_millis(50)) => {
// Check if process exited
if let Some(status) = child.try_wait()? {
let stderr_output = stderr_task.await.unwrap_or_default();
if !status.success() {
log::error!("llama.cpp exited with error code {:?}", status);
return Err(ServerError::LlamacppError(format!("Process exited with code {:?}\n\nStderr:\n{}", status, stderr_output)));
} else {
log::error!("llama.cpp exited successfully but without ready signal");
return Err(ServerError::LlamacppError(format!("Process exited unexpectedly\n\nStderr:\n{}", stderr_output)));
}
}
// Timeout check
if start_time.elapsed() > timeout_duration { if start_time.elapsed() > timeout_duration {
log::error!("Timeout waiting for server to be ready"); log::error!("Timeout waiting for server to be ready");
let _ = child.kill().await; let _ = child.kill().await;
return Err(ServerError::LlamacppError("Server startup timeout".to_string())); let stderr_output = stderr_task.await.unwrap_or_default();
} return Err(ServerError::LlamacppError(format!("Server startup timeout\n\nStderr:\n{}", stderr_output)));
// Check if process is still alive
if let Some(status) = child.try_wait()? {
if !status.success() {
let stderr_output = stderr_task.await.unwrap_or_default();
log::error!("llama.cpp exited during startup with code {status:?}");
return Err(ServerError::LlamacppError(format!("Process exited with code {status:?}\n\nStderr:\n{}", stderr_output)));
}
} }
} }
} }
@ -331,7 +385,10 @@ pub async fn unload_llama_model(
#[cfg(all(windows, target_arch = "x86_64"))] #[cfg(all(windows, target_arch = "x86_64"))]
{ {
if let Some(raw_pid) = child.id() { if let Some(raw_pid) = child.id() {
log::warn!("gracefully killing is unsupported on Windows, force-killing PID {}", raw_pid); log::warn!(
"gracefully killing is unsupported on Windows, force-killing PID {}",
raw_pid
);
// Since we know a graceful shutdown doesn't work and there are no child processes // Since we know a graceful shutdown doesn't work and there are no child processes
// to worry about, we can use `child.kill()` directly. On Windows, this is // to worry about, we can use `child.kill()` directly. On Windows, this is

View File

@ -266,14 +266,7 @@ describe('useSmallScreenStore', () => {
}) })
describe('useSmallScreen', () => { describe('useSmallScreen', () => {
beforeEach(() => { it('should return small screen state', () => {
// Reset the store state before each test
act(() => {
useSmallScreenStore.getState().setIsSmallScreen(false)
})
})
it('should return small screen state and update store', () => {
const mockMediaQueryList = { const mockMediaQueryList = {
matches: true, matches: true,
addEventListener: vi.fn(), addEventListener: vi.fn(),
@ -285,7 +278,6 @@ describe('useSmallScreen', () => {
const { result } = renderHook(() => useSmallScreen()) const { result } = renderHook(() => useSmallScreen())
expect(result.current).toBe(true) expect(result.current).toBe(true)
expect(useSmallScreenStore.getState().isSmallScreen).toBe(true)
}) })
it('should update when media query changes', () => { it('should update when media query changes', () => {
@ -309,7 +301,6 @@ describe('useSmallScreen', () => {
}) })
expect(result.current).toBe(true) expect(result.current).toBe(true)
expect(useSmallScreenStore.getState().isSmallScreen).toBe(true)
}) })
it('should use correct media query for small screen detection', () => { it('should use correct media query for small screen detection', () => {
@ -325,20 +316,4 @@ describe('useSmallScreen', () => {
expect(mockMatchMedia).toHaveBeenCalledWith('(max-width: 768px)') expect(mockMatchMedia).toHaveBeenCalledWith('(max-width: 768px)')
}) })
it('should persist state across multiple hook instances', () => {
const mockMediaQueryList = {
matches: true,
addEventListener: vi.fn(),
removeEventListener: vi.fn(),
}
mockMatchMedia.mockReturnValue(mockMediaQueryList)
const { result: result1 } = renderHook(() => useSmallScreen())
const { result: result2 } = renderHook(() => useSmallScreen())
expect(result1.current).toBe(true)
expect(result2.current).toBe(true)
})
}) })

View File

@ -77,14 +77,7 @@ export function useMediaQuery(
return matches || false return matches || false
} }
// Specific hook for small screen detection with state management // Specific hook for small screen detection
export const useSmallScreen = (): boolean => { export const useSmallScreen = (): boolean => {
const { isSmallScreen, setIsSmallScreen } = useSmallScreenStore() return useMediaQuery('(max-width: 768px)')
const mediaQuery = useMediaQuery('(max-width: 768px)')
useEffect(() => {
setIsSmallScreen(mediaQuery)
}, [mediaQuery, setIsSmallScreen])
return isSmallScreen
} }

View File

@ -210,6 +210,29 @@ export const useModelProvider = create<ModelProviderState>()(
{ {
name: localStorageKey.modelProvider, name: localStorageKey.modelProvider,
storage: createJSONStorage(() => localStorage), storage: createJSONStorage(() => localStorage),
migrate: (persistedState: unknown, version: number) => {
const state = persistedState as ModelProviderState
// Migration for cont_batching description update (version 0 -> 1)
if (version === 0 && state?.providers) {
state.providers = state.providers.map((provider) => {
if (provider.provider === 'llamacpp' && provider.settings) {
provider.settings = provider.settings.map((setting) => {
if (setting.key === 'cont_batching') {
return {
...setting,
description: 'Enable continuous batching (a.k.a dynamic batching) for concurrent requests.'
}
}
return setting
})
}
return provider
})
}
return state
},
version: 1,
} }
) )
) )