Merge branch 'dev' into feat/old-mac-support

commit e51847830a
@@ -153,7 +153,7 @@
 {
   "key": "cont_batching",
   "title": "Continuous Batching",
-  "description": "Enable continuous batching (a.k.a dynamic batching) for concurrent requests (default: enabled).",
+  "description": "Enable continuous batching (a.k.a dynamic batching) for concurrent requests.",
   "controllerType": "checkbox",
   "controllerProps": {
     "value": false
@@ -185,40 +185,76 @@ pub async fn load_llama_model(
 
     // Spawn task to monitor stdout for readiness
     let _stdout_task = tokio::spawn(async move {
-        let mut reader = BufReader::new(stdout).lines();
-        while let Ok(Some(line)) = reader.next_line().await {
+        let mut reader = BufReader::new(stdout);
+        let mut byte_buffer = Vec::new();
+
+        loop {
+            byte_buffer.clear();
+            match reader.read_until(b'\n', &mut byte_buffer).await {
+                Ok(0) => break, // EOF
+                Ok(_) => {
+                    let line = String::from_utf8_lossy(&byte_buffer);
+                    let line = line.trim_end();
+                    if !line.is_empty() {
+                        log::info!("[llamacpp stdout] {}", line);
+                    }
+                }
+                Err(e) => {
+                    log::error!("Error reading stdout: {}", e);
+                    break;
+                }
+            }
         }
     });
 
     // Spawn task to capture stderr and monitor for errors
     let stderr_task = tokio::spawn(async move {
-        let mut reader = BufReader::new(stderr).lines();
+        let mut reader = BufReader::new(stderr);
+        let mut byte_buffer = Vec::new();
         let mut stderr_buffer = String::new();
-        while let Ok(Some(line)) = reader.next_line().await {
-            log::info!("[llamacpp] {}", line); // Using your log format
-            stderr_buffer.push_str(&line);
+
+        loop {
+            byte_buffer.clear();
+            match reader.read_until(b'\n', &mut byte_buffer).await {
+                Ok(0) => break, // EOF
+                Ok(_) => {
+                    let line = String::from_utf8_lossy(&byte_buffer);
+                    let line = line.trim_end();
+
+                    if !line.is_empty() {
+                        stderr_buffer.push_str(line);
+                        stderr_buffer.push('\n');
+                        log::info!("[llamacpp] {}", line);
 
                         // Check for critical error indicators that should stop the process
                         // TODO: check for different errors
-                        if line.to_lowercase().contains("error")
-                            || line.to_lowercase().contains("failed")
-                            || line.to_lowercase().contains("fatal")
-                            || line.contains("CUDA error")
-                            || line.contains("out of memory")
-                            || line.contains("failed to load")
+                        let line_lower = line.to_string().to_lowercase();
+                        if line_lower.contains("error loading model")
+                            || line_lower.contains("unknown model architecture")
+                            || line_lower.contains("fatal")
+                            || line_lower.contains("cuda error")
+                            || line_lower.contains("out of memory")
+                            || line_lower.contains("error")
+                            || line_lower.contains("failed")
                         {
-                            let _ = error_tx.send(line.clone()).await;
+                            let _ = error_tx.send(line.to_string()).await;
                         }
                         // Check for readiness indicator - llama-server outputs this when ready
                         else if line.contains("server is listening on")
                             || line.contains("starting the main loop")
                             || line.contains("server listening on")
                         {
-                            log::info!("Server appears to be ready based on stdout: '{}'", line);
+                            log::info!("Server appears to be ready based on stderr: '{}'", line);
                             let _ = ready_tx.send(true).await;
                         }
+                    }
+                }
+                Err(e) => {
+                    log::error!("Error reading stderr: {}", e);
+                    break;
+                }
+            }
         }
 
         stderr_buffer
    });
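The switch from `.lines()` / `next_line()` to `read_until` plus `String::from_utf8_lossy` is significant: `next_line()` returns an `Err` on any invalid UTF-8, and the old `while let Ok(Some(line))` loops treated that error as end-of-stream, silently dropping all further output. Byte-level reading cannot fail on encoding. A minimal standalone sketch of the difference (not from this commit; the input buffer is invented and contains one invalid byte):

    use tokio::io::{AsyncBufReadExt, BufReader};

    #[tokio::main]
    async fn main() {
        let bytes: &[u8] = b"ok line\n\xFF broken line\nlast line\n";

        // Old approach: next_line() errors on the invalid byte, so the
        // Ok(Some(..)) pattern fails and the loop ends after one line.
        let mut lines = BufReader::new(bytes).lines();
        while let Ok(Some(line)) = lines.next_line().await {
            println!("lines(): {}", line);
        }

        // New approach: every line survives; invalid bytes become U+FFFD.
        let mut reader = BufReader::new(bytes);
        let mut buf = Vec::new();
        loop {
            buf.clear();
            match reader.read_until(b'\n', &mut buf).await {
                Ok(0) => break, // EOF
                Ok(_) => println!("read_until(): {}", String::from_utf8_lossy(&buf).trim_end()),
                Err(e) => {
                    eprintln!("read error: {}", e);
                    break;
                }
            }
        }
    }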
@@ -226,7 +262,7 @@ pub async fn load_llama_model(
     if let Some(status) = child.try_wait()? {
         if !status.success() {
             let stderr_output = stderr_task.await.unwrap_or_default();
-            log::error!("llama.cpp exited early with code {status:?}");
+            log::error!("llama.cpp exited early with code {:?}", status);
             log::error!("--- stderr ---\n{}", stderr_output);
             return Err(ServerError::LlamacppError(stderr_output.trim().to_string()));
         }
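The change from `{status:?}` to `{:?}` with an explicit argument reverses inline format-args capture, which requires Rust 1.58 or newer; plausibly this keeps the crate building on an older toolchain for the old-Mac target, though the commit itself does not say. The two forms print identically:

    fn main() {
        let status = Some(0);
        // Inline capture: the identifier is read out of the format string.
        // Stabilized in Rust 1.58; rejected by older compilers.
        println!("exited with code {status:?}");
        // Positional argument: same output, compiles on older toolchains.
        println!("exited with code {:?}", status);
    }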
@@ -246,25 +282,43 @@ pub async fn load_llama_model(
             // Error occurred
             Some(error_msg) = error_rx.recv() => {
                 log::error!("Server encountered an error: {}", error_msg);
+
+                // Give process a moment to exit naturally
+                tokio::time::sleep(Duration::from_millis(100)).await;
+
+                // Check if process already exited
+                if let Some(status) = child.try_wait()? {
+                    log::info!("Process exited with code {:?}", status);
                     return Err(ServerError::LlamacppError(error_msg));
+                } else {
+                    log::info!("Process still running, killing it...");
                     let _ = child.kill().await;
+                }
+
+                // Get full stderr output
+                let stderr_output = stderr_task.await.unwrap_or_default();
+                return Err(ServerError::LlamacppError(format!("Error: {}\n\nFull stderr:\n{}", error_msg, stderr_output)));
             }
-            // Timeout
-            _ = tokio::time::sleep(Duration::from_millis(100)) => {
+            // Check for process exit more frequently
+            _ = tokio::time::sleep(Duration::from_millis(50)) => {
+                // Check if process exited
+                if let Some(status) = child.try_wait()? {
+                    let stderr_output = stderr_task.await.unwrap_or_default();
+                    if !status.success() {
+                        log::error!("llama.cpp exited with error code {:?}", status);
+                        return Err(ServerError::LlamacppError(format!("Process exited with code {:?}\n\nStderr:\n{}", status, stderr_output)));
+                    } else {
+                        log::error!("llama.cpp exited successfully but without ready signal");
+                        return Err(ServerError::LlamacppError(format!("Process exited unexpectedly\n\nStderr:\n{}", stderr_output)));
+                    }
+                }
+
+                // Timeout check
                 if start_time.elapsed() > timeout_duration {
                     log::error!("Timeout waiting for server to be ready");
                     let _ = child.kill().await;
-                    return Err(ServerError::LlamacppError("Server startup timeout".to_string()));
-                }
-                // Check if process is still alive
-                if let Some(status) = child.try_wait()? {
-                    if !status.success() {
-                        let stderr_output = stderr_task.await.unwrap_or_default();
-                        log::error!("llama.cpp exited during startup with code {status:?}");
-                        return Err(ServerError::LlamacppError(format!("Process exited with code {status:?}\n\nStderr:\n{}", stderr_output)));
-                    }
+                    return Err(ServerError::LlamacppError(format!("Server startup timeout\n\nStderr:\n{}", stderr_output)));
                 }
             }
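These branches belong to a `tokio::select!` wait loop racing three outcomes: a readiness signal from the stderr task, an error line, and a short tick that polls the child process and the startup deadline. A runnable sketch of that shape (the real code polls `child.try_wait()` where this uses a stand-in flag, and the 30-second `timeout_duration` is an assumed value):

    use std::time::{Duration, Instant};
    use tokio::sync::mpsc;

    #[tokio::main]
    async fn main() -> Result<(), String> {
        let (ready_tx, mut ready_rx) = mpsc::channel::<bool>(1);
        let (_error_tx, mut error_rx) = mpsc::channel::<String>(8);

        // Stand-in for the stderr task: signal readiness after a short delay.
        tokio::spawn(async move {
            tokio::time::sleep(Duration::from_millis(200)).await;
            let _ = ready_tx.send(true).await;
        });

        let start_time = Instant::now();
        let timeout_duration = Duration::from_secs(30); // assumed value
        let exited: Option<i32> = None; // stand-in for child.try_wait()?

        loop {
            tokio::select! {
                Some(true) = ready_rx.recv() => {
                    println!("server ready after {:?}", start_time.elapsed());
                    return Ok(());
                }
                Some(error_msg) = error_rx.recv() => {
                    return Err(format!("server error: {}", error_msg));
                }
                _ = tokio::time::sleep(Duration::from_millis(50)) => {
                    if let Some(code) = exited {
                        return Err(format!("process exited early with code {}", code));
                    }
                    if start_time.elapsed() > timeout_duration {
                        return Err("server startup timeout".to_string());
                    }
                }
            }
        }
    }

The 50 ms tick is what lets a crashed child be detected promptly even when neither channel ever fires.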
@@ -331,7 +385,10 @@ pub async fn unload_llama_model(
     #[cfg(all(windows, target_arch = "x86_64"))]
     {
         if let Some(raw_pid) = child.id() {
-            log::warn!("gracefully killing is unsupported on Windows, force-killing PID {}", raw_pid);
+            log::warn!(
+                "gracefully killing is unsupported on Windows, force-killing PID {}",
+                raw_pid
+            );
 
             // Since we know a graceful shutdown doesn't work and there are no child processes
             // to worry about, we can use `child.kill()` directly. On Windows, this is
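For reference, a sketch of how this cfg-gated path might sit in a shutdown helper, assuming a `tokio::process::Child`; the function name `stop_server` is invented, and the Unix-side graceful-signal path is omitted:

    use tokio::process::Child;

    // Hypothetical wrapper around the unload path sketched above.
    async fn stop_server(mut child: Child) -> std::io::Result<()> {
        #[cfg(all(windows, target_arch = "x86_64"))]
        {
            if let Some(raw_pid) = child.id() {
                log::warn!(
                    "gracefully killing is unsupported on Windows, force-killing PID {}",
                    raw_pid
                );
            }
        }
        // kill() force-terminates and then waits for the child to exit,
        // so no zombie is left behind on either platform.
        child.kill().await
    }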
@@ -266,14 +266,7 @@ describe('useSmallScreenStore', () => {
 })
 
 describe('useSmallScreen', () => {
-  beforeEach(() => {
-    // Reset the store state before each test
-    act(() => {
-      useSmallScreenStore.getState().setIsSmallScreen(false)
-    })
-  })
-
-  it('should return small screen state and update store', () => {
+  it('should return small screen state', () => {
     const mockMediaQueryList = {
       matches: true,
       addEventListener: vi.fn(),
@@ -285,7 +278,6 @@ describe('useSmallScreen', () => {
     const { result } = renderHook(() => useSmallScreen())
 
     expect(result.current).toBe(true)
-    expect(useSmallScreenStore.getState().isSmallScreen).toBe(true)
   })
 
   it('should update when media query changes', () => {
@@ -309,7 +301,6 @@ describe('useSmallScreen', () => {
     })
 
     expect(result.current).toBe(true)
-    expect(useSmallScreenStore.getState().isSmallScreen).toBe(true)
   })
 
   it('should use correct media query for small screen detection', () => {
@@ -325,20 +316,4 @@ describe('useSmallScreen', () => {
 
     expect(mockMatchMedia).toHaveBeenCalledWith('(max-width: 768px)')
   })
-
-  it('should persist state across multiple hook instances', () => {
-    const mockMediaQueryList = {
-      matches: true,
-      addEventListener: vi.fn(),
-      removeEventListener: vi.fn(),
-    }
-
-    mockMatchMedia.mockReturnValue(mockMediaQueryList)
-
-    const { result: result1 } = renderHook(() => useSmallScreen())
-    const { result: result2 } = renderHook(() => useSmallScreen())
-
-    expect(result1.current).toBe(true)
-    expect(result2.current).toBe(true)
-  })
 })
@@ -77,14 +77,7 @@ export function useMediaQuery(
   return matches || false
 }
 
-// Specific hook for small screen detection with state management
+// Specific hook for small screen detection
 export const useSmallScreen = (): boolean => {
-  const { isSmallScreen, setIsSmallScreen } = useSmallScreenStore()
-  const mediaQuery = useMediaQuery('(max-width: 768px)')
-
-  useEffect(() => {
-    setIsSmallScreen(mediaQuery)
-  }, [mediaQuery, setIsSmallScreen])
-
-  return isSmallScreen
+  return useMediaQuery('(max-width: 768px)')
 }
@@ -210,6 +210,29 @@ export const useModelProvider = create<ModelProviderState>()(
     {
       name: localStorageKey.modelProvider,
       storage: createJSONStorage(() => localStorage),
+      migrate: (persistedState: unknown, version: number) => {
+        const state = persistedState as ModelProviderState
+
+        // Migration for cont_batching description update (version 0 -> 1)
+        if (version === 0 && state?.providers) {
+          state.providers = state.providers.map((provider) => {
+            if (provider.provider === 'llamacpp' && provider.settings) {
+              provider.settings = provider.settings.map((setting) => {
+                if (setting.key === 'cont_batching') {
+                  return {
+                    ...setting,
+                    description: 'Enable continuous batching (a.k.a dynamic batching) for concurrent requests.'
+                  }
+                }
+                return setting
+              })
+            }
+            return provider
+          })
+        }
+        return state
+      },
+      version: 1,
     }
   )
 )