feat: Implement Cortex server auto-restart and webview notification (#5074)

* feat: Implement Cortex server auto-restart and webview notification

Implements a robust auto-restart mechanism for the Cortex server (sidecar)
managed by the Tauri backend.

Key changes:

Backend (src-tauri):
- Modified `core/setup.rs` to:
  - Loop sidecar spawning, attempting up to `MAX_RESTARTS` (5) times with a
    `RESTART_DELAY_MS` (5 seconds) between attempts.
  - Monitor the sidecar process for unexpected termination (crashes or
    non-zero exit codes).
  - Reset the restart attempt count to 0 in `AppState` upon a successful
    server spawn.
  - Emit a "cortex_max_restarts_reached" event to the webview if the
    server fails to start after `MAX_RESTARTS`.
- Updated `core/state.rs` to include `cortex_restart_count: Arc<Mutex<u32>>`
  in `AppState` to track restart attempts.
- Added a new Tauri command `reset_cortex_restart_count` in `core/cmd.rs`
  to allow the webview (or other parts of the app) to reset this counter.
- Registered the new command and initialized the `cortex_restart_count`
  in `lib.rs`.

Frontend (web-app):
- Created a new component `CortexFailureDialog.tsx` in
  `src/containers/dialogs/` to:
  - Listen for the "cortex_max_restarts_reached" event from Tauri.
  - Display a dialog informing the user that the local AI engine (Cortex)
    failed to start after multiple attempts.
  - Offer options to "Contact Support" (opens jan.ai/support),
    "Restart Jan" (invokes the `relaunch` Tauri command), or "Okay"
    (dismisses the dialog).
- Integrated the `CortexFailureDialog` into the `RootLayout` in
  `src/routes/__root.tsx` so it's globally available.
- Corrected button variants in `__root.tsx` to use `variant="default"`
  with appropriate classNames for outline styling, resolving TypeScript
  errors.

* refactor: Improve async handling and logging in setup_sidecar function
This commit is contained in:
Sam Hoang Van 2025-05-22 23:09:43 +07:00 committed by GitHub
parent 942f2f51b7
commit 7df7d8ffa0
No known key found for this signature in database
GPG Key ID: B5690EEEBB952194
6 changed files with 293 additions and 60 deletions

View File

@ -313,6 +313,14 @@ fn copy_dir_recursive(src: &PathBuf, dst: &PathBuf) -> Result<(), io::Error> {
Ok(())
}
/// Tauri command that sets the Cortex server restart counter back to zero.
///
/// The counter lives in `AppState::cortex_restart_count` and is the same value
/// the sidecar supervision loop consults before each spawn attempt, so calling
/// this lets the webview (or any other caller) grant a fresh set of attempts.
///
/// Always returns `Ok(())`; the `Result` shape is kept for the command interface.
#[tauri::command]
pub async fn reset_cortex_restart_count(state: State<'_, AppState>) -> Result<(), String> {
    let restart_counter = &state.cortex_restart_count;
    // The guard stays alive until the function returns, so the log line is
    // written while the counter is still locked.
    let mut attempts = restart_counter.lock().await;
    *attempts = 0;
    log::info!("Cortex server restart count reset to 0.");
    Ok(())
}
#[tauri::command]
pub fn change_app_data_folder(
app_handle: tauri::AppHandle,

View File

@ -3,13 +3,15 @@ use std::{
fs::{self, File},
io::Read,
path::PathBuf,
sync::{Arc, Mutex},
sync::Arc,
};
use tar::Archive;
use tauri::{App, Emitter, Listener, Manager};
use tauri_plugin_shell::process::CommandEvent;
use tauri_plugin_shell::process::{CommandChild, CommandEvent};
use tauri_plugin_shell::ShellExt;
use tauri_plugin_store::StoreExt;
use tokio::sync::Mutex; // Using tokio::sync::Mutex
use tokio::time::{sleep, Duration};
// MCP
use super::{
@ -204,65 +206,194 @@ pub fn setup_mcp(app: &App) {
}
pub fn setup_sidecar(app: &App) -> Result<(), String> {
// Setup sidecar
let app_state = app.state::<AppState>();
let app_data_dir = get_jan_data_folder_path(app.handle().clone());
let mut sidecar_command = app.shell().sidecar("cortex-server").unwrap().args([
"--start-server",
"--port",
"39291",
"--config_file_path",
app_data_dir.join(".janrc").to_str().unwrap(),
"--data_folder_path",
app_data_dir.to_str().unwrap(),
"--cors",
"ON",
"--allowed_origins",
"http://localhost:3000,http://localhost:1420,tauri://localhost,http://tauri.localhost",
"config",
"--api_keys",
app_state.inner().app_token.as_deref().unwrap_or(""),
]);
#[cfg(target_os = "windows")]
{
sidecar_command = sidecar_command.env("PATH", {
let app_data_dir = app.app_handle().path().app_data_dir().unwrap();
let dest = app_data_dir.to_str().unwrap();
let path = std::env::var("PATH").unwrap_or_default();
format!("{}{}{}", path, std::path::MAIN_SEPARATOR, dest)
});
}
#[cfg(not(target_os = "windows"))]
{
sidecar_command = sidecar_command.env("LD_LIBRARY_PATH", {
let app_data_dir = app.app_handle().path().app_data_dir().unwrap();
let dest = app_data_dir.to_str().unwrap();
let ld_library_path = std::env::var("LD_LIBRARY_PATH").unwrap_or_default();
format!("{}{}{}", ld_library_path, std::path::MAIN_SEPARATOR, dest)
});
}
let (mut rx, _child) = sidecar_command.spawn().expect("Failed to spawn sidecar");
let child = Arc::new(Mutex::new(Some(_child)));
let child_clone = child.clone();
let app_handle = app.handle().clone();
tauri::async_runtime::spawn(async move {
// read events such as stdout
while let Some(event) = rx.recv().await {
if let CommandEvent::Stdout(line_bytes) = event {
let line = String::from_utf8_lossy(&line_bytes);
log::info!("Outputs: {:?}", line)
}
}
});
const MAX_RESTARTS: u32 = 5;
const RESTART_DELAY_MS: u64 = 5000;
app.handle().listen("kill-sidecar", move |_| {
let mut child_guard = child_clone.lock().unwrap();
if let Some(actual_child) = child_guard.take() {
actual_child.kill().unwrap();
let app_state = app_handle.state::<AppState>();
let cortex_restart_count_state = app_state.cortex_restart_count.clone();
let app_data_dir = get_jan_data_folder_path(app_handle.clone());
let sidecar_command_builder = || {
let mut cmd = app_handle
.shell()
.sidecar("cortex-server")
.expect("Failed to get sidecar command")
.args([
"--start-server",
"--port",
"39291",
"--config_file_path",
app_data_dir.join(".janrc").to_str().unwrap(),
"--data_folder_path",
app_data_dir.to_str().unwrap(),
"--cors",
"ON",
"--allowed_origins",
"http://localhost:3000,http://localhost:1420,tauri://localhost,http://tauri.localhost",
"config",
"--api_keys",
app_state.inner().app_token.as_deref().unwrap_or(""),
]);
#[cfg(target_os = "windows")]
{
cmd = cmd.env("PATH", {
let current_app_data_dir = app_handle.path().app_data_dir().unwrap();
let dest = current_app_data_dir.to_str().unwrap();
let path_env = std::env::var("PATH").unwrap_or_default();
format!("{}{}{}", path_env, std::path::MAIN_SEPARATOR, dest)
});
}
#[cfg(not(target_os = "windows"))]
{
cmd = cmd.env("LD_LIBRARY_PATH", {
let current_app_data_dir = app_handle.path().app_data_dir().unwrap();
let dest = current_app_data_dir.to_str().unwrap();
let ld_path_env = std::env::var("LD_LIBRARY_PATH").unwrap_or_default();
format!("{}{}{}", ld_path_env, std::path::MAIN_SEPARATOR, dest)
});
}
cmd
};
let child_process: Arc<Mutex<Option<CommandChild>>> = Arc::new(Mutex::new(None));
let child_process_clone_for_kill = child_process.clone();
app_handle.listen("kill-sidecar", move |_event| {
let child_to_kill_arc = child_process_clone_for_kill.clone();
tauri::async_runtime::spawn(async move {
log::info!("Received kill-sidecar event (processing async).");
if let Some(child) = child_to_kill_arc.lock().await.take() {
log::info!("Attempting to kill sidecar process...");
if let Err(e) = child.kill() {
log::error!("Failed to kill sidecar process: {}", e);
} else {
log::info!("Sidecar process killed successfully via event.");
}
} else {
log::warn!("Kill event received, but no active sidecar process found to kill.");
}
});
});
// Supervision loop for the cortex-server sidecar: spawn it, pump its events
// until it exits, then either retry after RESTART_DELAY_MS or stop.
//
// NOTE(review): the restart counter is reset to 0 on every successful spawn
// (see below) and only incremented by 1 after a crash, so a server that
// spawns fine but keeps crashing oscillates between 0 and 1 and never reaches
// MAX_RESTARTS. As written, the limit only bounds *consecutive spawn
// failures* (the `Err` arm). Confirm this is the intended policy.
loop {
// Copy the counter out so the lock is released at the end of this statement.
let current_restart_count = *cortex_restart_count_state.lock().await;
// Out of attempts: tell the webview and stop supervising.
if current_restart_count >= MAX_RESTARTS {
log::error!(
"Cortex server reached maximum restart attempts ({}). Giving up.",
current_restart_count
);
if let Err(e) = app_handle.emit("cortex_max_restarts_reached", ()) {
log::error!("Failed to emit cortex_max_restarts_reached event: {}", e);
}
break;
}
log::info!(
"Spawning cortex-server (Attempt {}/{})",
current_restart_count + 1,
MAX_RESTARTS
);
// Build a fresh command for every attempt.
let current_command = sidecar_command_builder();
match current_command.spawn() {
Ok((mut rx, child_instance)) => {
log::info!(
"Cortex server spawned successfully. PID: {:?}",
child_instance.pid()
);
// Publish the child handle so the "kill-sidecar" listener can take and kill it.
*child_process.lock().await = Some(child_instance);
// A successful spawn clears any previously accumulated failures.
// The inner block scopes the guard so it is released before the event pump.
{
let mut count = cortex_restart_count_state.lock().await;
if *count > 0 {
log::info!(
"Cortex server started successfully, resetting restart count from {} to 0.",
*count
);
*count = 0;
}
}
let mut process_terminated_unexpectedly = false;
// Drain sidecar events until the channel closes or a terminal event arrives.
while let Some(event) = rx.recv().await {
match event {
CommandEvent::Stdout(line_bytes) => {
log::info!(
"[Cortex STDOUT]: {}",
String::from_utf8_lossy(&line_bytes)
);
}
CommandEvent::Stderr(line_bytes) => {
log::error!(
"[Cortex STDERR]: {}",
String::from_utf8_lossy(&line_bytes)
);
}
// NOTE(review): an Error event is treated like a crash, but the child
// is not killed here. Confirm that Error implies the process is gone;
// otherwise the next iteration may spawn a second instance.
CommandEvent::Error(message) => {
log::error!("[Cortex ERROR]: {}", message);
process_terminated_unexpectedly = true;
break;
}
CommandEvent::Terminated(payload) => {
log::info!(
"[Cortex Terminated]: Signal {:?}, Code {:?}",
payload.signal,
payload.code
);
// `None` here means the handle was already taken elsewhere (in the
// visible code, by the "kill-sidecar" listener), so the exit is
// considered intentional and does not trigger a restart.
if child_process.lock().await.is_some() {
// A missing exit code or any non-zero code counts as unexpected.
if payload.code.map_or(true, |c| c != 0) {
process_terminated_unexpectedly = true;
}
}
break;
}
_ => {}
}
}
// Drop the stale handle so a later "kill-sidecar" event does not act on it.
// The check and the assignment take the lock separately.
if child_process.lock().await.is_some() {
*child_process.lock().await = None;
log::info!("Cleared child process lock after termination.");
}
if process_terminated_unexpectedly {
log::warn!("Cortex server terminated unexpectedly.");
let mut count = cortex_restart_count_state.lock().await;
*count += 1;
log::info!(
"Waiting {}ms before attempting restart {}/{}...",
RESTART_DELAY_MS,
*count,
MAX_RESTARTS
);
// Release the counter lock before sleeping so other users of the counter
// (e.g. the reset_cortex_restart_count command) are not blocked for the delay.
drop(count);
sleep(Duration::from_millis(RESTART_DELAY_MS)).await;
continue;
} else {
log::info!(
"Cortex server terminated normally or was killed. Not restarting."
);
break;
}
}
// Spawn itself failed: count the failure, wait, and fall through to the
// next loop iteration (which re-checks the limit).
Err(e) => {
log::error!("Failed to spawn cortex-server: {}", e);
let mut count = cortex_restart_count_state.lock().await;
*count += 1;
log::info!(
"Waiting {}ms before attempting restart {}/{} due to spawn failure...",
RESTART_DELAY_MS,
*count,
MAX_RESTARTS
);
// Release the counter lock before sleeping.
drop(count);
sleep(Duration::from_millis(RESTART_DELAY_MS)).await;
}
}
}
});
Ok(())
@ -301,4 +432,4 @@ pub fn setup_engine_binaries(app: &App) -> Result<(), String> {
log::error!("Failed to copy themes: {}", e);
}
Ok(())
}
}

View File

@ -10,6 +10,7 @@ pub struct AppState {
pub app_token: Option<String>,
pub mcp_servers: Arc<Mutex<HashMap<String, RunningService<RoleClient, ()>>>>,
pub download_manager: Arc<Mutex<DownloadManagerState>>,
pub cortex_restart_count: Arc<Mutex<u32>>,
}
pub fn generate_app_token() -> String {
rand::thread_rng()

View File

@ -49,6 +49,7 @@ pub fn run() {
core::cmd::read_logs,
core::cmd::handle_app_update,
core::cmd::change_app_data_folder,
core::cmd::reset_cortex_restart_count,
// MCP commands
core::mcp::get_tools,
core::mcp::call_tool,
@ -77,6 +78,7 @@ pub fn run() {
app_token: Some(generate_app_token()),
mcp_servers: Arc::new(Mutex::new(HashMap::new())),
download_manager: Arc::new(Mutex::new(DownloadManagerState::default())),
cortex_restart_count: Arc::new(Mutex::new(0)),
})
.setup(|app| {
app.handle().plugin(

View File

@ -0,0 +1,88 @@
import { useEffect, useState } from 'react'
import { listen } from '@tauri-apps/api/event'
import { invoke } from '@tauri-apps/api/core'
import { t } from 'i18next'
import {
Dialog,
DialogContent,
DialogDescription,
DialogFooter,
DialogHeader,
DialogTitle,
} from '@/components/ui/dialog'
import { Button } from '@/components/ui/button'
/**
 * Global dialog shown when the backend reports that the local AI engine
 * (Cortex) could not be started.
 *
 * Subscribes to the `cortex_max_restarts_reached` Tauri event and, once it is
 * received, offers three actions: open the support page, relaunch the app, or
 * dismiss the dialog. Renders nothing until the event has fired.
 */
export function CortexFailureDialog() {
  const [showDialog, setShowDialog] = useState(false)

  useEffect(() => {
    // `listen` resolves asynchronously, so the component can unmount before
    // the unlisten handle arrives (e.g. React StrictMode's mount/unmount/mount
    // cycle). Remember that cleanup already ran so a late handle is released
    // immediately instead of leaking the subscription.
    let disposed = false
    let unlisten: (() => void) | undefined

    listen<null>('cortex_max_restarts_reached', (event) => {
      console.log('Cortex max restarts reached event received:', event)
      setShowDialog(true)
    })
      .then((stopListening) => {
        if (disposed) {
          stopListening()
        } else {
          unlisten = stopListening
        }
      })
      .catch((error) => {
        // Avoid an unhandled promise rejection if the subscription fails.
        console.error(
          'Failed to listen for cortex_max_restarts_reached event:',
          error
        )
      })

    return () => {
      disposed = true
      if (unlisten) {
        unlisten()
      }
    }
  }, [])

  // Asks the backend to relaunch the app; falls back to a manual instruction
  // when the command fails.
  const handleRestartJan = async () => {
    try {
      await invoke('relaunch')
    } catch (error) {
      console.error('Failed to relaunch app:', error)
      alert(
        'Failed to automatically restart. Please close and reopen Jan manually.'
      )
    }
  }

  if (!showDialog) {
    return null
  }

  return (
    <Dialog open={showDialog} onOpenChange={setShowDialog}>
      <DialogContent>
        <DialogHeader>
          <DialogTitle>{t('cortexFailureDialog.title', 'Local AI Engine Issue')}</DialogTitle>
        </DialogHeader>
        <DialogDescription>
          {t('cortexFailureDialog.description', 'The local AI engine (Cortex) failed to start after multiple attempts. This might prevent some features from working correctly.')}
        </DialogDescription>
        <DialogFooter className="gap-2 sm:gap-0">
          <Button
            variant="default"
            className="bg-transparent border border-main-view-fg/20 hover:bg-main-view-fg/10"
            onClick={() => {
              window.open('https://jan.ai/support', '_blank')
              setShowDialog(false)
            }}
          >
            {t('cortexFailureDialog.contactSupport', 'Contact Support')}
          </Button>
          <Button
            variant="default"
            className="bg-transparent border border-main-view-fg/20 hover:bg-main-view-fg/10"
            onClick={handleRestartJan}
          >
            {t('cortexFailureDialog.restartJan', 'Restart Jan')}
          </Button>
          <Button onClick={() => setShowDialog(false)}>
            {t('common.okay', 'Okay')}
          </Button>
        </DialogFooter>
      </DialogContent>
    </Dialog>
  )
}

View File

@ -3,6 +3,7 @@ import { createRootRoute, Outlet, useRouterState } from '@tanstack/react-router'
import LeftPanel from '@/containers/LeftPanel'
import DialogAppUpdater from '@/containers/dialogs/AppUpdater'
import { CortexFailureDialog } from '@/containers/dialogs/CortexFailureDialog' // Added import
import { Fragment } from 'react/jsx-runtime'
import { AppearanceProvider } from '@/providers/AppearanceProvider'
import { ThemeProvider } from '@/providers/ThemeProvider'
@ -59,6 +60,7 @@ const LogsLayout = () => {
function RootLayout() {
const router = useRouterState()
const isLocalAPIServerLogsRoute =
router.location.pathname === route.localApiServerlogs ||
router.location.pathname === route.systemMonitor ||
@ -74,6 +76,7 @@ function RootLayout() {
</ExtensionProvider>
{isLocalAPIServerLogsRoute ? <LogsLayout /> : <AppLayout />}
{/* <TanStackRouterDevtools position="bottom-right" /> */}
<CortexFailureDialog />
</Fragment>
)
}