import asyncio
import logging
import os
import time
from datetime import datetime
from pathlib import Path
import threading

from utils import (
    force_close_jan,
    get_latest_trajectory_folder,
    is_jan_running,
    start_jan_app,
)
from migration_utils import install_jan_version, prepare_migration_environment
from test_runner import run_single_test_with_timeout
from agent import ComputerAgent, LLM
from screen_recorder import ScreenRecorder
from reportportal_handler import (
    extract_test_result_from_trajectory,
    upload_test_results_to_rp,
)

logger = logging.getLogger(__name__)


async def run_single_test_with_timeout_no_restart(computer, test_data, rp_client, launch_id,
                                                  max_turns=30, jan_app_path=None,
                                                  jan_process_name="Jan.exe", agent_config=None,
                                                  enable_reportportal=False):
    """
    Run a single test case WITHOUT restarting the Jan app - assumes app is already running.

    Args:
        computer: Computer handle passed through to ComputerAgent.
        test_data: Dict with 'path' (relative test file path) and 'prompt'
            (the instructions fed to the agent).
        rp_client: ReportPortal client; used only when enable_reportportal is True.
        launch_id: ReportPortal launch id; used only when enable_reportportal is True.
        max_turns: Kept for signature compatibility.
            NOTE(review): currently not forwarded to the agent - agent.run(prompt)
            receives no turn limit here; confirm whether the agent enforces one.
        jan_app_path: Unused; kept for signature compatibility with the restarting runner.
        jan_process_name: Process name checked to warn when Jan is not running.
        agent_config: Optional dict with loop/model settings; defaults to the
            UI-TARS configuration below.
        enable_reportportal: When True, upload the extracted result to ReportPortal.

    Returns:
        dict: Test result, e.g. {"success": bool, "status": str, "message": str}.
    """
    path = test_data['path']
    prompt = test_data['prompt']

    # Default agent config if not provided
    if agent_config is None:
        agent_config = {
            "loop": "uitars",
            "model_provider": "oaicompat",
            "model_name": "ByteDance-Seed/UI-TARS-1.5-7B",
            "model_base_url": "http://10.200.108.58:1234/v1"
        }

    # Derive the trajectory directory from the test path (drop the .txt extension)
    trajectory_name = str(Path(path).with_suffix(''))
    trajectory_base_dir = os.path.abspath(f"trajectories/{trajectory_name.replace(os.sep, '/')}")
    # Ensure the parent "trajectories" directory exists
    os.makedirs(os.path.dirname(trajectory_base_dir), exist_ok=True)

    # Create recordings directory and a unique, filesystem-safe video filename
    recordings_dir = "recordings"
    os.makedirs(recordings_dir, exist_ok=True)
    current_time = datetime.now().strftime("%Y%m%d_%H%M%S")
    safe_test_name = trajectory_name.replace('/', '_').replace('\\', '_')
    video_filename = f"{safe_test_name}_{current_time}.mp4"
    video_path = os.path.abspath(os.path.join(recordings_dir, video_filename))

    # Initialize screen recorder
    recorder = ScreenRecorder(video_path, fps=10)

    try:
        # Check if Jan app is running (don't restart it if missing)
        if not is_jan_running(jan_process_name):
            logger.warning(f"Jan application ({jan_process_name}) is not running, but continuing anyway")
        else:
            # Ensure window is maximized for this test
            from utils import maximize_jan_window
            if maximize_jan_window():
                logger.info("Jan application window maximized for test")
            else:
                logger.warning("Could not maximize Jan application window for test")

        # Start screen recording
        recorder.start_recording()

        # Create agent for this test using config
        agent = ComputerAgent(
            computer=computer,
            loop=agent_config["loop"],
            model=LLM(
                provider=agent_config["model_provider"],
                name=agent_config["model_name"],
                provider_base_url=agent_config["model_base_url"]
            ),
            trajectory_dir=trajectory_base_dir
        )

        # Run the test with the prompt, logging each streamed result
        logger.info(f"Running test case: {path}")
        async for result in agent.run(prompt):
            logger.info(f"Test result for {path}: {result}")
            print(result)

        # Extract test result from the latest trajectory written by the agent
        trajectory_folder = get_latest_trajectory_folder(path)
        test_result = extract_test_result_from_trajectory(trajectory_folder)

        # Upload to ReportPortal if enabled
        if enable_reportportal and rp_client and launch_id:
            upload_test_results_to_rp(rp_client, launch_id, test_result, trajectory_folder)

        return test_result

    except Exception as e:
        logger.error(f"Test failed with exception: {e}")
        return {"success": False, "status": "error", "message": str(e)}
    finally:
        # Stop the recording exactly once, on every exit path. The previous
        # version also stopped it in the try/except bodies, which could call
        # stop_recording() twice per run.
        recorder.stop_recording()
        # Don't close Jan app - let it keep running for the next test
        logger.info(f"Completed test: {path} (Jan app kept running)")


def _restart_jan_for_phase(phase_name):
    """Force-close all Jan processes and start a fresh, maximized instance.

    Args:
        phase_name: Only used in log messages ("setup" or "verification").
    """
    from utils import maximize_jan_window

    logger.info("Force closing any existing Jan processes...")
    force_close_jan("Jan.exe")
    force_close_jan("Jan-nightly.exe")
    time.sleep(5)  # Wait for processes to fully close

    logger.info(f"Starting Jan application for {phase_name} phase...")
    start_jan_app()
    time.sleep(10)  # Wait for app to be ready

    # Ensure window is maximized for testing
    if maximize_jan_window():
        logger.info(f"Jan application window maximized for {phase_name} phase")
    else:
        logger.warning(f"Could not maximize Jan application window for {phase_name} phase")


def _collect_test_files(test_case, plural_key, single_key):
    """Return the phase's test files, supporting both the plural key
    (e.g. 'setup_tests') and the legacy single key (e.g. 'setup_test').

    Returns an empty list when neither key is present.
    """
    if plural_key in test_case:
        return test_case[plural_key]
    if single_key in test_case:
        return [test_case[single_key]]
    return []


async def _run_phase_files(computer, files, file_label, missing_label, rp_client, launch_id,
                           max_turns, agent_config, enable_reportportal):
    """Run a list of migration test files sequentially against the running Jan app.

    Args:
        files: Test file names, relative to tests/migration/.
        file_label: Lowercase word used in per-file log lines ("setup" or "verify").
        missing_label: Word used in the file-not-found log line
            ("Setup" or "Verification").

    Returns:
        bool: True only if every file exists and its test reports success.
    """
    all_ok = True
    for j, test_file in enumerate(files, 1):
        logger.info(f" [{j}/{len(files)}] Running {file_label} file: {test_file}")

        test_path = f"tests/migration/{test_file}"
        if not os.path.exists(test_path):
            logger.error(f"{missing_label} test file not found: {test_path}")
            all_ok = False
            continue

        with open(test_path, "r", encoding="utf-8") as f:
            prompt = f.read()

        # Run test without restarting Jan app (assumes Jan is already running)
        result = await run_single_test_with_timeout_no_restart(
            computer=computer,
            test_data={"path": test_file, "prompt": prompt},
            rp_client=rp_client,
            launch_id=launch_id,
            max_turns=max_turns,
            jan_app_path=None,
            jan_process_name="Jan.exe",
            agent_config=agent_config,
            enable_reportportal=enable_reportportal
        )

        success = result.get("success", False) if result else False
        if success:
            logger.info(f" ✅ {file_label.capitalize()} file {test_file}: SUCCESS")
        else:
            logger.error(f" ❌ {file_label.capitalize()} file {test_file}: FAILED")
            all_ok = False

        time.sleep(3)  # Small delay between files
    return all_ok


async def run_batch_migration_test(computer, old_version_path, new_version_path, rp_client=None,
                                   launch_id=None, max_turns=30, agent_config=None,
                                   enable_reportportal=False, test_cases=None):
    """
    Run migration test with batch approach: all setups first, then upgrade, then all verifies.

    This approach is more realistic (like a real user) but less granular for debugging.

    Args:
        computer: Computer handle forwarded to each individual test run.
        old_version_path: Installer path of the old Jan version.
        new_version_path: Installer path of the new Jan version.
        rp_client / launch_id: ReportPortal handles, forwarded to each test.
        max_turns: Forwarded to each individual test run.
        agent_config: Optional agent configuration dict, forwarded to each test.
        enable_reportportal: Forwarded to each individual test run.
        test_cases: Keys into MIGRATION_TEST_CASES; defaults to all of them.

    Returns:
        dict: Batch result with phase flags, per-case setup/verify results, and
        an error_message when an exception aborted the run.
    """
    from individual_migration_runner import MIGRATION_TEST_CASES

    if test_cases is None:
        test_cases = list(MIGRATION_TEST_CASES.keys())

    logger.info("=" * 100)
    logger.info("RUNNING BATCH MIGRATION TESTS")
    logger.info("=" * 100)
    logger.info(f"Test cases: {', '.join(test_cases)}")
    logger.info("Approach: Setup All → Upgrade → Verify All")
    logger.info("")

    batch_result = {
        "overall_success": False,
        "setup_phase_success": False,
        "upgrade_success": False,
        "verification_phase_success": False,
        "setup_results": {},
        "verify_results": {},
        "error_message": None
    }

    try:
        # Prepare migration environment
        env_setup = prepare_migration_environment()
        logger.info(f"Migration environment prepared: {env_setup}")

        # PHASE 1: Install old version and run ALL setup tests
        logger.info("=" * 80)
        logger.info("PHASE 1: BATCH SETUP ON OLD VERSION")
        logger.info("=" * 80)

        install_jan_version(old_version_path, "old")
        time.sleep(15)  # Extra wait time for stability

        # Start Jan app once for the entire setup phase
        _restart_jan_for_phase("setup")

        setup_failures = 0
        for i, test_case_key in enumerate(test_cases, 1):
            test_case = MIGRATION_TEST_CASES[test_case_key]
            logger.info(f"[{i}/{len(test_cases)}] Running setup: {test_case['name']}")

            setup_files = _collect_test_files(test_case, 'setup_tests', 'setup_test')
            if not setup_files:
                logger.error(f"No setup tests defined for {test_case_key}")
                batch_result["setup_results"][test_case_key] = False
                setup_failures += 1
                continue

            # Run all setup files for this test case
            case_ok = await _run_phase_files(
                computer, setup_files, "setup", "Setup", rp_client, launch_id,
                max_turns, agent_config, enable_reportportal
            )
            batch_result["setup_results"][test_case_key] = case_ok
            if case_ok:
                logger.info(f"✅ Setup {test_case_key}: SUCCESS (all {len(setup_files)} files completed)")
            else:
                logger.error(f"❌ Setup {test_case_key}: FAILED (one or more files failed)")
                setup_failures += 1

            time.sleep(3)  # Small delay between setups

        batch_result["setup_phase_success"] = setup_failures == 0
        logger.info(f"Setup phase complete: {len(test_cases) - setup_failures}/{len(test_cases)} successful")

        if setup_failures > 0:
            logger.warning(f"{setup_failures} setup tests failed - continuing with upgrade anyway")

        # PHASE 2: Upgrade to new version
        logger.info("=" * 80)
        logger.info("PHASE 2: UPGRADING TO NEW VERSION")
        logger.info("=" * 80)

        force_close_jan("Jan.exe")
        force_close_jan("Jan-nightly.exe")
        time.sleep(5)

        install_jan_version(new_version_path, "new")
        # NOTE(review): success is assumed as soon as the installer call returns;
        # presumably install_jan_version raises on failure - confirm.
        batch_result["upgrade_success"] = True
        time.sleep(15)  # Extra wait time after upgrade

        # Start Jan app once for the entire verification phase
        _restart_jan_for_phase("verification")

        # PHASE 3: Run ALL verification tests on new version
        logger.info("=" * 80)
        logger.info("PHASE 3: BATCH VERIFICATION ON NEW VERSION")
        logger.info("=" * 80)

        verify_failures = 0
        for i, test_case_key in enumerate(test_cases, 1):
            test_case = MIGRATION_TEST_CASES[test_case_key]
            logger.info(f"[{i}/{len(test_cases)}] Running verification: {test_case['name']}")

            # Skip verification if setup failed (optional - you could still try)
            if not batch_result["setup_results"].get(test_case_key, False):
                logger.warning(f"Skipping verification for {test_case_key} - setup failed")
                batch_result["verify_results"][test_case_key] = False
                verify_failures += 1
                continue

            verify_files = _collect_test_files(test_case, 'verify_tests', 'verify_test')
            if not verify_files:
                logger.error(f"No verify tests defined for {test_case_key}")
                batch_result["verify_results"][test_case_key] = False
                verify_failures += 1
                continue

            # Run all verify files for this test case
            case_ok = await _run_phase_files(
                computer, verify_files, "verify", "Verification", rp_client, launch_id,
                max_turns, agent_config, enable_reportportal
            )
            batch_result["verify_results"][test_case_key] = case_ok
            if case_ok:
                logger.info(f"✅ Verify {test_case_key}: SUCCESS (all {len(verify_files)} files completed)")
            else:
                logger.error(f"❌ Verify {test_case_key}: FAILED (one or more files failed)")
                verify_failures += 1

            time.sleep(3)  # Small delay between verifications

        batch_result["verification_phase_success"] = verify_failures == 0
        logger.info(f"Verification phase complete: {len(test_cases) - verify_failures}/{len(test_cases)} successful")

        # Overall success calculation
        batch_result["overall_success"] = (
            batch_result["setup_phase_success"] and
            batch_result["upgrade_success"] and
            batch_result["verification_phase_success"]
        )

        # Final summary
        logger.info("=" * 100)
        logger.info("BATCH MIGRATION TEST SUMMARY")
        logger.info("=" * 100)
        logger.info(f"Overall Success: {batch_result['overall_success']}")
        logger.info(f"Setup Phase: {batch_result['setup_phase_success']} ({len(test_cases) - setup_failures}/{len(test_cases)})")
        logger.info(f"Upgrade Phase: {batch_result['upgrade_success']}")
        logger.info(f"Verification Phase: {batch_result['verification_phase_success']} ({len(test_cases) - verify_failures}/{len(test_cases)})")
        logger.info("")
        logger.info("Detailed Results:")
        for test_case_key in test_cases:
            setup_status = "✅" if batch_result["setup_results"].get(test_case_key, False) else "❌"
            verify_status = "✅" if batch_result["verify_results"].get(test_case_key, False) else "❌"
            logger.info(f" {test_case_key.ljust(20)}: Setup {setup_status} | Verify {verify_status}")

        return batch_result

    except Exception as e:
        logger.error(f"Batch migration test failed with exception: {e}")
        batch_result["error_message"] = str(e)
        return batch_result
    finally:
        # Cleanup: never leave a Jan process running, whatever happened above
        force_close_jan("Jan.exe")
        force_close_jan("Jan-nightly.exe")